diff --git a/Make.inc.in b/Make.inc.in index 1fa3179c..858ad336 100755 --- a/Make.inc.in +++ b/Make.inc.in @@ -67,6 +67,26 @@ UTILMODNAME=@UTILMODNAME@ CBINDLIBNAME=libpsb_cbind.a +CUDAD=@CUDAD@ +CUDALD=@CUDALD@ +LCUDA=@LCUDA@ + +SPGPU_LIBS=@SPGPU_LIBS@ + +CUDA_DIR=@CUDA_DIR@ +CUDA_DEFINES=@CUDA_DEFINES@ +CUDA_INCLUDES=@CUDA_INCLUDES@ +CUDA_LIBS=@CUDA_LIBS@ +CUDA_VERSION=@CUDA_VERSION@ +CUDA_SHORT_VERSION=@CUDA_SHORT_VERSION@ +NVCC=@CUDA_NVCC@ +CUDEFINES=@CUDEFINES@ + +.SUFFIXES: .cu +.cu.o: + $(NVCC) $(CINCLUDES) $(CDEFINES) $(CUDEFINES) -c $< + @PSBLASRULES@ +PSBGPULDLIBS=$(LCUDA) $(SPGPU_LIBS) $(CUDA_LIBS) $(PSBLDLIBS) $(LIBS) \ No newline at end of file diff --git a/Makefile b/Makefile index a0f5ec3e..95f4cb17 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ include Make.inc -all: dirs based precd kryld utild cbindd libd +all: dirs based precd kryld utild cbindd extd $(CUDAD) libd @echo "=====================================" @echo "PSBLAS libraries Compilation Successful." @@ -12,15 +12,20 @@ dirs: precd: based utild: based kryld: precd - +extd: based +cudad: extd cbindd: based precd kryld utild -libd: based precd kryld utild cbindd +libd: based precd kryld utild cbindd extd $(CUDALD) $(MAKE) -C base lib $(MAKE) -C prec lib $(MAKE) -C krylov lib $(MAKE) -C util lib $(MAKE) -C cbind lib + $(MAKE) -C ext lib +cudald: cudad + $(MAKE) -C cuda lib + based: $(MAKE) -C base objs @@ -32,6 +37,10 @@ utild: $(MAKE) -C util objs cbindd: $(MAKE) -C cbind objs +extd: based + $(MAKE) -C ext objs +cudad: based extd + $(MAKE) -C cuda objs install: all @@ -56,6 +65,8 @@ clean: $(MAKE) -C krylov clean $(MAKE) -C util clean $(MAKE) -C cbind clean + $(MAKE) -C ext clean + $(MAKE) -C cuda clean check: all make check -C test/serial @@ -71,6 +82,8 @@ veryclean: cleanlib cd krylov && $(MAKE) veryclean cd util && $(MAKE) veryclean cd cbind && $(MAKE) veryclean + cd ext && $(MAKE) veryclean + cd cuda && $(MAKE) veryclean cd test/fileread && $(MAKE) clean cd test/pargen && $(MAKE) clean cd test/util && $(MAKE) clean diff --git a/README.md b/README.md index a9813f5e..e51fe70b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -PSBLAS library, version 3.8 +PSBLAS library, version 3.9 =========================== The architecture of the Fortran 2003 sparse BLAS is described in: @@ -25,7 +25,7 @@ Harwell-Boeing and MatrixMarket file formats. DOCUMENTATION ------------- -See docs/psblas-3.8.pdf; an HTML version of the same document is +See docs/psblas-3.9.pdf; an HTML version of the same document is available in docs/html. Please consult the sample programs, especially test/pargen/psb_[sd]_pde[23]d.f90 @@ -40,6 +40,15 @@ The main reference for the serial sparse BLAS is: >linear algebra subprograms for sparse matrices: a user level interface, >ACM Trans. Math. Softw., 23(3), 379-401, 1997. +CUDA and GPU support +-------------------- +This version of PSBLAS incorporates into a single package three +entities that were previouslty separated: +1. PSBLAS -- the base library +2. PSBLAS-EXT -- a library providing additional storage formats +3. SPGPU -- a package of kernels for NVIDIA GPUs originally + written by Davide Barbieri and Salvatore Filippone; + see the license file cuda/License-spgpu.md INSTALLING ---------- @@ -61,6 +70,11 @@ prerequisites (see also SERIAL below): specify `--with-amd` (see `./configure --help` for more details). We use the C interface to AMD. +5. If you have CUDA available, use + --with-cuda= to specify the CUDA toolkit location + --with-cudacc=XX,YY,ZZ to specify a list of target CCs (compute + capabilities) to compile the CUDA code for. + The configure script will generate a Make.inc file suitable for building the library. The script is capable of recognizing the needed libraries with their default names; if they are in unusual places consider adding diff --git a/base/comm/internals/psi_cswapdata_a.F90 b/base/comm/internals/psi_cswapdata_a.F90 index 715b674e..acf7f342 100644 --- a/base/comm/internals/psi_cswapdata_a.F90 +++ b/base/comm/internals/psi_cswapdata_a.F90 @@ -191,7 +191,9 @@ subroutine psi_cswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -676,7 +678,9 @@ subroutine psi_cswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_cswaptran_a.F90 b/base/comm/internals/psi_cswaptran_a.F90 index a7f2c687..0aa5d6f6 100644 --- a/base/comm/internals/psi_cswaptran_a.F90 +++ b/base/comm/internals/psi_cswaptran_a.F90 @@ -195,7 +195,9 @@ subroutine psi_ctranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -688,7 +690,9 @@ subroutine psi_ctranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_dswapdata_a.F90 b/base/comm/internals/psi_dswapdata_a.F90 index aff32517..ff562994 100644 --- a/base/comm/internals/psi_dswapdata_a.F90 +++ b/base/comm/internals/psi_dswapdata_a.F90 @@ -191,7 +191,9 @@ subroutine psi_dswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -676,7 +678,9 @@ subroutine psi_dswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_dswaptran_a.F90 b/base/comm/internals/psi_dswaptran_a.F90 index ed13df40..a0506a2b 100644 --- a/base/comm/internals/psi_dswaptran_a.F90 +++ b/base/comm/internals/psi_dswaptran_a.F90 @@ -195,7 +195,9 @@ subroutine psi_dtranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -688,7 +690,9 @@ subroutine psi_dtranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_eswapdata_a.F90 b/base/comm/internals/psi_eswapdata_a.F90 index 6a644563..acc6a6bf 100644 --- a/base/comm/internals/psi_eswapdata_a.F90 +++ b/base/comm/internals/psi_eswapdata_a.F90 @@ -191,7 +191,9 @@ subroutine psi_eswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -676,7 +678,9 @@ subroutine psi_eswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_eswaptran_a.F90 b/base/comm/internals/psi_eswaptran_a.F90 index 78ed7d8b..ce12a7e7 100644 --- a/base/comm/internals/psi_eswaptran_a.F90 +++ b/base/comm/internals/psi_eswaptran_a.F90 @@ -195,7 +195,9 @@ subroutine psi_etranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -688,7 +690,9 @@ subroutine psi_etranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_i2swapdata_a.F90 b/base/comm/internals/psi_i2swapdata_a.F90 index 42b4498e..e2a8ad4e 100644 --- a/base/comm/internals/psi_i2swapdata_a.F90 +++ b/base/comm/internals/psi_i2swapdata_a.F90 @@ -191,7 +191,9 @@ subroutine psi_i2swapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -676,7 +678,9 @@ subroutine psi_i2swapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_i2swaptran_a.F90 b/base/comm/internals/psi_i2swaptran_a.F90 index f94bf29e..36652565 100644 --- a/base/comm/internals/psi_i2swaptran_a.F90 +++ b/base/comm/internals/psi_i2swaptran_a.F90 @@ -195,7 +195,9 @@ subroutine psi_i2tranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -688,7 +690,9 @@ subroutine psi_i2tranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_mswapdata_a.F90 b/base/comm/internals/psi_mswapdata_a.F90 index e71f3a52..c7f1965b 100644 --- a/base/comm/internals/psi_mswapdata_a.F90 +++ b/base/comm/internals/psi_mswapdata_a.F90 @@ -191,7 +191,9 @@ subroutine psi_mswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -676,7 +678,9 @@ subroutine psi_mswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_mswaptran_a.F90 b/base/comm/internals/psi_mswaptran_a.F90 index 3a780142..1e9fcede 100644 --- a/base/comm/internals/psi_mswaptran_a.F90 +++ b/base/comm/internals/psi_mswaptran_a.F90 @@ -195,7 +195,9 @@ subroutine psi_mtranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -688,7 +690,9 @@ subroutine psi_mtranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_sswapdata_a.F90 b/base/comm/internals/psi_sswapdata_a.F90 index 044dc141..c802578c 100644 --- a/base/comm/internals/psi_sswapdata_a.F90 +++ b/base/comm/internals/psi_sswapdata_a.F90 @@ -191,7 +191,9 @@ subroutine psi_sswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -676,7 +678,9 @@ subroutine psi_sswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_sswaptran_a.F90 b/base/comm/internals/psi_sswaptran_a.F90 index 434cec4c..c4290be1 100644 --- a/base/comm/internals/psi_sswaptran_a.F90 +++ b/base/comm/internals/psi_sswaptran_a.F90 @@ -195,7 +195,9 @@ subroutine psi_stranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -688,7 +690,9 @@ subroutine psi_stranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_zswapdata_a.F90 b/base/comm/internals/psi_zswapdata_a.F90 index 2d265c76..ec21072c 100644 --- a/base/comm/internals/psi_zswapdata_a.F90 +++ b/base/comm/internals/psi_zswapdata_a.F90 @@ -191,7 +191,9 @@ subroutine psi_zswapidxm(ctxt,icomm,flag,n,beta,y,idx, & logical, parameter :: usersend=.false. complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -676,7 +678,9 @@ subroutine psi_zswapidxv(ctxt,icomm,flag,beta,y,idx, & logical, parameter :: usersend=.false. complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/comm/internals/psi_zswaptran_a.F90 b/base/comm/internals/psi_zswaptran_a.F90 index 508d4045..c1062839 100644 --- a/base/comm/internals/psi_zswaptran_a.F90 +++ b/base/comm/internals/psi_zswaptran_a.F90 @@ -195,7 +195,9 @@ subroutine psi_ztranidxm(ctxt,icomm,flag,n,beta,y,idx,& logical, parameter :: usersend=.false. complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ @@ -688,7 +690,9 @@ subroutine psi_ztranidxv(ctxt,icomm,flag,beta,y,idx,& logical, parameter :: usersend=.false. complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf +#if !defined(FLANG) volatile :: sndbuf, rcvbuf +#endif character(len=20) :: name info=psb_success_ diff --git a/base/internals/psi_adjcncy_fnd_owner.F90 b/base/internals/psi_adjcncy_fnd_owner.F90 index 639cdb5d..83e18623 100644 --- a/base/internals/psi_adjcncy_fnd_owner.F90 +++ b/base/internals/psi_adjcncy_fnd_owner.F90 @@ -87,7 +87,7 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) integer(psb_lpk_) :: mglob, ih type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me - logical, parameter :: gettime=.true., debug=.false. + logical, parameter :: debug=.false. integer(psb_mpk_) :: xchg_alg logical, parameter :: do_timings=.false. integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1 @@ -132,10 +132,6 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info) goto 9999 end if - if (gettime) then - t0 = psb_wtime() - end if - nadj = size(adj) nidx = size(idx) call psb_realloc(nidx,iprc,info) diff --git a/base/internals/psi_desc_index.F90 b/base/internals/psi_desc_index.F90 index 35c8d921..6423c116 100644 --- a/base/internals/psi_desc_index.F90 +++ b/base/internals/psi_desc_index.F90 @@ -137,7 +137,7 @@ subroutine psi_i_desc_index(desc,index_in,dep_list,& & idxr, idxs, iszs, iszr, nesd, nerv, ixp, idx integer(psb_mpk_) :: icomm, minfo - logical, parameter :: do_timings=.true., oldstyle=.false., debug=.false. + logical, parameter :: do_timings=.false., oldstyle=.false., debug=.false. integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1, idx_phase4=-1 logical, parameter :: usempi=.false. integer(psb_ipk_) :: debug_level, debug_unit diff --git a/base/modules/auxil/psi_c_serial_mod.f90 b/base/modules/auxil/psi_c_serial_mod.f90 index 0fdff04b..38b740a7 100644 --- a/base/modules/auxil/psi_c_serial_mod.f90 +++ b/base/modules/auxil/psi_c_serial_mod.f90 @@ -99,6 +99,33 @@ module psi_c_serial_mod end subroutine psi_caxpbyv2 end interface psb_geaxpby + interface psi_abgdxyz + subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_spk_), intent (in) :: x(:) + complex(psb_spk_), intent (inout) :: y(:) + complex(psb_spk_), intent (inout) :: z(:) + complex(psb_spk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_cabgdxyz + end interface psi_abgdxyz + + interface psi_xyzw + subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_spk_), intent (in) :: x(:) + complex(psb_spk_), intent (inout) :: y(:) + complex(psb_spk_), intent (inout) :: z(:) + complex(psb_spk_), intent (inout) :: w(:) + complex(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_cxyzw + end interface psi_xyzw + interface psi_gth subroutine psi_cgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_spk_ diff --git a/base/modules/auxil/psi_d_serial_mod.f90 b/base/modules/auxil/psi_d_serial_mod.f90 index 0ce14dbb..1d65c5f6 100644 --- a/base/modules/auxil/psi_d_serial_mod.f90 +++ b/base/modules/auxil/psi_d_serial_mod.f90 @@ -99,6 +99,33 @@ module psi_d_serial_mod end subroutine psi_daxpbyv2 end interface psb_geaxpby + interface psi_abgdxyz + subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_dpk_), intent (in) :: x(:) + real(psb_dpk_), intent (inout) :: y(:) + real(psb_dpk_), intent (inout) :: z(:) + real(psb_dpk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_dabgdxyz + end interface psi_abgdxyz + + interface psi_xyzw + subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_dpk_), intent (in) :: x(:) + real(psb_dpk_), intent (inout) :: y(:) + real(psb_dpk_), intent (inout) :: z(:) + real(psb_dpk_), intent (inout) :: w(:) + real(psb_dpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_dxyzw + end interface psi_xyzw + interface psi_gth subroutine psi_dgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_dpk_ diff --git a/base/modules/auxil/psi_e_serial_mod.f90 b/base/modules/auxil/psi_e_serial_mod.f90 index f0372e01..6f4e8c06 100644 --- a/base/modules/auxil/psi_e_serial_mod.f90 +++ b/base/modules/auxil/psi_e_serial_mod.f90 @@ -99,6 +99,33 @@ module psi_e_serial_mod end subroutine psi_eaxpbyv2 end interface psb_geaxpby + interface psi_abgdxyz + subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_epk_), intent (in) :: x(:) + integer(psb_epk_), intent (inout) :: y(:) + integer(psb_epk_), intent (inout) :: z(:) + integer(psb_epk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_eabgdxyz + end interface psi_abgdxyz + + interface psi_xyzw + subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_epk_), intent (in) :: x(:) + integer(psb_epk_), intent (inout) :: y(:) + integer(psb_epk_), intent (inout) :: z(:) + integer(psb_epk_), intent (inout) :: w(:) + integer(psb_epk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_exyzw + end interface psi_xyzw + interface psi_gth subroutine psi_egthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ diff --git a/base/modules/auxil/psi_i2_serial_mod.f90 b/base/modules/auxil/psi_i2_serial_mod.f90 index 70dd95e1..ffa14059 100644 --- a/base/modules/auxil/psi_i2_serial_mod.f90 +++ b/base/modules/auxil/psi_i2_serial_mod.f90 @@ -99,6 +99,33 @@ module psi_i2_serial_mod end subroutine psi_i2axpbyv2 end interface psb_geaxpby + interface psi_abgdxyz + subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_i2pk_), intent (in) :: x(:) + integer(psb_i2pk_), intent (inout) :: y(:) + integer(psb_i2pk_), intent (inout) :: z(:) + integer(psb_i2pk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_i2abgdxyz + end interface psi_abgdxyz + + interface psi_xyzw + subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_i2pk_), intent (in) :: x(:) + integer(psb_i2pk_), intent (inout) :: y(:) + integer(psb_i2pk_), intent (inout) :: z(:) + integer(psb_i2pk_), intent (inout) :: w(:) + integer(psb_i2pk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_i2xyzw + end interface psi_xyzw + interface psi_gth subroutine psi_i2gthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ diff --git a/base/modules/auxil/psi_m_serial_mod.f90 b/base/modules/auxil/psi_m_serial_mod.f90 index cfd1348e..5661fdbf 100644 --- a/base/modules/auxil/psi_m_serial_mod.f90 +++ b/base/modules/auxil/psi_m_serial_mod.f90 @@ -99,6 +99,33 @@ module psi_m_serial_mod end subroutine psi_maxpbyv2 end interface psb_geaxpby + interface psi_abgdxyz + subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_mpk_), intent (in) :: x(:) + integer(psb_mpk_), intent (inout) :: y(:) + integer(psb_mpk_), intent (inout) :: z(:) + integer(psb_mpk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_mabgdxyz + end interface psi_abgdxyz + + interface psi_xyzw + subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_mpk_), intent (in) :: x(:) + integer(psb_mpk_), intent (inout) :: y(:) + integer(psb_mpk_), intent (inout) :: z(:) + integer(psb_mpk_), intent (inout) :: w(:) + integer(psb_mpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_mxyzw + end interface psi_xyzw + interface psi_gth subroutine psi_mgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ diff --git a/base/modules/auxil/psi_s_serial_mod.f90 b/base/modules/auxil/psi_s_serial_mod.f90 index 25c4a7ef..5cc17d58 100644 --- a/base/modules/auxil/psi_s_serial_mod.f90 +++ b/base/modules/auxil/psi_s_serial_mod.f90 @@ -99,6 +99,33 @@ module psi_s_serial_mod end subroutine psi_saxpbyv2 end interface psb_geaxpby + interface psi_abgdxyz + subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_spk_), intent (in) :: x(:) + real(psb_spk_), intent (inout) :: y(:) + real(psb_spk_), intent (inout) :: z(:) + real(psb_spk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_sabgdxyz + end interface psi_abgdxyz + + interface psi_xyzw + subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_spk_), intent (in) :: x(:) + real(psb_spk_), intent (inout) :: y(:) + real(psb_spk_), intent (inout) :: z(:) + real(psb_spk_), intent (inout) :: w(:) + real(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_sxyzw + end interface psi_xyzw + interface psi_gth subroutine psi_sgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_spk_ diff --git a/base/modules/auxil/psi_z_serial_mod.f90 b/base/modules/auxil/psi_z_serial_mod.f90 index b40cf05a..8a3f053d 100644 --- a/base/modules/auxil/psi_z_serial_mod.f90 +++ b/base/modules/auxil/psi_z_serial_mod.f90 @@ -99,6 +99,33 @@ module psi_z_serial_mod end subroutine psi_zaxpbyv2 end interface psb_geaxpby + interface psi_abgdxyz + subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_dpk_), intent (in) :: x(:) + complex(psb_dpk_), intent (inout) :: y(:) + complex(psb_dpk_), intent (inout) :: z(:) + complex(psb_dpk_), intent (in) :: alpha, beta,gamma,delta + integer(psb_ipk_), intent(out) :: info + end subroutine psi_zabgdxyz + end interface psi_abgdxyz + + interface psi_xyzw + subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_dpk_), intent (in) :: x(:) + complex(psb_dpk_), intent (inout) :: y(:) + complex(psb_dpk_), intent (inout) :: z(:) + complex(psb_dpk_), intent (inout) :: w(:) + complex(psb_dpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + end subroutine psi_zxyzw + end interface psi_xyzw + interface psi_gth subroutine psi_zgthmv(n,k,idx,alpha,x,beta,y) import :: psb_ipk_, psb_dpk_ diff --git a/base/modules/desc/psb_desc_mod.F90 b/base/modules/desc/psb_desc_mod.F90 index 226d9d2b..ad0bc74c 100644 --- a/base/modules/desc/psb_desc_mod.F90 +++ b/base/modules/desc/psb_desc_mod.F90 @@ -409,7 +409,7 @@ contains ! ! Since the hashed lists take up (somewhat) more than 2*N_COL integers, ! it makes no sense to use them if you don't have at least - ! 3 processes, no matter what the size of the process. + ! 3 processes, no matter what the size of the index space. ! val = psb_cd_is_large_size(m) .and. (np > 2) end function psb_cd_choose_large_state diff --git a/base/modules/desc/psb_gen_block_map_mod.F90 b/base/modules/desc/psb_gen_block_map_mod.F90 index f0c433e0..82a4cc15 100644 --- a/base/modules/desc/psb_gen_block_map_mod.F90 +++ b/base/modules/desc/psb_gen_block_map_mod.F90 @@ -215,7 +215,9 @@ contains end if if (present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,idxmap,owned_,info) & + !$omp private(i) do i=1, size(idx) if (mask(i)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then @@ -229,9 +231,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idx,idxmap,owned_,info) & + !$omp private(i) do i=1, size(idx) if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then idx(i) = idxmap%min_glob_row + idx(i) - 1 @@ -243,7 +247,7 @@ contains info = -1 end if end do - + !$omp end parallel do end if end subroutine block_ll2gv1 @@ -277,7 +281,9 @@ contains end if if (present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idxin,idxout,idxmap,owned_,info,im) & + !$omp private(i) do i=1, im if (mask(i)) then if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then @@ -291,9 +297,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idxin,idxout,idxmap,owned_,info,im) & + !$omp private(i) do i=1, im if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then idxout(i) = idxmap%min_glob_row + idxin(i) - 1 @@ -305,7 +313,7 @@ contains info = -1 end if end do - + !$omp end parallel do end if if (is > im) then @@ -392,6 +400,9 @@ contains if (present(mask)) then if (idxmap%is_asb()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,is,idx,idxmap,owned_) & + !$omp private(i,nv,tidx) do i=1, is if (mask(i)) then if ((idxmap%min_glob_row <= idx(i)).and. & @@ -408,7 +419,11 @@ contains end if end if end do + !$omp end parallel do else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,is,idx,idxmap,owned_) & + !$omp private(i,ip,lip,tidx,info) do i=1,is if (mask(i)) then if ((idxmap%min_glob_row <= idx(i)).and.& @@ -424,8 +439,8 @@ contains end if end if end do + !$omp end parallel do else -!!$ write(0,*) 'Block status: invalid ',idxmap%get_state() idx(1:is) = -1 info = -1 end if @@ -433,6 +448,9 @@ contains else if (.not.present(mask)) then if (idxmap%is_asb()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(is,idx,idxmap,owned_) & + !$omp private(i,nv,tidx) do i=1, is if ((idxmap%min_glob_row <= idx(i)).and.& & (idx(i) <= idxmap%max_glob_row)) then @@ -447,8 +465,11 @@ contains idx(i) = -1 end if end do - + !$omp end parallel do else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(is,idx,idxmap,owned_) & + !$omp private(i,ip,lip,tidx,info) do i=1,is if ((idxmap%min_glob_row <= idx(i)).and.& & (idx(i) <= idxmap%max_glob_row)) then @@ -462,6 +483,7 @@ contains idx(i) = -1 end if end do + !$omp end parallel do else idx(1:is) = -1 info = -1 @@ -953,7 +975,9 @@ contains end if info = psb_success_ else - info = -5 + write(0,*) 'From has_search_ins:',info,ip,lip,nxt,& + & idxmap%min_glob_row,idxmap%max_glob_row + info = -6 return end if idxout(i) = lip + idxmap%local_rows @@ -1131,7 +1155,7 @@ contains idxmap%global_cols = ntot idxmap%local_rows = nl idxmap%local_cols = nl - idxmap%ctxt = ctxt + idxmap%ctxt = ctxt idxmap%state = psb_desc_bld_ idxmap%mpic = psb_get_mpi_comm(ctxt) idxmap%min_glob_row = vnl(iam)+1 diff --git a/base/modules/desc/psb_hash_map_mod.F90 b/base/modules/desc/psb_hash_map_mod.F90 index 216c7aa4..c3d833c6 100644 --- a/base/modules/desc/psb_hash_map_mod.F90 +++ b/base/modules/desc/psb_hash_map_mod.F90 @@ -63,7 +63,7 @@ module psb_hash_map_mod integer(psb_lpk_) :: hashvsize, hashvmask integer(psb_ipk_), allocatable :: hashv(:) integer(psb_lpk_), allocatable :: glb_lc(:,:), loc_to_glob(:) - type(psb_hash_type) :: hash + type(psb_hash_type) :: hash contains @@ -221,6 +221,9 @@ contains if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,idxmap,owned_) & + !$omp private(i) do i=1, size(idx) if (mask(i)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then @@ -233,9 +236,12 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idx,idxmap,owned_) & + !$omp private(i) do i=1, size(idx) if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then idx(i) = idxmap%loc_to_glob(idx(i)) @@ -246,7 +252,7 @@ contains idx(i) = -1 end if end do - + !$omp end parallel do end if end subroutine hash_l2gv1 @@ -363,6 +369,9 @@ contains else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,is,idx,mglob,idxmap,nrm,ncol,nrow,owned_) & + !$omp private(i,ip,lip,tlip,info) do i = 1, is if (mask(i)) then ip = idx(i) @@ -388,7 +397,7 @@ contains endif end if enddo - + !$omp end parallel do else write(0,*) 'Hash status: invalid ',idxmap%get_state() idx(1:is) = -1 @@ -404,6 +413,9 @@ contains else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(is,idx,mglob,idxmap,nrm,ncol,nrow,owned_) & + !$omp private(i,ip,lip,tlip,info) do i = 1, is ip = idx(i) if ((ip < 1 ).or.(ip>mglob)) then @@ -427,14 +439,12 @@ contains idx(i) = lip endif enddo - + !$omp end parallel do else write(0,*) 'Hash status: invalid ',idxmap%get_state() idx(1:is) = -1 info = -1 - end if - end if end subroutine hash_g2lv1 @@ -493,6 +503,9 @@ contains else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,is,idxin,idxout,mglob,idxmap,nrm,ncol,nrow,owned_) & + !$omp private(i,ip,lip,tlip,info) do i = 1, is if (mask(i)) then ip = idxin(i) @@ -518,6 +531,7 @@ contains endif end if enddo + !$omp end parallel do else write(0,*) 'Hash status: invalid ',idxmap%get_state() idxout(1:is) = -1 @@ -533,6 +547,9 @@ contains else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(is,idxin,idxout,mglob,idxmap,nrm,ncol,nrow,owned_) & + !$omp private(i,ip,lip,tlip,info) do i = 1, is ip = idxin(i) if ((ip < 1 ).or.(ip>mglob)) then @@ -556,14 +573,12 @@ contains idxout(i) = lip endif enddo - + !$omp end parallel do else write(0,*) 'Hash status: invalid ',idxmap%get_state() idxout(1:is) = -1 info = -1 - end if - end if end subroutine hash_g2lv2 @@ -645,11 +660,11 @@ contains logical, intent(in), optional :: mask(:) integer(psb_ipk_), intent(in), optional :: lidx(:) - integer(psb_ipk_) :: i, is, lip, nrow, ncol, & + integer(psb_ipk_) :: i, is, lip, nrow, ncol,& & err_act integer(psb_lpk_) :: mglob, ip, nxt, tlip type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: me, np + integer(psb_ipk_) :: me, np,ith character(len=20) :: name,ch_err logical, allocatable :: mask_(:) !!$ logical :: use_openmp = .true. @@ -683,370 +698,249 @@ contains mglob = idxmap%get_gr() nrow = idxmap%get_lr() !write(0,*) me,name,' before loop ',psb_errstatus_fatal() -#if 0 && defined(OPENMP) - !call OMP_init_lock(ins_lck) - - if (idxmap%is_bld()) then - - isLoopValid = .true. - ncol = idxmap%get_lc() - if (present(mask)) then - mask_ = mask - else - allocate(mask_(size(idx))) - mask_ = .true. - end if +#if defined(OPENMP) + isLoopValid = .true. + if (idxmap%is_bld()) then if (present(lidx)) then - if (present(mask)) then - !$omp critical(hash_g2l_ins) - - ! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) & - ! $ OMP shared(name,me,is,idx,ins_lck,mask,mglob,idxmap,ncol,nrow,laddsz,lidx) & - ! $ OMP private(i,ip,lip,tlip,nxt,info) & - ! $ OMP reduction(.AND.:isLoopValid) + if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(lidx,mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & + !$omp private(i,ip,lip,tlip,nxt,info) & + !$omp reduction(.AND.:isLoopValid) do i = 1, is - info = 0 - if (.not. isLoopValid) cycle - if (mask(i)) then - ip = idx(i) - if ((ip < 1 ).or.(ip>mglob)) then + if (mask(i)) then + ip = idx(i) + if ((ip < 1 ).or.(ip>mglob) ) then idx(i) = -1 cycle endif - call OMP_set_lock(ins_lck) ncol = idxmap%get_lc() - call OMP_unset_lock(ins_lck) - - ! At first, we check the index presence in 'idxmap'. Usually - ! the index is found. If it is not found, we repeat the checking, - ! but inside a critical region. call hash_inner_cnv(ip,lip,idxmap%hashvmask,& & idxmap%hashv,idxmap%glb_lc,ncol) - if (lip < 0) then - call OMP_set_lock(ins_lck) - - ! We check again if the index is already in 'idxmap', this - ! time inside a critical region (we assume that the index - ! is often already existing). - ncol = idxmap%get_lc() - nxt = lidx(i) - call hash_inner_cnv(ip,lip,idxmap%hashvmask,& - & idxmap%hashv,idxmap%glb_lc,ncol) - - if (lip > 0) then - idx(i) = lip - else if (lip < 0) then - ! Index not found - call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) - lip = tlip - if (info /= 0) write(0,*) ' inskey 1 info:',info - - if (info >= 0) then - ! 'nxt' is not equal to 'tlip' when the key is already inside - ! the hash map. In that case 'tlip' is the value corresponding - ! to the existing mapping. - if (nxt == tlip) then - - ncol = MAX(ncol,nxt) - - call psb_ensure_size(ncol,idxmap%loc_to_glob,info,& - & pad=-1_psb_lpk_,addsz=laddsz) - - if (info /= psb_success_) then - !write(0,*) 'Error spot 1' - call psb_errpush(psb_err_from_subroutine_ai_,name,& - &a_err='psb_ensure_size',i_err=(/info/)) - - isLoopValid = .false. - idx(i) = -1 - else - idx(i) = lip - idxmap%loc_to_glob(nxt) = ip - call idxmap%set_lc(ncol) - end if - end if - if (isLoopValid) info = 0 + if (lip > 0) then + idx(i) = lip + info = psb_success_ + else + !$omp critical(hash_g2l_ins) + tlip = lip + nxt = lidx(i) + if (nxt <= nrow) then + idx(i) = -1 + else + call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,& + & idxmap%glb_lc,ncol) + if (lip > 0) then + idx(i) = lip else - idx(i) = -1 + call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) + lip = tlip + if (info >=0) then + if (nxt == lip) then + call psb_ensure_size(lip,idxmap%loc_to_glob,info,& + & pad=-1_psb_lpk_,addsz=laddsz) + if (info /= psb_success_) then + info=1 + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='psb_ensure_size',i_err=(/info/)) + isLoopValid = .false. + end if + idxmap%loc_to_glob(nxt) = ip + nxt = max(ncol,nxt) + call idxmap%set_lc(nxt) + endif + idx(i) = lip + info = psb_success_ + else + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='SearchInsKeyVal',i_err=(/info/)) + isLoopValid = .false. + end if end if - call OMP_unset_lock(ins_lck) - end if - else - idx(i) = lip + endif + !$omp end critical(hash_g2l_ins) end if else idx(i) = -1 end if + enddo + !$omp end parallel do - end do - ! $ OMP END PARALLEL DO - !$omp end critical(hash_g2l_ins) - - if (.not. isLoopValid) then - goto 9999 - end if - else - !$omp critical(hash_g2l_ins) + else if (.not.present(mask)) then - ! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) & - ! $ OMP shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz,lidx) & - ! $ OMP private(i,ip,lip,tlip,nxt,info) & - ! $ OMP reduction(.AND.:isLoopValid) + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(lidx,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & + !$omp private(i,ip,lip,tlip,nxt,info) & + !$omp reduction(.AND.:isLoopValid) do i = 1, is - info = 0 - if (.not. isLoopValid) cycle - ip = idx(i) - if ((ip < 1 ).or.(ip>mglob)) then + ip = idx(i) + if ((ip < 1 ).or.(ip>mglob) ) then idx(i) = -1 cycle endif - call OMP_set_lock(ins_lck) ncol = idxmap%get_lc() - call OMP_unset_lock(ins_lck) - - ! At first, we check the index presence in 'idxmap'. Usually - ! the index is found. If it is not found, we repeat the checking, - ! but inside a critical region. call hash_inner_cnv(ip,lip,idxmap%hashvmask,& & idxmap%hashv,idxmap%glb_lc,ncol) - if (lip < 0) then - call OMP_set_lock(ins_lck) - ! We check again if the index is already in 'idxmap', this - ! time inside a critical region (we assume that the index - ! is often already existing). - ncol = idxmap%get_lc() - nxt = lidx(i) - call hash_inner_cnv(ip,lip,idxmap%hashvmask,& - & idxmap%hashv,idxmap%glb_lc,ncol) - - if (lip > 0) then - idx(i) = lip - else if (lip < 0) then - call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) - lip = tlip -!!$ if (info /= 0) write(0,*) ' inskey 2 info:',info - if (info >= 0) then - ! 'nxt' is not equal to 'tlip' when the key is already inside - ! the hash map. In that case 'tlip' is the value corresponding - ! to the existing mapping. - if (nxt == tlip) then - - ncol = MAX(ncol,nxt) - - call psb_ensure_size(ncol,idxmap%loc_to_glob,info,& - & pad=-1_psb_lpk_,addsz=laddsz) - - if (info /= psb_success_) then - !write(0,*) 'Error spot 2' - call psb_errpush(psb_err_from_subroutine_ai_,name,& - &a_err='psb_ensure_size',i_err=(/info/)) - - isLoopValid = .false. - idx(i) = -1 - else - idx(i) = lip - idxmap%loc_to_glob(nxt) = ip - call idxmap%set_lc(ncol) - end if - end if - if (isLoopValid) info = 0 + if (lip > 0) then + idx(i) = lip + info = psb_success_ + else + !$omp critical(hash_g2l_ins) + tlip = lip + nxt = lidx(i) + if (nxt <= nrow) then + idx(i) = -1 + else + call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,& + & idxmap%glb_lc,ncol) + if (lip > 0) then + idx(i) = lip else - idx(i) = -1 + call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) + lip = tlip + if (info >=0) then + if (nxt == lip) then + call psb_ensure_size(lip,idxmap%loc_to_glob,info,& + & pad=-1_psb_lpk_,addsz=laddsz) + if (info /= psb_success_) then + info=1 + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='psb_ensure_size',i_err=(/info/)) + isLoopValid = .false. + end if + idxmap%loc_to_glob(nxt) = ip + nxt = max(ncol,nxt) + call idxmap%set_lc(nxt) + endif + idx(i) = lip + info = psb_success_ + else + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='SearchInsKeyVal',i_err=(/info/)) + isLoopValid = .false. + end if end if - call OMP_unset_lock(ins_lck) - end if - else - idx(i) = lip + endif + !$omp end critical(hash_g2l_ins) end if - - end do - ! $ OMP END PARALLEL DO - !$omp end critical(hash_g2l_ins) - - if (.not. isLoopValid) then - goto 9999 - end if + enddo + !$omp end parallel do end if + else if (.not.present(lidx)) then - if(present(mask)) then - ! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) & - ! $ OMP shared(name,me,is,idx,ins_lck,mask,mglob,idxmap,ncol,nrow,laddsz) & - ! $ OMP private(i,ip,lip,tlip,nxt,info) & - ! $ OMP reduction(.AND.:isLoopValid) - !$omp critical(hash_g2l_ins) + if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & + !$omp private(i,ip,lip,tlip,nxt,info) & + !$omp reduction(.AND.:isLoopValid) do i = 1, is - info = 0 - if (.not. isLoopValid) cycle if (mask(i)) then - ip = idx(i) - if ((ip < 1 ).or.(ip>mglob)) then + ip = idx(i) + if ((ip < 1 ).or.(ip>mglob)) then idx(i) = -1 cycle endif - !call OMP_set_lock(ins_lck) - ncol = idxmap%get_lc() - !call OMP_unset_lock(ins_lck) - - ! At first, we check the index presence in 'idxmap'. Usually - ! the index is found. If it is not found, we repeat the checking, - ! but inside a critical region. - !write(0,*) me,name,' b hic 1 ',psb_errstatus_fatal() + ncol = idxmap%get_lc() call hash_inner_cnv(ip,lip,idxmap%hashvmask,& & idxmap%hashv,idxmap%glb_lc,ncol) - !write(0,*) me,name,' a hic 1 ',psb_errstatus_fatal() - if (lip < 0) then - !call OMP_set_lock(ins_lck) - ! We check again if the index is already in 'idxmap', this - ! time inside a critical region (we assume that the index - ! is often already existing, so this lock is relatively rare). - ncol = idxmap%get_lc() - nxt = ncol + 1 - !write(0,*) me,name,' b hic 2 ',psb_errstatus_fatal() - call hash_inner_cnv(ip,lip,idxmap%hashvmask,& - & idxmap%hashv,idxmap%glb_lc,ncol) - !write(0,*) me,name,' a hic 2 ',psb_errstatus_fatal() + if (lip > 0) then + idx(i) = lip + info = psb_success_ + else + !$omp critical(hash_g2l_ins) + ncol = idxmap%get_lc() + nxt = ncol + 1 + call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,& + & idxmap%glb_lc,ncol) if (lip > 0) then - idx(i) = lip - else if (lip < 0) then - ! Index not found - !write(0,*) me,name,' b hsik ',psb_errstatus_fatal() + idx(i) = lip + else call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) -!!$ if (info /= 0) write(0,*) ' inskey 3 info:',info - if (psb_errstatus_fatal()) write(0,*) me,name,' a hsik ',info,omp_get_thread_num() - !write(0,*) me,name,' a hsik ',psb_errstatus_fatal() lip = tlip - - if (info >= 0) then - !write(0,*) 'Error before spot 3', info - ! 'nxt' is not equal to 'tlip' when the key is already inside - ! the hash map. In that case 'tlip' is the value corresponding - ! to the existing mapping. - if (nxt == tlip) then - - ncol = MAX(ncol,nxt) - call psb_ensure_size(ncol,idxmap%loc_to_glob,info,& + if (info >=0) then + if (nxt == lip) then + call psb_ensure_size(lip,idxmap%loc_to_glob,info,& & pad=-1_psb_lpk_,addsz=laddsz) - if (psb_errstatus_fatal()) write(0,*) me,name,' a esz ',info,omp_get_thread_num() if (info /= psb_success_) then - write(0,*) 'Error spot 3', info + info=1 call psb_errpush(psb_err_from_subroutine_ai_,name,& - &a_err='psb_ensure_size',i_err=(/info/)) - + & a_err='psb_ensure_size',i_err=(/info/)) isLoopValid = .false. - idx(i) = -1 - else - idx(i) = lip - idxmap%loc_to_glob(nxt) = ip - call idxmap%set_lc(ncol) end if - end if - if (isLoopValid) info = 0 + idxmap%loc_to_glob(nxt) = ip + call idxmap%set_lc(nxt) + endif + idx(i) = lip + info = psb_success_ else - idx(i) = -1 + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='SearchInsKeyVal',i_err=(/info/)) + isLoopValid = .false. end if -!!$ if (info /= 0) write(0,*) ' inskey 3.5 info:',info, isLoopValid - !call OMP_unset_lock(ins_lck) end if - else - idx(i) = lip + !$omp end critical(hash_g2l_ins) end if else idx(i) = -1 end if + enddo + !$omp end parallel do - end do - ! $ OMP END PARALLEL DO - !$omp end critical(hash_g2l_ins) + else if (.not.present(mask)) then - if (.not. isLoopValid) then - goto 9999 - end if - else - ! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) & - ! $ OMP shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & - ! $ OMP private(i,ip,lip,tlip,nxt,info) & - ! $ OMP reduction(.AND.:isLoopValid) - !$omp critical(hash_g2l_ins) + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & + !$omp private(i,ip,lip,tlip,nxt,info) & + !$omp reduction(.AND.:isLoopValid) do i = 1, is - info = 0 - if (.not. isLoopValid) cycle - ip = idx(i) - if ((ip < 1 ).or.(ip>mglob)) then + ip = idx(i) + if ((ip < 1 ).or.(ip>mglob)) then idx(i) = -1 cycle endif - !call OMP_set_lock(ins_lck) - ncol = idxmap%get_lc() - !call OMP_unset_lock(ins_lck) - - ! At first, we check the index presence in 'idxmap'. Usually - ! the index is found. If it is not found, we repeat the checking, - ! but inside a critical region. + ncol = idxmap%get_lc() call hash_inner_cnv(ip,lip,idxmap%hashvmask,& & idxmap%hashv,idxmap%glb_lc,ncol) - if (lip < 0) then - !call OMP_set_lock(ins_lck) - ! We check again if the index is already in 'idxmap', this - ! time inside a critical region (we assume that the index - ! is often already existing). - ncol = idxmap%get_lc() - nxt = ncol + 1 - call hash_inner_cnv(ip,lip,idxmap%hashvmask,& - & idxmap%hashv,idxmap%glb_lc,ncol) - + if (lip > 0) then + idx(i) = lip + info = psb_success_ + else + !$omp critical(hash_g2l_ins) + ncol = idxmap%get_lc() + nxt = ncol + 1 + call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,& + & idxmap%glb_lc,ncol) if (lip > 0) then - idx(i) = lip - else if (lip < 0) then - ! Index not found + idx(i) = lip + else call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) lip = tlip -!!$ if (info /= 0) write(0,*) ' inskey 4 info:',info - - if (info >= 0) then - ! 'nxt' is not equal to 'tlip' when the key is already inside - ! the hash map. In that case 'tlip' is the value corresponding - ! to the existing mapping. - if (nxt == tlip) then - - ncol = MAX(ncol,nxt) - - call psb_ensure_size(ncol,idxmap%loc_to_glob,info,& + if (info >=0) then + if (nxt == lip) then + call psb_ensure_size(lip,idxmap%loc_to_glob,info,& & pad=-1_psb_lpk_,addsz=laddsz) - if (info /= psb_success_) then - !write(0,*) 'Error spot 4' + info=1 call psb_errpush(psb_err_from_subroutine_ai_,name,& - &a_err='psb_ensure_size',i_err=(/info/)) - + & a_err='psb_ensure_size',i_err=(/info/)) isLoopValid = .false. - idx(i) = -1 - else - idx(i) = lip - idxmap%loc_to_glob(nxt) = ip - call idxmap%set_lc(ncol) end if - end if - if (isLoopValid) info = 0 + idxmap%loc_to_glob(nxt) = ip + call idxmap%set_lc(nxt) + endif + idx(i) = lip + info = psb_success_ else - idx(i) = -1 + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='SearchInsKeyVal',i_err=(/info/)) + isLoopValid = .false. end if - !call OMP_unset_lock(ins_lck) end if - - else - idx(i) = lip + !$omp end critical(hash_g2l_ins) end if - - end do - ! $ OMP END PARALLEL DO - !$omp end critical(hash_g2l_ins) - - if (.not. isLoopValid) then - goto 9999 - end if - + enddo + !$omp end parallel do end if end if else @@ -1054,7 +948,7 @@ contains idx = -1 info = -1 end if - !call OMP_destroy_lock(ins_lck) + if (.not. isLoopValid) goto 9999 #else !!$ else if (.not.use_openmp) then isLoopValid = .true. @@ -1073,13 +967,13 @@ contains call hash_inner_cnv(ip,lip,idxmap%hashvmask,& & idxmap%hashv,idxmap%glb_lc,ncol) if (lip < 0) then - tlip = lip nxt = lidx(i) if (nxt <= nrow) then idx(i) = -1 cycle endif call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) + lip = tlip if (info >=0) then if (nxt == tlip) then ncol = max(ncol,nxt) @@ -1754,6 +1648,9 @@ contains ! for a width of psb_hash_bits ! if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(n,hashv,hashmask,x,glb_lc,nrm,mask) & + !$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm) do i=1, n if (mask(i)) then key = x(i) @@ -1791,7 +1688,11 @@ contains end if end if end do + !$omp end parallel do else + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(n,hashv,hashmask,x,glb_lc,nrm) & + !$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm) do i=1, n key = x(i) ih = iand(key,hashmask) @@ -1827,6 +1728,7 @@ contains x(i) = tmp end if end do + !$omp end parallel do end if end subroutine hash_inner_cnv1 @@ -1849,6 +1751,9 @@ contains ! for a width of psb_hash_bits ! if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(n,hashv,hashmask,x,y,glb_lc,nrm,mask,psb_err_unit) & + !$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm) do i=1, n if (mask(i)) then key = x(i) @@ -1889,9 +1794,12 @@ contains end if end if end do - + !$omp end parallel do else + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(n,hashv,hashmask,x,y,glb_lc,nrm,psb_err_unit) & + !$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm) do i=1, n key = x(i) ih = iand(key,hashmask) @@ -1930,6 +1838,7 @@ contains y(i) = tmp end if end do + !$omp end parallel do end if end subroutine hash_inner_cnv2 diff --git a/base/modules/desc/psb_hash_mod.F90 b/base/modules/desc/psb_hash_mod.F90 index eb5556a2..18b1142d 100644 --- a/base/modules/desc/psb_hash_mod.F90 +++ b/base/modules/desc/psb_hash_mod.F90 @@ -383,12 +383,12 @@ contains integer(psb_lpk_), intent(out) :: val integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: hsize,hmask, hk, hd + integer(psb_ipk_) :: hsize,hmask, hk, hd, i + logical :: redo info = HashOK hsize = hash%hsize hmask = hash%hmask - val = -1 hk = iand(psb_hashval(key),hmask) if (hk == 0) then hd = 1 @@ -400,56 +400,57 @@ contains info = HashOutOfMemory return end if - + val = -1 + !$omp atomic hash%nsrch = hash%nsrch + 1 + !$omp end atomic do + !$omp atomic hash%nacc = hash%nacc + 1 + !$omp end atomic if (hash%table(hk,1) == key) then val = hash%table(hk,2) info = HashDuplicate - !write(0,*) 'In searchinskey 1 : ', info, HashDuplicate return end if + redo = .false. !$omp critical(hashsearchins) - if (hash%table(hk,1) == key) then - val = hash%table(hk,2) - info = HashDuplicate - else - if (hash%table(hk,1) == HashFreeEntry) then - if (hash%nk == hash%hsize -1) then - ! - ! Note: because of the way we allocate things at CDALL - ! time this is really unlikely; if we get here, we - ! have at least as many halo indices as internals, which - ! means we're already in trouble. But we try to keep going. - ! - call psb_hash_realloc(hash,info) - if (info /= HashOk) then - info = HashOutOfMemory - !return - else - call psb_hash_searchinskey(key,val,nextval,hash,info) - !return - end if + if (hash%table(hk,1) == HashFreeEntry) then + if (hash%nk == hash%hsize -1) then + ! + ! Note: because of the way we allocate things at CDALL + ! time this is really unlikely; if we get here, we + ! have at least as many halo indices as internals, which + ! means we're already in trouble. But we try to keep going. + ! + call psb_hash_realloc(hash,info) + if (info /= HashOk) then + info = HashOutOfMemory else - hash%nk = hash%nk + 1 - hash%table(hk,1) = key - hash%table(hk,2) = nextval - val = nextval - !return + redo = .true. end if + else + hash%nk = hash%nk + 1 + hash%table(hk,1) = key + hash%table(hk,2) = nextval + val = nextval + info = HashOk end if + else if (hash%table(hk,1) == key) then + val = hash%table(hk,2) + info = HashDuplicate + else + info = HashNotFound end if !$omp end critical(hashsearchins) - if (info /= HashOk) then - write(0,*) 'In searchinskey 2: ', info + if (redo) then + call psb_hash_searchinskey(key,val,nextval,hash,info) return end if - if (val > 0) return + if (val > 0) exit hk = hk - hd if (hk < 0) hk = hk + hsize end do - !write(0,*) 'In searchinskey 3: ', info end subroutine psb_hash_lsearchinskey recursive subroutine psb_hash_isearchinskey(key,val,nextval,hash,info) @@ -459,10 +460,11 @@ contains integer(psb_ipk_) :: hsize,hmask, hk, hd logical :: redo + info = HashOK hsize = hash%hsize hmask = hash%hmask - + hk = iand(psb_hashval(key),hmask) if (hk == 0) then hd = 1 @@ -475,17 +477,22 @@ contains return end if val = -1 + val = -1 + !$omp atomic hash%nsrch = hash%nsrch + 1 + !$omp end atomic do + !$omp atomic hash%nacc = hash%nacc + 1 + !$omp end atomic if (hash%table(hk,1) == key) then val = hash%table(hk,2) info = HashDuplicate return end if redo = .false. - !$OMP CRITICAL - if (hash%table(hk,1) == HashFreeEntry) then + !$omp critical(hashsearchins) + if (hash%table(hk,1) == HashFreeEntry) then if (hash%nk == hash%hsize -1) then ! ! Note: because of the way we allocate things at CDALL @@ -496,24 +503,28 @@ contains call psb_hash_realloc(hash,info) if (info /= HashOk) then info = HashOutOfMemory - !return else redo = .true. -!!$ call psb_hash_searchinskey(key,val,nextval,hash,info) -!!$ return end if else hash%nk = hash%nk + 1 hash%table(hk,1) = key hash%table(hk,2) = nextval val = nextval - !return + info = HashOk end if + else if (hash%table(hk,1) == key) then + val = hash%table(hk,2) + info = HashDuplicate + else + info = HashNotFound end if - !$OMP END CRITICAL - if (redo) call psb_hash_searchinskey(key,val,nextval,hash,info) - if (info /= HashOk) return - if (val > 0) return + !$omp end critical(hashsearchins) + if (redo) then + call psb_hash_searchinskey(key,val,nextval,hash,info) + return + end if + if (val > 0) exit hk = hk - hd if (hk < 0) hk = hk + hsize end do @@ -551,7 +562,7 @@ contains end if if (hash%table(hk,1) == HashFreeEntry) then val = HashFreeEntry -! !$ info = HashNotFound + info = HashNotFound return end if hk = hk - hd @@ -591,7 +602,7 @@ contains end if if (hash%table(hk,1) == HashFreeEntry) then val = HashFreeEntry -! !$ info = HashNotFound + info = HashNotFound return end if hk = hk - hd diff --git a/base/modules/desc/psb_indx_map_mod.f90 b/base/modules/desc/psb_indx_map_mod.F90 similarity index 96% rename from base/modules/desc/psb_indx_map_mod.f90 rename to base/modules/desc/psb_indx_map_mod.F90 index 7753db23..fa3e5a69 100644 --- a/base/modules/desc/psb_indx_map_mod.f90 +++ b/base/modules/desc/psb_indx_map_mod.F90 @@ -153,11 +153,25 @@ module psb_indx_map_mod procedure, pass(idxmap) :: set_gci => base_set_gci procedure, pass(idxmap) :: set_grl => base_set_grl procedure, pass(idxmap) :: set_gcl => base_set_gcl +#if defined(IPK4) && defined(LPK8) + generic, public :: set_gr => set_grl, set_gri + generic, public :: set_gc => set_gcl, set_gci +#else generic, public :: set_gr => set_grl generic, public :: set_gc => set_gcl - - procedure, pass(idxmap) :: set_lr => base_set_lr - procedure, pass(idxmap) :: set_lc => base_set_lc +#endif + procedure, pass(idxmap) :: set_lri => base_set_lri + procedure, pass(idxmap) :: set_lrl => base_set_lrl + procedure, pass(idxmap) :: set_lci => base_set_lci + procedure, pass(idxmap) :: set_lcl => base_set_lcl + procedure, pass(idxmap) :: inc_lc => base_inc_lc +#if defined(IPK4) && defined(LPK8) + generic, public :: set_lr => set_lrl, set_lri + generic, public :: set_lc => set_lcl, set_lci +#else + generic, public :: set_lr => set_lri + generic, public :: set_lc => set_lci +#endif procedure, pass(idxmap) :: set_p_adjcncy => base_set_p_adjcncy procedure, pass(idxmap) :: xtnd_p_adjcncy => base_xtnd_p_adjcncy @@ -235,7 +249,8 @@ module psb_indx_map_mod & base_get_gr, base_get_gc, base_get_lr, base_get_lc, base_get_ctxt,& & base_get_mpic, base_sizeof, base_set_null, & & base_set_grl, base_set_gcl, & - & base_set_lr, base_set_lc, base_set_ctxt,& + & base_set_lri, base_set_lci, base_set_lrl, base_set_lcl, & + & base_inc_lc, base_set_ctxt,& & base_set_mpic, base_get_fmt, base_asb, base_free,& & base_l2gs1, base_l2gs2, base_l2gv1, base_l2gv2,& & base_g2ls1, base_g2ls2, base_g2lv1, base_g2lv2,& @@ -557,21 +572,47 @@ contains idxmap%global_cols = val end subroutine base_set_gcl - subroutine base_set_lr(idxmap,val) + subroutine base_set_lri(idxmap,val) implicit none class(psb_indx_map), intent(inout) :: idxmap integer(psb_ipk_), intent(in) :: val idxmap%local_rows = val - end subroutine base_set_lr + end subroutine base_set_lri - subroutine base_set_lc(idxmap,val) + subroutine base_set_lci(idxmap,val) implicit none class(psb_indx_map), intent(inout) :: idxmap integer(psb_ipk_), intent(in) :: val + !$omp critical + idxmap%local_cols = val + !$omp end critical + end subroutine base_set_lci + + subroutine base_set_lrl(idxmap,val) + implicit none + class(psb_indx_map), intent(inout) :: idxmap + integer(psb_lpk_), intent(in) :: val + + idxmap%local_rows = val + end subroutine base_set_lrl + subroutine base_set_lcl(idxmap,val) + implicit none + class(psb_indx_map), intent(inout) :: idxmap + integer(psb_lpk_), intent(in) :: val + !$omp critical idxmap%local_cols = val - end subroutine base_set_lc + !$omp end critical + end subroutine base_set_lcl + + subroutine base_inc_lc(idxmap) + implicit none + class(psb_indx_map), intent(inout) :: idxmap + !$omp atomic + idxmap%local_cols = idxmap%local_cols + 1 + !$omp end atomic + end subroutine base_inc_lc subroutine base_set_p_adjcncy(idxmap,val) use psb_realloc_mod diff --git a/base/modules/desc/psb_list_map_mod.F90 b/base/modules/desc/psb_list_map_mod.F90 index 6b61cf52..913145da 100644 --- a/base/modules/desc/psb_list_map_mod.F90 +++ b/base/modules/desc/psb_list_map_mod.F90 @@ -178,7 +178,10 @@ contains end if if (present(mask)) then - + + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,idxmap,owned_) & + !$omp private(i) do i=1, size(idx) if (mask(i)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%get_lr())) then @@ -191,9 +194,12 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idx,idxmap,owned_) & + !$omp private(i) do i=1, size(idx) if ((1<=idx(i)).and.(idx(i) <= idxmap%get_lr())) then idx(i) = idxmap%loc_to_glob(idx(i)) @@ -204,7 +210,8 @@ contains idx(i) = -1 end if end do - + !$omp end parallel do + end if end subroutine list_ll2gv1 @@ -298,6 +305,9 @@ contains if (present(mask)) then if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,is,idx,idxmap,owned_) & + !$omp private(i,ix) do i=1,is if (mask(i)) then if ((1 <= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -309,6 +319,7 @@ contains end if end if end do + !$omp end parallel do else idx(1:is) = -1 info = -1 @@ -317,6 +328,9 @@ contains else if (.not.present(mask)) then if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(is,idx,idxmap,owned_) & + !$omp private(i,ix) do i=1, is if ((1 <= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ix = idxmap%glob_to_loc(idx(i)) @@ -326,6 +340,7 @@ contains idx(i) = -1 end if end do + !$omp end parallel do else idx(1:is) = -1 info = -1 @@ -365,6 +380,9 @@ contains if (present(mask)) then if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,is,idxin,idxout,idxmap,owned_) & + !$omp private(i,ix) do i=1,is if (mask(i)) then if ((1 <= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -376,6 +394,7 @@ contains end if end if end do + !$omp end parallel do else idxout(1:is) = -1 info = -1 @@ -384,6 +403,9 @@ contains else if (.not.present(mask)) then if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(is,idxin,idxout,idxmap,owned_) & + !$omp private(i,ix) do i=1, is if ((1 <= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then ix = idxmap%glob_to_loc(idxin(i)) @@ -393,6 +415,7 @@ contains idxout(i) = -1 end if end do + !$omp end parallel do else idxout(1:is) = -1 info = -1 @@ -541,6 +564,10 @@ contains else if (.not.present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,is,idx,idxmap,laddsz,lidx) & + !$omp private(i,ix,info) + ! $ o m p reduction(.AND.:isLoopValid) do i=1, is if (info /= 0) cycle if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -579,8 +606,8 @@ contains idx(i) = -1 end if end do + !$omp end parallel do end if - else if (.not.present(lidx)) then if (present(mask)) then diff --git a/base/modules/desc/psb_repl_map_mod.F90 b/base/modules/desc/psb_repl_map_mod.F90 index fe51b7b1..f68ae3b8 100644 --- a/base/modules/desc/psb_repl_map_mod.F90 +++ b/base/modules/desc/psb_repl_map_mod.F90 @@ -169,7 +169,9 @@ contains end if if (present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,idxmap) & + !$omp private(i) do i=1, size(idx) if (mask(i)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then @@ -179,9 +181,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idx,idxmap) & + !$omp private(i) do i=1, size(idx) if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then ! do nothing @@ -189,7 +193,7 @@ contains idx(i) = -1 end if end do - + !$omp end parallel do end if end subroutine repl_l2gv1 @@ -223,7 +227,9 @@ contains end if if (present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idxin,idxout,idxmap,im) & + !$omp private(i) do i=1, im if (mask(i)) then if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then @@ -233,9 +239,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idxin,idxout,idxmap,im) & + !$omp private(i) do i=1, im if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then idxout(i) = idxin(i) @@ -243,7 +251,7 @@ contains idxout(i) = -1 end if end do - + !$omp end parallel do end if if (is > im) info = -3 @@ -324,6 +332,9 @@ contains if (present(mask)) then if (idxmap%is_asb()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,idxmap,is) & + !$omp private(i) do i=1, is if (mask(i)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -333,7 +344,11 @@ contains end if end if end do + !$omp end parallel do else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,idxmap,is) & + !$omp private(i) do i=1,is if (mask(i)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -344,6 +359,7 @@ contains end if end if end do + !$omp end parallel do else idx(1:is) = -1 info = -1 @@ -352,6 +368,9 @@ contains else if (.not.present(mask)) then if (idxmap%is_asb()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idx,idxmap,is) & + !$omp private(i) do i=1, is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ! do nothing @@ -359,7 +378,11 @@ contains idx(i) = -1 end if end do + !$omp end parallel do else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idx,idxmap,is) & + !$omp private(i) do i=1,is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ! do nothing @@ -367,6 +390,7 @@ contains idx(i) = -1 end if end do + !$omp end parallel do else idx(1:is) = -1 info = -1 @@ -409,6 +433,9 @@ contains if (present(mask)) then if (idxmap%is_asb()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idxin,idxout,idxmap,im) & + !$omp private(i) do i=1, im if (mask(i)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -418,7 +445,11 @@ contains end if end if end do + !$omp end parallel do else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idxin,idxout,idxmap,im) & + !$omp private(i) do i=1,im if (mask(i)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -428,6 +459,7 @@ contains end if end if end do + !$omp end parallel do else idxout(1:im) = -1 info = -1 @@ -436,6 +468,9 @@ contains else if (.not.present(mask)) then if (idxmap%is_asb()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idxin,idxout,idxmap,im) & + !$omp private(i) do i=1, im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then idxout(i) = idxin(i) @@ -443,7 +478,11 @@ contains idxout(i) = -1 end if end do + !$omp end parallel do else if (idxmap%is_valid()) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idxin,idxout,idxmap,im) & + !$omp private(i) do i=1,im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then idxout(i) = idxin(i) @@ -451,6 +490,7 @@ contains idxout(i) = -1 end if end do + !$omp end parallel do else idxout(1:im) = -1 info = -1 @@ -557,6 +597,9 @@ contains else if (idxmap%is_valid()) then if (present(lidx)) then if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,lidx,is,idxmap) & + !$omp private(i) do i=1, is if (mask(i)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -566,9 +609,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,lidx,is,idxmap) & + !$omp private(i) do i=1, is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ! do nothing @@ -576,9 +621,13 @@ contains idx(i) = -1 end if end do + !$omp end parallel do end if else if (.not.present(lidx)) then if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idx,is,idxmap) & + !$omp private(i) do i=1, is if (mask(i)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then @@ -588,8 +637,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idx,is,idxmap) & + !$omp private(i) do i=1, is if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then ! do nothing @@ -597,6 +649,7 @@ contains idx(i) = -1 end if end do + !$omp end parallel do end if end if else @@ -644,6 +697,9 @@ contains else if (idxmap%is_valid()) then if (present(lidx)) then if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idxin,idxout,im,idxmap) & + !$omp private(i) do i=1, im if (mask(i)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -653,9 +709,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then - + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idxin,idxout,im,idxmap) & + !$omp private(i) do i=1, im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then idxout(i) = idxin(i) @@ -663,9 +721,13 @@ contains idxout(i) = -1 end if end do + !$omp end parallel do end if else if (.not.present(lidx)) then if (present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(mask,idxin,idxout,im,idxmap) & + !$omp private(i) do i=1, im if (mask(i)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then @@ -675,8 +737,11 @@ contains end if end if end do - + !$omp end parallel do else if (.not.present(mask)) then + !$omp parallel do default(none) schedule(dynamic) & + !$omp shared(idxin,idxout,im,idxmap) & + !$omp private(i) do i=1, im if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then idxout(i) = idxin(i) @@ -684,6 +749,7 @@ contains idxout(i) = -1 end if end do + !$omp end parallel do end if end if else diff --git a/base/modules/psb_const_mod.F90 b/base/modules/psb_const_mod.F90 index 56134474..70631932 100644 --- a/base/modules/psb_const_mod.F90 +++ b/base/modules/psb_const_mod.F90 @@ -136,9 +136,9 @@ module psb_const_mod ! ! Version ! - character(len=*), parameter :: psb_version_string_ = "3.8.0" + character(len=*), parameter :: psb_version_string_ = "3.9.0" integer(psb_ipk_), parameter :: psb_version_major_ = 3 - integer(psb_ipk_), parameter :: psb_version_minor_ = 8 + integer(psb_ipk_), parameter :: psb_version_minor_ = 9 integer(psb_ipk_), parameter :: psb_patchlevel_ = 0 ! diff --git a/base/modules/psblas/psb_c_psblas_mod.F90 b/base/modules/psblas/psb_c_psblas_mod.F90 index 98deebd8..7f7f937c 100644 --- a/base/modules/psblas/psb_c_psblas_mod.F90 +++ b/base/modules/psblas/psb_c_psblas_mod.F90 @@ -143,6 +143,20 @@ module psb_c_psblas_mod end subroutine psb_caxpby end interface + interface psb_abgdxyz + subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_spk_, psb_ipk_, & + & psb_c_vect_type, psb_cspmat_type + type(psb_c_vect_type), intent (inout) :: x + type(psb_c_vect_type), intent (inout) :: y + type(psb_c_vect_type), intent (inout) :: z + complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_cabgdxyz_vect + end interface psb_abgdxyz + interface psb_geamax function psb_camax(x, desc_a, info, jx,global) import :: psb_desc_type, psb_spk_, psb_ipk_, & diff --git a/base/modules/psblas/psb_d_psblas_mod.F90 b/base/modules/psblas/psb_d_psblas_mod.F90 index e4988387..12090956 100644 --- a/base/modules/psblas/psb_d_psblas_mod.F90 +++ b/base/modules/psblas/psb_d_psblas_mod.F90 @@ -143,6 +143,20 @@ module psb_d_psblas_mod end subroutine psb_daxpby end interface + interface psb_abgdxyz + subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_dpk_, psb_ipk_, & + & psb_d_vect_type, psb_dspmat_type + type(psb_d_vect_type), intent (inout) :: x + type(psb_d_vect_type), intent (inout) :: y + type(psb_d_vect_type), intent (inout) :: z + real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_dabgdxyz_vect + end interface psb_abgdxyz + interface psb_geamax function psb_damax(x, desc_a, info, jx,global) import :: psb_desc_type, psb_dpk_, psb_ipk_, & diff --git a/base/modules/psblas/psb_s_psblas_mod.F90 b/base/modules/psblas/psb_s_psblas_mod.F90 index 93fe74b9..7a7ce783 100644 --- a/base/modules/psblas/psb_s_psblas_mod.F90 +++ b/base/modules/psblas/psb_s_psblas_mod.F90 @@ -143,6 +143,20 @@ module psb_s_psblas_mod end subroutine psb_saxpby end interface + interface psb_abgdxyz + subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_spk_, psb_ipk_, & + & psb_s_vect_type, psb_sspmat_type + type(psb_s_vect_type), intent (inout) :: x + type(psb_s_vect_type), intent (inout) :: y + type(psb_s_vect_type), intent (inout) :: z + real(psb_spk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_sabgdxyz_vect + end interface psb_abgdxyz + interface psb_geamax function psb_samax(x, desc_a, info, jx,global) import :: psb_desc_type, psb_spk_, psb_ipk_, & diff --git a/base/modules/psblas/psb_z_psblas_mod.F90 b/base/modules/psblas/psb_z_psblas_mod.F90 index 06be1b82..bcfe9caa 100644 --- a/base/modules/psblas/psb_z_psblas_mod.F90 +++ b/base/modules/psblas/psb_z_psblas_mod.F90 @@ -143,6 +143,20 @@ module psb_z_psblas_mod end subroutine psb_zaxpby end interface + interface psb_abgdxyz + subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + import :: psb_desc_type, psb_dpk_, psb_ipk_, & + & psb_z_vect_type, psb_zspmat_type + type(psb_z_vect_type), intent (inout) :: x + type(psb_z_vect_type), intent (inout) :: y + type(psb_z_vect_type), intent (inout) :: z + complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_zabgdxyz_vect + end interface psb_abgdxyz + interface psb_geamax function psb_zamax(x, desc_a, info, jx,global) import :: psb_desc_type, psb_dpk_, psb_ipk_, & diff --git a/base/modules/serial/psb_c_base_vect_mod.F90 b/base/modules/serial/psb_c_base_vect_mod.F90 index df15e0c9..41bab5ab 100644 --- a/base/modules/serial/psb_c_base_vect_mod.F90 +++ b/base/modules/serial/psb_c_base_vect_mod.F90 @@ -155,6 +155,9 @@ module psb_c_base_vect_mod procedure, pass(z) :: axpby_v2 => c_base_axpby_v2 procedure, pass(z) :: axpby_a2 => c_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => c_base_abgdxyz + procedure, pass(w) :: xyzw => c_base_xyzw + ! ! Vector by vector multiplication. Need all variants ! to handle multiple requirements from preconditioners @@ -1018,7 +1021,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine c_base_axpby_v(m,alpha, x, beta, y, info) @@ -1047,7 +1050,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param y The class(base_vect) to be added !! \param z The class(base_vect) to be returned !! \param info return code @@ -1078,7 +1081,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x(:) The array to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine c_base_axpby_a(m,alpha, x, beta, y, info) @@ -1126,6 +1129,64 @@ contains end subroutine c_base_axpby_a2 + ! + ! ABGDXYZ is invoked via Z, hence the structure below. + ! + ! + !> Function base_abgdxyz + !! \memberof psb_c_base_vect_type + !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \param m Number of entries to be considered + !! \param alpha scalar alpha + !! \param beta scalar beta + !! \param gamma scalar gamma + !! \param delta scalar delta + !! \param x The class(base_vect) to be added + !! \param y The class(base_vect) to be added + !! \param z The class(base_vect) to be added + !! \param info return code + !! + subroutine c_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + class(psb_c_base_vect_type), intent(inout) :: z + complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(alpha/=czero)) call x%sync() + if (y%is_dev().and.(beta/=czero)) call y%sync() + if (z%is_dev().and.(delta/=czero)) call z%sync() + call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() + + end subroutine c_base_abgdxyz + + subroutine c_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + class(psb_c_base_vect_type), intent(inout) :: z + class(psb_c_base_vect_type), intent(inout) :: w + complex(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(a/=czero)) call x%sync() + if (y%is_dev().and.(b/=czero)) call y%sync() + if (z%is_dev().and.(d/=czero)) call z%sync() + if (w%is_dev().and.(f/=czero)) call w%sync() + call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info) + call y%set_host() + call z%set_host() + call w%set_host() + + end subroutine c_base_xyzw + ! ! Multiple variants of two operations: diff --git a/base/modules/serial/psb_c_csr_mat_mod.f90 b/base/modules/serial/psb_c_csr_mat_mod.f90 index 8b076cc2..d09eca2b 100644 --- a/base/modules/serial/psb_c_csr_mat_mod.f90 +++ b/base/modules/serial/psb_c_csr_mat_mod.f90 @@ -579,7 +579,111 @@ module psb_c_csr_mat_mod end subroutine psb_c_csr_scals end interface - !> \namespace psb_base_mod \class psb_lc_csr_sparse_mat + + type, extends(psb_c_csr_sparse_mat) :: psb_c_ecsr_sparse_mat + + !> Number of non-empty rows + integer(psb_ipk_) :: nnerws + !> Indices of non-empty rows + integer(psb_ipk_), allocatable :: nerwp(:) + + contains + procedure, nopass :: get_fmt => c_ecsr_get_fmt + + ! procedure, pass(a) :: csmm => psb_c_ecsr_csmm + procedure, pass(a) :: csmv => psb_c_ecsr_csmv + + procedure, pass(a) :: cp_from_coo => psb_c_cp_ecsr_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cp_ecsr_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_mv_ecsr_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_mv_ecsr_from_fmt + + procedure, pass(a) :: cmp_nerwp => psb_c_ecsr_cmp_nerwp + procedure, pass(a) :: free => c_ecsr_free + procedure, pass(a) :: mold => psb_c_ecsr_mold + + end type psb_c_ecsr_sparse_mat + !> \memberof psb_c_ecsr_sparse_mat + !! \see psb_c_base_mat_mod::psb_c_base_csmv + interface + subroutine psb_c_ecsr_csmv(alpha,a,x,beta,y,info,trans) + import + class(psb_c_ecsr_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_ecsr_csmv + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !! \see psb_c_base_mat_mod::psb_c_base_cp_from_coo + interface + subroutine psb_c_ecsr_cmp_nerwp(a,info) + import + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ecsr_cmp_nerwp + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !! \see psb_c_base_mat_mod::psb_c_base_cp_from_coo + interface + subroutine psb_c_cp_ecsr_from_coo(a,b,info) + import + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ecsr_from_coo + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !! \see psb_c_base_mat_mod::psb_c_base_cp_from_fmt + interface + subroutine psb_c_cp_ecsr_from_fmt(a,b,info) + import + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ecsr_from_fmt + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !! \see psb_c_base_mat_mod::psb_c_base_mv_from_coo + interface + subroutine psb_c_mv_ecsr_from_coo(a,b,info) + import + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ecsr_from_coo + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !! \see psb_c_base_mat_mod::psb_c_base_mv_from_fmt + interface + subroutine psb_c_mv_ecsr_from_fmt(a,b,info) + import + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ecsr_from_fmt + end interface + + !> \memberof psb_c_ecsr_sparse_mat + !| \see psb_base_mat_mod::psb_base_mold + interface + subroutine psb_c_ecsr_mold(a,b,info) + import + class(psb_c_ecsr_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ecsr_mold + end interface + + + + !> \namespace psb_base_mod \class psb_lc_csr_sparse_mat !! \extends psb_lc_base_mat_mod::psb_lc_base_sparse_mat !! !! psb_lc_csr_sparse_mat type and the related methods. @@ -1178,6 +1282,26 @@ contains + function c_ecsr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ECSR' + end function c_ecsr_get_fmt + + subroutine c_ecsr_free(a) + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + + + if (allocated(a%nerwp)) deallocate(a%nerwp) + a%nnerws = 0 + call a%psb_c_csr_sparse_mat%free() + + return + end subroutine c_ecsr_free + + ! == =================================== ! ! diff --git a/base/modules/serial/psb_c_mat_mod.F90 b/base/modules/serial/psb_c_mat_mod.F90 index fd423de3..e7c84b00 100644 --- a/base/modules/serial/psb_c_mat_mod.F90 +++ b/base/modules/serial/psb_c_mat_mod.F90 @@ -79,12 +79,14 @@ module psb_c_mat_mod use psb_c_base_mat_mod - use psb_c_csr_mat_mod, only : psb_c_csr_sparse_mat, psb_lc_csr_sparse_mat + use psb_c_csr_mat_mod, only : psb_c_csr_sparse_mat, psb_lc_csr_sparse_mat,& + & psb_c_ecsr_sparse_mat use psb_c_csc_mat_mod, only : psb_c_csc_sparse_mat, psb_lc_csc_sparse_mat type :: psb_cspmat_type class(psb_c_base_sparse_mat), allocatable :: a + class(psb_c_base_sparse_mat), allocatable :: ad, and integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ type(psb_lc_coo_sparse_mat), allocatable :: rmta @@ -202,6 +204,8 @@ module psb_c_mat_mod procedure, pass(a) :: cscnv_ip => psb_c_cscnv_ip procedure, pass(a) :: cscnv_base => psb_c_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base + procedure, pass(a) :: split_nd => psb_c_split_nd + procedure, pass(a) :: merge_nd => psb_c_merge_nd procedure, pass(a) :: clone => psb_cspmat_clone procedure, pass(a) :: move_alloc => psb_cspmat_type_move ! @@ -840,6 +844,24 @@ module psb_c_mat_mod ! ! + interface + subroutine psb_c_split_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat + class(psb_cspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_split_nd + end interface + + interface + subroutine psb_c_merge_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat + class(psb_cspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_merge_nd + end interface + ! ! CSCNV: switches to a different internal derived type. ! 3 versions: copying to target @@ -859,7 +881,6 @@ module psb_c_mat_mod end subroutine psb_c_cscnv end interface - interface subroutine psb_c_cscnv_ip(a,iinfo,type,mold,dupl) import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat @@ -871,7 +892,6 @@ module psb_c_mat_mod end subroutine psb_c_cscnv_ip end interface - interface subroutine psb_c_cscnv_base(a,b,info,dupl) import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat diff --git a/base/modules/serial/psb_c_vect_mod.F90 b/base/modules/serial/psb_c_vect_mod.F90 index 1a336d11..865f9456 100644 --- a/base/modules/serial/psb_c_vect_mod.F90 +++ b/base/modules/serial/psb_c_vect_mod.F90 @@ -102,6 +102,9 @@ module psb_c_vect_mod procedure, pass(z) :: axpby_v2 => c_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => c_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => c_vect_abgdxyz + procedure, pass(z) :: xyzw => c_vect_xyzw + procedure, pass(y) :: mlt_v => c_vect_mlt_v procedure, pass(y) :: mlt_a => c_vect_mlt_a procedure, pass(z) :: mlt_a_2 => c_vect_mlt_a_2 @@ -771,6 +774,38 @@ contains end subroutine c_vect_axpby_a2 + subroutine c_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_vect_type), intent(inout) :: x + class(psb_c_vect_type), intent(inout) :: y + class(psb_c_vect_type), intent(inout) :: z + complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (allocated(z%v)) & + call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + + end subroutine c_vect_abgdxyz + + subroutine c_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_vect_type), intent(inout) :: x + class(psb_c_vect_type), intent(inout) :: y + class(psb_c_vect_type), intent(inout) :: z + class(psb_c_vect_type), intent(inout) :: w + complex(psb_spk_), intent (in) :: a, b, c, d, e, f + integer(psb_ipk_), intent(out) :: info + + if (allocated(w%v)) & + call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info) + + end subroutine c_vect_xyzw + + subroutine c_vect_mlt_v(x, y, info) use psi_serial_mod implicit none @@ -1134,7 +1169,7 @@ contains end if end function c_vect_nrm2_weight - + function c_vect_nrm2_weight_mask(n,x,w,id,info,aux) result(res) use psi_serial_mod implicit none diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90 index 87f5b0e4..1ad1ffa5 100644 --- a/base/modules/serial/psb_d_base_vect_mod.F90 +++ b/base/modules/serial/psb_d_base_vect_mod.F90 @@ -155,6 +155,9 @@ module psb_d_base_vect_mod procedure, pass(z) :: axpby_v2 => d_base_axpby_v2 procedure, pass(z) :: axpby_a2 => d_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => d_base_abgdxyz + procedure, pass(w) :: xyzw => d_base_xyzw + ! ! Vector by vector multiplication. Need all variants ! to handle multiple requirements from preconditioners @@ -1025,7 +1028,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine d_base_axpby_v(m,alpha, x, beta, y, info) @@ -1054,7 +1057,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param y The class(base_vect) to be added !! \param z The class(base_vect) to be returned !! \param info return code @@ -1085,7 +1088,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x(:) The array to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine d_base_axpby_a(m,alpha, x, beta, y, info) @@ -1133,6 +1136,64 @@ contains end subroutine d_base_axpby_a2 + ! + ! ABGDXYZ is invoked via Z, hence the structure below. + ! + ! + !> Function base_abgdxyz + !! \memberof psb_d_base_vect_type + !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \param m Number of entries to be considered + !! \param alpha scalar alpha + !! \param beta scalar beta + !! \param gamma scalar gamma + !! \param delta scalar delta + !! \param x The class(base_vect) to be added + !! \param y The class(base_vect) to be added + !! \param z The class(base_vect) to be added + !! \param info return code + !! + subroutine d_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_base_vect_type), intent(inout) :: z + real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(alpha/=dzero)) call x%sync() + if (y%is_dev().and.(beta/=dzero)) call y%sync() + if (z%is_dev().and.(delta/=dzero)) call z%sync() + call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() + + end subroutine d_base_abgdxyz + + subroutine d_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_base_vect_type), intent(inout) :: z + class(psb_d_base_vect_type), intent(inout) :: w + real(psb_dpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(a/=dzero)) call x%sync() + if (y%is_dev().and.(b/=dzero)) call y%sync() + if (z%is_dev().and.(d/=dzero)) call z%sync() + if (w%is_dev().and.(f/=dzero)) call w%sync() + call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info) + call y%set_host() + call z%set_host() + call w%set_host() + + end subroutine d_base_xyzw + ! ! Multiple variants of two operations: diff --git a/base/modules/serial/psb_d_csr_mat_mod.f90 b/base/modules/serial/psb_d_csr_mat_mod.f90 index d0aa622b..12d71755 100644 --- a/base/modules/serial/psb_d_csr_mat_mod.f90 +++ b/base/modules/serial/psb_d_csr_mat_mod.f90 @@ -579,7 +579,111 @@ module psb_d_csr_mat_mod end subroutine psb_d_csr_scals end interface - !> \namespace psb_base_mod \class psb_ld_csr_sparse_mat + + type, extends(psb_d_csr_sparse_mat) :: psb_d_ecsr_sparse_mat + + !> Number of non-empty rows + integer(psb_ipk_) :: nnerws + !> Indices of non-empty rows + integer(psb_ipk_), allocatable :: nerwp(:) + + contains + procedure, nopass :: get_fmt => d_ecsr_get_fmt + + ! procedure, pass(a) :: csmm => psb_d_ecsr_csmm + procedure, pass(a) :: csmv => psb_d_ecsr_csmv + + procedure, pass(a) :: cp_from_coo => psb_d_cp_ecsr_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cp_ecsr_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_mv_ecsr_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_mv_ecsr_from_fmt + + procedure, pass(a) :: cmp_nerwp => psb_d_ecsr_cmp_nerwp + procedure, pass(a) :: free => d_ecsr_free + procedure, pass(a) :: mold => psb_d_ecsr_mold + + end type psb_d_ecsr_sparse_mat + !> \memberof psb_d_ecsr_sparse_mat + !! \see psb_d_base_mat_mod::psb_d_base_csmv + interface + subroutine psb_d_ecsr_csmv(alpha,a,x,beta,y,info,trans) + import + class(psb_d_ecsr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_ecsr_csmv + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !! \see psb_d_base_mat_mod::psb_d_base_cp_from_coo + interface + subroutine psb_d_ecsr_cmp_nerwp(a,info) + import + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ecsr_cmp_nerwp + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !! \see psb_d_base_mat_mod::psb_d_base_cp_from_coo + interface + subroutine psb_d_cp_ecsr_from_coo(a,b,info) + import + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ecsr_from_coo + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !! \see psb_d_base_mat_mod::psb_d_base_cp_from_fmt + interface + subroutine psb_d_cp_ecsr_from_fmt(a,b,info) + import + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ecsr_from_fmt + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !! \see psb_d_base_mat_mod::psb_d_base_mv_from_coo + interface + subroutine psb_d_mv_ecsr_from_coo(a,b,info) + import + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ecsr_from_coo + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !! \see psb_d_base_mat_mod::psb_d_base_mv_from_fmt + interface + subroutine psb_d_mv_ecsr_from_fmt(a,b,info) + import + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ecsr_from_fmt + end interface + + !> \memberof psb_d_ecsr_sparse_mat + !| \see psb_base_mat_mod::psb_base_mold + interface + subroutine psb_d_ecsr_mold(a,b,info) + import + class(psb_d_ecsr_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ecsr_mold + end interface + + + + !> \namespace psb_base_mod \class psb_ld_csr_sparse_mat !! \extends psb_ld_base_mat_mod::psb_ld_base_sparse_mat !! !! psb_ld_csr_sparse_mat type and the related methods. @@ -1178,6 +1282,26 @@ contains + function d_ecsr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ECSR' + end function d_ecsr_get_fmt + + subroutine d_ecsr_free(a) + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + + + if (allocated(a%nerwp)) deallocate(a%nerwp) + a%nnerws = 0 + call a%psb_d_csr_sparse_mat%free() + + return + end subroutine d_ecsr_free + + ! == =================================== ! ! diff --git a/base/modules/serial/psb_d_mat_mod.F90 b/base/modules/serial/psb_d_mat_mod.F90 index 8f967ce1..fe09d83a 100644 --- a/base/modules/serial/psb_d_mat_mod.F90 +++ b/base/modules/serial/psb_d_mat_mod.F90 @@ -79,12 +79,14 @@ module psb_d_mat_mod use psb_d_base_mat_mod - use psb_d_csr_mat_mod, only : psb_d_csr_sparse_mat, psb_ld_csr_sparse_mat + use psb_d_csr_mat_mod, only : psb_d_csr_sparse_mat, psb_ld_csr_sparse_mat,& + & psb_d_ecsr_sparse_mat use psb_d_csc_mat_mod, only : psb_d_csc_sparse_mat, psb_ld_csc_sparse_mat type :: psb_dspmat_type class(psb_d_base_sparse_mat), allocatable :: a + class(psb_d_base_sparse_mat), allocatable :: ad, and integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ type(psb_ld_coo_sparse_mat), allocatable :: rmta @@ -202,6 +204,8 @@ module psb_d_mat_mod procedure, pass(a) :: cscnv_ip => psb_d_cscnv_ip procedure, pass(a) :: cscnv_base => psb_d_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base + procedure, pass(a) :: split_nd => psb_d_split_nd + procedure, pass(a) :: merge_nd => psb_d_merge_nd procedure, pass(a) :: clone => psb_dspmat_clone procedure, pass(a) :: move_alloc => psb_dspmat_type_move ! @@ -840,6 +844,24 @@ module psb_d_mat_mod ! ! + interface + subroutine psb_d_split_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat + class(psb_dspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_split_nd + end interface + + interface + subroutine psb_d_merge_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat + class(psb_dspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_merge_nd + end interface + ! ! CSCNV: switches to a different internal derived type. ! 3 versions: copying to target @@ -859,7 +881,6 @@ module psb_d_mat_mod end subroutine psb_d_cscnv end interface - interface subroutine psb_d_cscnv_ip(a,iinfo,type,mold,dupl) import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat @@ -871,7 +892,6 @@ module psb_d_mat_mod end subroutine psb_d_cscnv_ip end interface - interface subroutine psb_d_cscnv_base(a,b,info,dupl) import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat diff --git a/base/modules/serial/psb_d_vect_mod.F90 b/base/modules/serial/psb_d_vect_mod.F90 index 88fa3262..55dd8230 100644 --- a/base/modules/serial/psb_d_vect_mod.F90 +++ b/base/modules/serial/psb_d_vect_mod.F90 @@ -102,6 +102,9 @@ module psb_d_vect_mod procedure, pass(z) :: axpby_v2 => d_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => d_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => d_vect_abgdxyz + procedure, pass(z) :: xyzw => d_vect_xyzw + procedure, pass(y) :: mlt_v => d_vect_mlt_v procedure, pass(y) :: mlt_a => d_vect_mlt_a procedure, pass(z) :: mlt_a_2 => d_vect_mlt_a_2 @@ -778,6 +781,38 @@ contains end subroutine d_vect_axpby_a2 + subroutine d_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_vect_type), intent(inout) :: x + class(psb_d_vect_type), intent(inout) :: y + class(psb_d_vect_type), intent(inout) :: z + real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (allocated(z%v)) & + call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + + end subroutine d_vect_abgdxyz + + subroutine d_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_vect_type), intent(inout) :: x + class(psb_d_vect_type), intent(inout) :: y + class(psb_d_vect_type), intent(inout) :: z + class(psb_d_vect_type), intent(inout) :: w + real(psb_dpk_), intent (in) :: a, b, c, d, e, f + integer(psb_ipk_), intent(out) :: info + + if (allocated(w%v)) & + call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info) + + end subroutine d_vect_xyzw + + subroutine d_vect_mlt_v(x, y, info) use psi_serial_mod implicit none @@ -1141,7 +1176,7 @@ contains end if end function d_vect_nrm2_weight - + function d_vect_nrm2_weight_mask(n,x,w,id,info,aux) result(res) use psi_serial_mod implicit none diff --git a/base/modules/serial/psb_s_base_vect_mod.F90 b/base/modules/serial/psb_s_base_vect_mod.F90 index fccd846b..26b82c31 100644 --- a/base/modules/serial/psb_s_base_vect_mod.F90 +++ b/base/modules/serial/psb_s_base_vect_mod.F90 @@ -155,6 +155,9 @@ module psb_s_base_vect_mod procedure, pass(z) :: axpby_v2 => s_base_axpby_v2 procedure, pass(z) :: axpby_a2 => s_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => s_base_abgdxyz + procedure, pass(w) :: xyzw => s_base_xyzw + ! ! Vector by vector multiplication. Need all variants ! to handle multiple requirements from preconditioners @@ -1025,7 +1028,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine s_base_axpby_v(m,alpha, x, beta, y, info) @@ -1054,7 +1057,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param y The class(base_vect) to be added !! \param z The class(base_vect) to be returned !! \param info return code @@ -1085,7 +1088,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x(:) The array to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine s_base_axpby_a(m,alpha, x, beta, y, info) @@ -1133,6 +1136,64 @@ contains end subroutine s_base_axpby_a2 + ! + ! ABGDXYZ is invoked via Z, hence the structure below. + ! + ! + !> Function base_abgdxyz + !! \memberof psb_s_base_vect_type + !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \param m Number of entries to be considered + !! \param alpha scalar alpha + !! \param beta scalar beta + !! \param gamma scalar gamma + !! \param delta scalar delta + !! \param x The class(base_vect) to be added + !! \param y The class(base_vect) to be added + !! \param z The class(base_vect) to be added + !! \param info return code + !! + subroutine s_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + class(psb_s_base_vect_type), intent(inout) :: z + real(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(alpha/=szero)) call x%sync() + if (y%is_dev().and.(beta/=szero)) call y%sync() + if (z%is_dev().and.(delta/=szero)) call z%sync() + call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() + + end subroutine s_base_abgdxyz + + subroutine s_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + class(psb_s_base_vect_type), intent(inout) :: z + class(psb_s_base_vect_type), intent(inout) :: w + real(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(a/=szero)) call x%sync() + if (y%is_dev().and.(b/=szero)) call y%sync() + if (z%is_dev().and.(d/=szero)) call z%sync() + if (w%is_dev().and.(f/=szero)) call w%sync() + call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info) + call y%set_host() + call z%set_host() + call w%set_host() + + end subroutine s_base_xyzw + ! ! Multiple variants of two operations: diff --git a/base/modules/serial/psb_s_csr_mat_mod.f90 b/base/modules/serial/psb_s_csr_mat_mod.f90 index 6b4c51c7..884ede38 100644 --- a/base/modules/serial/psb_s_csr_mat_mod.f90 +++ b/base/modules/serial/psb_s_csr_mat_mod.f90 @@ -579,7 +579,111 @@ module psb_s_csr_mat_mod end subroutine psb_s_csr_scals end interface - !> \namespace psb_base_mod \class psb_ls_csr_sparse_mat + + type, extends(psb_s_csr_sparse_mat) :: psb_s_ecsr_sparse_mat + + !> Number of non-empty rows + integer(psb_ipk_) :: nnerws + !> Indices of non-empty rows + integer(psb_ipk_), allocatable :: nerwp(:) + + contains + procedure, nopass :: get_fmt => s_ecsr_get_fmt + + ! procedure, pass(a) :: csmm => psb_s_ecsr_csmm + procedure, pass(a) :: csmv => psb_s_ecsr_csmv + + procedure, pass(a) :: cp_from_coo => psb_s_cp_ecsr_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cp_ecsr_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_mv_ecsr_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_mv_ecsr_from_fmt + + procedure, pass(a) :: cmp_nerwp => psb_s_ecsr_cmp_nerwp + procedure, pass(a) :: free => s_ecsr_free + procedure, pass(a) :: mold => psb_s_ecsr_mold + + end type psb_s_ecsr_sparse_mat + !> \memberof psb_s_ecsr_sparse_mat + !! \see psb_s_base_mat_mod::psb_s_base_csmv + interface + subroutine psb_s_ecsr_csmv(alpha,a,x,beta,y,info,trans) + import + class(psb_s_ecsr_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_ecsr_csmv + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !! \see psb_s_base_mat_mod::psb_s_base_cp_from_coo + interface + subroutine psb_s_ecsr_cmp_nerwp(a,info) + import + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ecsr_cmp_nerwp + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !! \see psb_s_base_mat_mod::psb_s_base_cp_from_coo + interface + subroutine psb_s_cp_ecsr_from_coo(a,b,info) + import + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ecsr_from_coo + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !! \see psb_s_base_mat_mod::psb_s_base_cp_from_fmt + interface + subroutine psb_s_cp_ecsr_from_fmt(a,b,info) + import + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ecsr_from_fmt + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !! \see psb_s_base_mat_mod::psb_s_base_mv_from_coo + interface + subroutine psb_s_mv_ecsr_from_coo(a,b,info) + import + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ecsr_from_coo + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !! \see psb_s_base_mat_mod::psb_s_base_mv_from_fmt + interface + subroutine psb_s_mv_ecsr_from_fmt(a,b,info) + import + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ecsr_from_fmt + end interface + + !> \memberof psb_s_ecsr_sparse_mat + !| \see psb_base_mat_mod::psb_base_mold + interface + subroutine psb_s_ecsr_mold(a,b,info) + import + class(psb_s_ecsr_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ecsr_mold + end interface + + + + !> \namespace psb_base_mod \class psb_ls_csr_sparse_mat !! \extends psb_ls_base_mat_mod::psb_ls_base_sparse_mat !! !! psb_ls_csr_sparse_mat type and the related methods. @@ -1178,6 +1282,26 @@ contains + function s_ecsr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ECSR' + end function s_ecsr_get_fmt + + subroutine s_ecsr_free(a) + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + + + if (allocated(a%nerwp)) deallocate(a%nerwp) + a%nnerws = 0 + call a%psb_s_csr_sparse_mat%free() + + return + end subroutine s_ecsr_free + + ! == =================================== ! ! diff --git a/base/modules/serial/psb_s_mat_mod.F90 b/base/modules/serial/psb_s_mat_mod.F90 index 43f1c619..868583b2 100644 --- a/base/modules/serial/psb_s_mat_mod.F90 +++ b/base/modules/serial/psb_s_mat_mod.F90 @@ -79,12 +79,14 @@ module psb_s_mat_mod use psb_s_base_mat_mod - use psb_s_csr_mat_mod, only : psb_s_csr_sparse_mat, psb_ls_csr_sparse_mat + use psb_s_csr_mat_mod, only : psb_s_csr_sparse_mat, psb_ls_csr_sparse_mat,& + & psb_s_ecsr_sparse_mat use psb_s_csc_mat_mod, only : psb_s_csc_sparse_mat, psb_ls_csc_sparse_mat type :: psb_sspmat_type class(psb_s_base_sparse_mat), allocatable :: a + class(psb_s_base_sparse_mat), allocatable :: ad, and integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ type(psb_ls_coo_sparse_mat), allocatable :: rmta @@ -202,6 +204,8 @@ module psb_s_mat_mod procedure, pass(a) :: cscnv_ip => psb_s_cscnv_ip procedure, pass(a) :: cscnv_base => psb_s_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base + procedure, pass(a) :: split_nd => psb_s_split_nd + procedure, pass(a) :: merge_nd => psb_s_merge_nd procedure, pass(a) :: clone => psb_sspmat_clone procedure, pass(a) :: move_alloc => psb_sspmat_type_move ! @@ -840,6 +844,24 @@ module psb_s_mat_mod ! ! + interface + subroutine psb_s_split_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat + class(psb_sspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_split_nd + end interface + + interface + subroutine psb_s_merge_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat + class(psb_sspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_merge_nd + end interface + ! ! CSCNV: switches to a different internal derived type. ! 3 versions: copying to target @@ -859,7 +881,6 @@ module psb_s_mat_mod end subroutine psb_s_cscnv end interface - interface subroutine psb_s_cscnv_ip(a,iinfo,type,mold,dupl) import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat @@ -871,7 +892,6 @@ module psb_s_mat_mod end subroutine psb_s_cscnv_ip end interface - interface subroutine psb_s_cscnv_base(a,b,info,dupl) import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat diff --git a/base/modules/serial/psb_s_vect_mod.F90 b/base/modules/serial/psb_s_vect_mod.F90 index 7a54ecf0..a50b2a0a 100644 --- a/base/modules/serial/psb_s_vect_mod.F90 +++ b/base/modules/serial/psb_s_vect_mod.F90 @@ -102,6 +102,9 @@ module psb_s_vect_mod procedure, pass(z) :: axpby_v2 => s_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => s_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => s_vect_abgdxyz + procedure, pass(z) :: xyzw => s_vect_xyzw + procedure, pass(y) :: mlt_v => s_vect_mlt_v procedure, pass(y) :: mlt_a => s_vect_mlt_a procedure, pass(z) :: mlt_a_2 => s_vect_mlt_a_2 @@ -778,6 +781,38 @@ contains end subroutine s_vect_axpby_a2 + subroutine s_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_vect_type), intent(inout) :: x + class(psb_s_vect_type), intent(inout) :: y + class(psb_s_vect_type), intent(inout) :: z + real(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (allocated(z%v)) & + call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + + end subroutine s_vect_abgdxyz + + subroutine s_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_vect_type), intent(inout) :: x + class(psb_s_vect_type), intent(inout) :: y + class(psb_s_vect_type), intent(inout) :: z + class(psb_s_vect_type), intent(inout) :: w + real(psb_spk_), intent (in) :: a, b, c, d, e, f + integer(psb_ipk_), intent(out) :: info + + if (allocated(w%v)) & + call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info) + + end subroutine s_vect_xyzw + + subroutine s_vect_mlt_v(x, y, info) use psi_serial_mod implicit none @@ -1141,7 +1176,7 @@ contains end if end function s_vect_nrm2_weight - + function s_vect_nrm2_weight_mask(n,x,w,id,info,aux) result(res) use psi_serial_mod implicit none diff --git a/base/modules/serial/psb_serial_mod.f90 b/base/modules/serial/psb_serial_mod.f90 index 627b318e..a25c1c37 100644 --- a/base/modules/serial/psb_serial_mod.f90 +++ b/base/modules/serial/psb_serial_mod.f90 @@ -36,9 +36,7 @@ module psb_serial_mod use psb_string_mod use psb_sort_mod - use psi_serial_mod, & - & psb_gth => psi_gth,& - & psb_sct => psi_sct + use psi_serial_mod use psb_s_serial_mod use psb_d_serial_mod diff --git a/base/modules/serial/psb_z_base_vect_mod.F90 b/base/modules/serial/psb_z_base_vect_mod.F90 index 2a14de21..a3afc9c1 100644 --- a/base/modules/serial/psb_z_base_vect_mod.F90 +++ b/base/modules/serial/psb_z_base_vect_mod.F90 @@ -155,6 +155,9 @@ module psb_z_base_vect_mod procedure, pass(z) :: axpby_v2 => z_base_axpby_v2 procedure, pass(z) :: axpby_a2 => z_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => z_base_abgdxyz + procedure, pass(w) :: xyzw => z_base_xyzw + ! ! Vector by vector multiplication. Need all variants ! to handle multiple requirements from preconditioners @@ -1018,7 +1021,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine z_base_axpby_v(m,alpha, x, beta, y, info) @@ -1047,7 +1050,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x The class(base_vect) to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param y The class(base_vect) to be added !! \param z The class(base_vect) to be returned !! \param info return code @@ -1078,7 +1081,7 @@ contains !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param x(:) The array to be added - !! \param beta scalar alpha + !! \param beta scalar beta !! \param info return code !! subroutine z_base_axpby_a(m,alpha, x, beta, y, info) @@ -1126,6 +1129,64 @@ contains end subroutine z_base_axpby_a2 + ! + ! ABGDXYZ is invoked via Z, hence the structure below. + ! + ! + !> Function base_abgdxyz + !! \memberof psb_z_base_vect_type + !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \param m Number of entries to be considered + !! \param alpha scalar alpha + !! \param beta scalar beta + !! \param gamma scalar gamma + !! \param delta scalar delta + !! \param x The class(base_vect) to be added + !! \param y The class(base_vect) to be added + !! \param z The class(base_vect) to be added + !! \param info return code + !! + subroutine z_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + class(psb_z_base_vect_type), intent(inout) :: z + complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(alpha/=zzero)) call x%sync() + if (y%is_dev().and.(beta/=zzero)) call y%sync() + if (z%is_dev().and.(delta/=zzero)) call z%sync() + call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call y%set_host() + call z%set_host() + + end subroutine z_base_abgdxyz + + subroutine z_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + class(psb_z_base_vect_type), intent(inout) :: z + class(psb_z_base_vect_type), intent(inout) :: w + complex(psb_dpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + if (x%is_dev().and.(a/=zzero)) call x%sync() + if (y%is_dev().and.(b/=zzero)) call y%sync() + if (z%is_dev().and.(d/=zzero)) call z%sync() + if (w%is_dev().and.(f/=zzero)) call w%sync() + call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info) + call y%set_host() + call z%set_host() + call w%set_host() + + end subroutine z_base_xyzw + ! ! Multiple variants of two operations: diff --git a/base/modules/serial/psb_z_csr_mat_mod.f90 b/base/modules/serial/psb_z_csr_mat_mod.f90 index 4ec8dd00..c328fead 100644 --- a/base/modules/serial/psb_z_csr_mat_mod.f90 +++ b/base/modules/serial/psb_z_csr_mat_mod.f90 @@ -579,7 +579,111 @@ module psb_z_csr_mat_mod end subroutine psb_z_csr_scals end interface - !> \namespace psb_base_mod \class psb_lz_csr_sparse_mat + + type, extends(psb_z_csr_sparse_mat) :: psb_z_ecsr_sparse_mat + + !> Number of non-empty rows + integer(psb_ipk_) :: nnerws + !> Indices of non-empty rows + integer(psb_ipk_), allocatable :: nerwp(:) + + contains + procedure, nopass :: get_fmt => z_ecsr_get_fmt + + ! procedure, pass(a) :: csmm => psb_z_ecsr_csmm + procedure, pass(a) :: csmv => psb_z_ecsr_csmv + + procedure, pass(a) :: cp_from_coo => psb_z_cp_ecsr_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cp_ecsr_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_mv_ecsr_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_mv_ecsr_from_fmt + + procedure, pass(a) :: cmp_nerwp => psb_z_ecsr_cmp_nerwp + procedure, pass(a) :: free => z_ecsr_free + procedure, pass(a) :: mold => psb_z_ecsr_mold + + end type psb_z_ecsr_sparse_mat + !> \memberof psb_z_ecsr_sparse_mat + !! \see psb_z_base_mat_mod::psb_z_base_csmv + interface + subroutine psb_z_ecsr_csmv(alpha,a,x,beta,y,info,trans) + import + class(psb_z_ecsr_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_ecsr_csmv + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !! \see psb_z_base_mat_mod::psb_z_base_cp_from_coo + interface + subroutine psb_z_ecsr_cmp_nerwp(a,info) + import + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ecsr_cmp_nerwp + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !! \see psb_z_base_mat_mod::psb_z_base_cp_from_coo + interface + subroutine psb_z_cp_ecsr_from_coo(a,b,info) + import + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ecsr_from_coo + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !! \see psb_z_base_mat_mod::psb_z_base_cp_from_fmt + interface + subroutine psb_z_cp_ecsr_from_fmt(a,b,info) + import + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ecsr_from_fmt + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !! \see psb_z_base_mat_mod::psb_z_base_mv_from_coo + interface + subroutine psb_z_mv_ecsr_from_coo(a,b,info) + import + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ecsr_from_coo + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !! \see psb_z_base_mat_mod::psb_z_base_mv_from_fmt + interface + subroutine psb_z_mv_ecsr_from_fmt(a,b,info) + import + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ecsr_from_fmt + end interface + + !> \memberof psb_z_ecsr_sparse_mat + !| \see psb_base_mat_mod::psb_base_mold + interface + subroutine psb_z_ecsr_mold(a,b,info) + import + class(psb_z_ecsr_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ecsr_mold + end interface + + + + !> \namespace psb_base_mod \class psb_lz_csr_sparse_mat !! \extends psb_lz_base_mat_mod::psb_lz_base_sparse_mat !! !! psb_lz_csr_sparse_mat type and the related methods. @@ -1178,6 +1282,26 @@ contains + function z_ecsr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ECSR' + end function z_ecsr_get_fmt + + subroutine z_ecsr_free(a) + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + + + if (allocated(a%nerwp)) deallocate(a%nerwp) + a%nnerws = 0 + call a%psb_z_csr_sparse_mat%free() + + return + end subroutine z_ecsr_free + + ! == =================================== ! ! diff --git a/base/modules/serial/psb_z_mat_mod.F90 b/base/modules/serial/psb_z_mat_mod.F90 index c534cad5..48b670de 100644 --- a/base/modules/serial/psb_z_mat_mod.F90 +++ b/base/modules/serial/psb_z_mat_mod.F90 @@ -79,12 +79,14 @@ module psb_z_mat_mod use psb_z_base_mat_mod - use psb_z_csr_mat_mod, only : psb_z_csr_sparse_mat, psb_lz_csr_sparse_mat + use psb_z_csr_mat_mod, only : psb_z_csr_sparse_mat, psb_lz_csr_sparse_mat,& + & psb_z_ecsr_sparse_mat use psb_z_csc_mat_mod, only : psb_z_csc_sparse_mat, psb_lz_csc_sparse_mat type :: psb_zspmat_type class(psb_z_base_sparse_mat), allocatable :: a + class(psb_z_base_sparse_mat), allocatable :: ad, and integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ type(psb_lz_coo_sparse_mat), allocatable :: rmta @@ -202,6 +204,8 @@ module psb_z_mat_mod procedure, pass(a) :: cscnv_ip => psb_z_cscnv_ip procedure, pass(a) :: cscnv_base => psb_z_cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base + procedure, pass(a) :: split_nd => psb_z_split_nd + procedure, pass(a) :: merge_nd => psb_z_merge_nd procedure, pass(a) :: clone => psb_zspmat_clone procedure, pass(a) :: move_alloc => psb_zspmat_type_move ! @@ -840,6 +844,24 @@ module psb_z_mat_mod ! ! + interface + subroutine psb_z_split_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat + class(psb_zspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_split_nd + end interface + + interface + subroutine psb_z_merge_nd(a,n_rows,n_cols,info) + import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat + class(psb_zspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_merge_nd + end interface + ! ! CSCNV: switches to a different internal derived type. ! 3 versions: copying to target @@ -859,7 +881,6 @@ module psb_z_mat_mod end subroutine psb_z_cscnv end interface - interface subroutine psb_z_cscnv_ip(a,iinfo,type,mold,dupl) import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat @@ -871,7 +892,6 @@ module psb_z_mat_mod end subroutine psb_z_cscnv_ip end interface - interface subroutine psb_z_cscnv_base(a,b,info,dupl) import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat diff --git a/base/modules/serial/psb_z_vect_mod.F90 b/base/modules/serial/psb_z_vect_mod.F90 index e8a34859..21e0c546 100644 --- a/base/modules/serial/psb_z_vect_mod.F90 +++ b/base/modules/serial/psb_z_vect_mod.F90 @@ -102,6 +102,9 @@ module psb_z_vect_mod procedure, pass(z) :: axpby_v2 => z_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => z_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 + procedure, pass(z) :: abgdxyz => z_vect_abgdxyz + procedure, pass(z) :: xyzw => z_vect_xyzw + procedure, pass(y) :: mlt_v => z_vect_mlt_v procedure, pass(y) :: mlt_a => z_vect_mlt_a procedure, pass(z) :: mlt_a_2 => z_vect_mlt_a_2 @@ -771,6 +774,38 @@ contains end subroutine z_vect_axpby_a2 + subroutine z_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_vect_type), intent(inout) :: x + class(psb_z_vect_type), intent(inout) :: y + class(psb_z_vect_type), intent(inout) :: z + complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + if (allocated(z%v)) & + call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + + end subroutine z_vect_abgdxyz + + subroutine z_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_vect_type), intent(inout) :: x + class(psb_z_vect_type), intent(inout) :: y + class(psb_z_vect_type), intent(inout) :: z + class(psb_z_vect_type), intent(inout) :: w + complex(psb_dpk_), intent (in) :: a, b, c, d, e, f + integer(psb_ipk_), intent(out) :: info + + if (allocated(w%v)) & + call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info) + + end subroutine z_vect_xyzw + + subroutine z_vect_mlt_v(x, y, info) use psi_serial_mod implicit none @@ -1134,7 +1169,7 @@ contains end if end function z_vect_nrm2_weight - + function z_vect_nrm2_weight_mask(n,x,w,id,info,aux) result(res) use psi_serial_mod implicit none diff --git a/base/modules/tools/psb_c_tools_mod.F90 b/base/modules/tools/psb_c_tools_mod.F90 index 2de8f906..0ed2d82c 100644 --- a/base/modules/tools/psb_c_tools_mod.F90 +++ b/base/modules/tools/psb_c_tools_mod.F90 @@ -250,7 +250,7 @@ Module psb_c_tools_mod end interface interface psb_spasb - subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) + subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold, bld_and) import implicit none type(psb_cspmat_type), intent (inout) :: a @@ -259,6 +259,7 @@ Module psb_c_tools_mod integer(psb_ipk_),optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_c_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and end subroutine psb_cspasb end interface diff --git a/base/modules/tools/psb_d_tools_mod.F90 b/base/modules/tools/psb_d_tools_mod.F90 index 30e45d53..26f83201 100644 --- a/base/modules/tools/psb_d_tools_mod.F90 +++ b/base/modules/tools/psb_d_tools_mod.F90 @@ -250,7 +250,7 @@ Module psb_d_tools_mod end interface interface psb_spasb - subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) + subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold, bld_and) import implicit none type(psb_dspmat_type), intent (inout) :: a @@ -259,6 +259,7 @@ Module psb_d_tools_mod integer(psb_ipk_),optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_d_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and end subroutine psb_dspasb end interface diff --git a/base/modules/tools/psb_s_tools_mod.F90 b/base/modules/tools/psb_s_tools_mod.F90 index 5d2f8d00..0f70a31a 100644 --- a/base/modules/tools/psb_s_tools_mod.F90 +++ b/base/modules/tools/psb_s_tools_mod.F90 @@ -250,7 +250,7 @@ Module psb_s_tools_mod end interface interface psb_spasb - subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) + subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold, bld_and) import implicit none type(psb_sspmat_type), intent (inout) :: a @@ -259,6 +259,7 @@ Module psb_s_tools_mod integer(psb_ipk_),optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_s_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and end subroutine psb_sspasb end interface diff --git a/base/modules/tools/psb_z_tools_mod.F90 b/base/modules/tools/psb_z_tools_mod.F90 index 9d6bd77b..1f24e05a 100644 --- a/base/modules/tools/psb_z_tools_mod.F90 +++ b/base/modules/tools/psb_z_tools_mod.F90 @@ -250,7 +250,7 @@ Module psb_z_tools_mod end interface interface psb_spasb - subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) + subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold, bld_and) import implicit none type(psb_zspmat_type), intent (inout) :: a @@ -259,6 +259,7 @@ Module psb_z_tools_mod integer(psb_ipk_),optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_z_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and end subroutine psb_zspasb end interface diff --git a/base/psblas/psb_caxpby.f90 b/base/psblas/psb_caxpby.f90 index da3dd93b..7c22bb06 100644 --- a/base/psblas/psb_caxpby.f90 +++ b/base/psblas/psb_caxpby.f90 @@ -741,3 +741,86 @@ subroutine psb_caddconst_vect(x,b,z,desc_a,info) return end subroutine psb_caddconst_vect + + +subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + use psb_base_mod, psb_protect_name => psb_cabgdxyz_vect + implicit none + type(psb_c_vect_type), intent (inout) :: x + type(psb_c_vect_type), intent (inout) :: y + type(psb_c_vect_type), intent (inout) :: z + complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me,& + & err_act, iix, jjx, iiy, jjy, nr + integer(psb_lpk_) :: ix, ijx, iy, ijy, m + character(len=20) :: name, ch_err + + name='psb_c_addconst_vect' + if (psb_errstatus_fatal()) return + info=psb_success_ + call psb_erractionsave(err_act) + + ctxt=desc_a%get_context() + + call psb_info(ctxt, me, np) + if (np == -ione) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(z%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + iy = ione + + m = desc_a%get_global_rows() + nr = desc_a%get_local_rows() + + ! check vector correctness + call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 1' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 2' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if(desc_a%get_local_rows() > 0) then + call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_cabgdxyz_vect + diff --git a/base/psblas/psb_cspmm.f90 b/base/psblas/psb_cspmm.f90 index fd8a9c39..22c6408f 100644 --- a/base/psblas/psb_cspmm.f90 +++ b/base/psblas/psb_cspmm.f90 @@ -83,6 +83,9 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& character(len=20) :: name, ch_err logical :: aliw, doswap_ integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: do_timings=.true. + integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1 + integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1 name='psb_cspmv' info=psb_success_ @@ -130,6 +133,19 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& call psb_errpush(info,name) goto 9999 end if + if ((do_timings).and.(mv_phase1==-1)) & + & mv_phase1 = psb_get_timer_idx("SPMM: and send ") + if ((do_timings).and.(mv_phase2==-1)) & + & mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad") + if ((do_timings).and.(mv_phase3==-1)) & + & mv_phase3 = psb_get_timer_idx("SPMM: and rcv") + if ((do_timings).and.(mv_phase4==-1)) & + & mv_phase4 = psb_get_timer_idx("SPMM: and cmp and") + if ((do_timings).and.(mv_phase11==-1)) & + & mv_phase11 = psb_get_timer_idx("SPMM: noand exch ") + if ((do_timings).and.(mv_phase12==-1)) & + & mv_phase12 = psb_get_timer_idx("SPMM: noand cmp") + m = desc_a%get_global_rows() n = desc_a%get_global_cols() @@ -178,14 +194,46 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + + if (allocated(a%ad)) then + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + !if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt() + if (do_timings) call psb_barrier(ctxt) + if (do_timings) call psb_tic(mv_phase1) + if (doswap_) call psi_swapdata(psb_swap_send_,& + & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase1) + if (do_timings) call psb_tic(mv_phase2) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) call psb_tic(mv_phase3) + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase3) + if (do_timings) call psb_tic(mv_phase4) + call a%and%spmm(alpha,x%v,cone,y%v,info) + if (do_timings) call psb_toc(mv_phase4) + end block + + else + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + + if (do_timings) call psb_tic(mv_phase11) + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + if (do_timings) call psb_toc(mv_phase11) + if (do_timings) call psb_tic(mv_phase12) + call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) call psb_toc(mv_phase12) + end block end if - - call psb_csmm(alpha,a,x,beta,y,info) - + if(info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) diff --git a/base/psblas/psb_daxpby.f90 b/base/psblas/psb_daxpby.f90 index c386f8f2..1de77647 100644 --- a/base/psblas/psb_daxpby.f90 +++ b/base/psblas/psb_daxpby.f90 @@ -741,3 +741,86 @@ subroutine psb_daddconst_vect(x,b,z,desc_a,info) return end subroutine psb_daddconst_vect + + +subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + use psb_base_mod, psb_protect_name => psb_dabgdxyz_vect + implicit none + type(psb_d_vect_type), intent (inout) :: x + type(psb_d_vect_type), intent (inout) :: y + type(psb_d_vect_type), intent (inout) :: z + real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me,& + & err_act, iix, jjx, iiy, jjy, nr + integer(psb_lpk_) :: ix, ijx, iy, ijy, m + character(len=20) :: name, ch_err + + name='psb_d_addconst_vect' + if (psb_errstatus_fatal()) return + info=psb_success_ + call psb_erractionsave(err_act) + + ctxt=desc_a%get_context() + + call psb_info(ctxt, me, np) + if (np == -ione) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(z%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + iy = ione + + m = desc_a%get_global_rows() + nr = desc_a%get_local_rows() + + ! check vector correctness + call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 1' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 2' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if(desc_a%get_local_rows() > 0) then + call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_dabgdxyz_vect + diff --git a/base/psblas/psb_dspmm.f90 b/base/psblas/psb_dspmm.f90 index a006c7e9..fa256276 100644 --- a/base/psblas/psb_dspmm.f90 +++ b/base/psblas/psb_dspmm.f90 @@ -83,6 +83,9 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& character(len=20) :: name, ch_err logical :: aliw, doswap_ integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: do_timings=.true. + integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1 + integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1 name='psb_dspmv' info=psb_success_ @@ -130,6 +133,19 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& call psb_errpush(info,name) goto 9999 end if + if ((do_timings).and.(mv_phase1==-1)) & + & mv_phase1 = psb_get_timer_idx("SPMM: and send ") + if ((do_timings).and.(mv_phase2==-1)) & + & mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad") + if ((do_timings).and.(mv_phase3==-1)) & + & mv_phase3 = psb_get_timer_idx("SPMM: and rcv") + if ((do_timings).and.(mv_phase4==-1)) & + & mv_phase4 = psb_get_timer_idx("SPMM: and cmp and") + if ((do_timings).and.(mv_phase11==-1)) & + & mv_phase11 = psb_get_timer_idx("SPMM: noand exch ") + if ((do_timings).and.(mv_phase12==-1)) & + & mv_phase12 = psb_get_timer_idx("SPMM: noand cmp") + m = desc_a%get_global_rows() n = desc_a%get_global_cols() @@ -178,14 +194,46 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + + if (allocated(a%ad)) then + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + !if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt() + if (do_timings) call psb_barrier(ctxt) + if (do_timings) call psb_tic(mv_phase1) + if (doswap_) call psi_swapdata(psb_swap_send_,& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase1) + if (do_timings) call psb_tic(mv_phase2) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) call psb_tic(mv_phase3) + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase3) + if (do_timings) call psb_tic(mv_phase4) + call a%and%spmm(alpha,x%v,done,y%v,info) + if (do_timings) call psb_toc(mv_phase4) + end block + + else + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + + if (do_timings) call psb_tic(mv_phase11) + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + if (do_timings) call psb_toc(mv_phase11) + if (do_timings) call psb_tic(mv_phase12) + call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) call psb_toc(mv_phase12) + end block end if - - call psb_csmm(alpha,a,x,beta,y,info) - + if(info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) diff --git a/base/psblas/psb_saxpby.f90 b/base/psblas/psb_saxpby.f90 index 78f4d01a..1b1f24e6 100644 --- a/base/psblas/psb_saxpby.f90 +++ b/base/psblas/psb_saxpby.f90 @@ -741,3 +741,86 @@ subroutine psb_saddconst_vect(x,b,z,desc_a,info) return end subroutine psb_saddconst_vect + + +subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + use psb_base_mod, psb_protect_name => psb_sabgdxyz_vect + implicit none + type(psb_s_vect_type), intent (inout) :: x + type(psb_s_vect_type), intent (inout) :: y + type(psb_s_vect_type), intent (inout) :: z + real(psb_spk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me,& + & err_act, iix, jjx, iiy, jjy, nr + integer(psb_lpk_) :: ix, ijx, iy, ijy, m + character(len=20) :: name, ch_err + + name='psb_s_addconst_vect' + if (psb_errstatus_fatal()) return + info=psb_success_ + call psb_erractionsave(err_act) + + ctxt=desc_a%get_context() + + call psb_info(ctxt, me, np) + if (np == -ione) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(z%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + iy = ione + + m = desc_a%get_global_rows() + nr = desc_a%get_local_rows() + + ! check vector correctness + call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 1' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 2' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if(desc_a%get_local_rows() > 0) then + call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_sabgdxyz_vect + diff --git a/base/psblas/psb_sspmm.f90 b/base/psblas/psb_sspmm.f90 index 43ee0d48..6c723831 100644 --- a/base/psblas/psb_sspmm.f90 +++ b/base/psblas/psb_sspmm.f90 @@ -83,6 +83,9 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,& character(len=20) :: name, ch_err logical :: aliw, doswap_ integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: do_timings=.true. + integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1 + integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1 name='psb_sspmv' info=psb_success_ @@ -130,6 +133,19 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,& call psb_errpush(info,name) goto 9999 end if + if ((do_timings).and.(mv_phase1==-1)) & + & mv_phase1 = psb_get_timer_idx("SPMM: and send ") + if ((do_timings).and.(mv_phase2==-1)) & + & mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad") + if ((do_timings).and.(mv_phase3==-1)) & + & mv_phase3 = psb_get_timer_idx("SPMM: and rcv") + if ((do_timings).and.(mv_phase4==-1)) & + & mv_phase4 = psb_get_timer_idx("SPMM: and cmp and") + if ((do_timings).and.(mv_phase11==-1)) & + & mv_phase11 = psb_get_timer_idx("SPMM: noand exch ") + if ((do_timings).and.(mv_phase12==-1)) & + & mv_phase12 = psb_get_timer_idx("SPMM: noand cmp") + m = desc_a%get_global_rows() n = desc_a%get_global_cols() @@ -178,14 +194,46 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + + if (allocated(a%ad)) then + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + !if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt() + if (do_timings) call psb_barrier(ctxt) + if (do_timings) call psb_tic(mv_phase1) + if (doswap_) call psi_swapdata(psb_swap_send_,& + & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase1) + if (do_timings) call psb_tic(mv_phase2) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) call psb_tic(mv_phase3) + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase3) + if (do_timings) call psb_tic(mv_phase4) + call a%and%spmm(alpha,x%v,sone,y%v,info) + if (do_timings) call psb_toc(mv_phase4) + end block + + else + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + + if (do_timings) call psb_tic(mv_phase11) + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + if (do_timings) call psb_toc(mv_phase11) + if (do_timings) call psb_tic(mv_phase12) + call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) call psb_toc(mv_phase12) + end block end if - - call psb_csmm(alpha,a,x,beta,y,info) - + if(info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) diff --git a/base/psblas/psb_zaxpby.f90 b/base/psblas/psb_zaxpby.f90 index 2258f38f..0f37a1f4 100644 --- a/base/psblas/psb_zaxpby.f90 +++ b/base/psblas/psb_zaxpby.f90 @@ -741,3 +741,86 @@ subroutine psb_zaddconst_vect(x,b,z,desc_a,info) return end subroutine psb_zaddconst_vect + + +subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + & desc_a, info) + use psb_base_mod, psb_protect_name => psb_zabgdxyz_vect + implicit none + type(psb_z_vect_type), intent (inout) :: x + type(psb_z_vect_type), intent (inout) :: y + type(psb_z_vect_type), intent (inout) :: z + complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + type(psb_desc_type), intent (in) :: desc_a + integer(psb_ipk_), intent(out) :: info + ! locals + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: np, me,& + & err_act, iix, jjx, iiy, jjy, nr + integer(psb_lpk_) :: ix, ijx, iy, ijy, m + character(len=20) :: name, ch_err + + name='psb_z_addconst_vect' + if (psb_errstatus_fatal()) return + info=psb_success_ + call psb_erractionsave(err_act) + + ctxt=desc_a%get_context() + + call psb_info(ctxt, me, np) + if (np == -ione) then + info = psb_err_context_error_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(x%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(y%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (.not.allocated(z%v)) then + info = psb_err_invalid_vect_state_ + call psb_errpush(info,name) + goto 9999 + endif + + ix = ione + iy = ione + + m = desc_a%get_global_rows() + nr = desc_a%get_local_rows() + + ! check vector correctness + call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 1' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_chkvect 2' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + if(desc_a%get_local_rows() > 0) then + call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + +end subroutine psb_zabgdxyz_vect + diff --git a/base/psblas/psb_zspmm.f90 b/base/psblas/psb_zspmm.f90 index b58ca303..179e4fad 100644 --- a/base/psblas/psb_zspmm.f90 +++ b/base/psblas/psb_zspmm.f90 @@ -83,6 +83,9 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,& character(len=20) :: name, ch_err logical :: aliw, doswap_ integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: do_timings=.true. + integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1 + integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1 name='psb_zspmv' info=psb_success_ @@ -130,6 +133,19 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,& call psb_errpush(info,name) goto 9999 end if + if ((do_timings).and.(mv_phase1==-1)) & + & mv_phase1 = psb_get_timer_idx("SPMM: and send ") + if ((do_timings).and.(mv_phase2==-1)) & + & mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad") + if ((do_timings).and.(mv_phase3==-1)) & + & mv_phase3 = psb_get_timer_idx("SPMM: and rcv") + if ((do_timings).and.(mv_phase4==-1)) & + & mv_phase4 = psb_get_timer_idx("SPMM: and cmp and") + if ((do_timings).and.(mv_phase11==-1)) & + & mv_phase11 = psb_get_timer_idx("SPMM: noand exch ") + if ((do_timings).and.(mv_phase12==-1)) & + & mv_phase12 = psb_get_timer_idx("SPMM: noand cmp") + m = desc_a%get_global_rows() n = desc_a%get_global_cols() @@ -178,14 +194,46 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,& if (trans_ == 'N') then ! Matrix is not transposed - - if (doswap_) then - call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& - & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + + if (allocated(a%ad)) then + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + !if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt() + if (do_timings) call psb_barrier(ctxt) + if (do_timings) call psb_tic(mv_phase1) + if (doswap_) call psi_swapdata(psb_swap_send_,& + & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase1) + if (do_timings) call psb_tic(mv_phase2) + call a%ad%spmm(alpha,x%v,beta,y%v,info) + if (do_timings) call psb_tic(mv_phase3) + if (doswap_) call psi_swapdata(psb_swap_recv_,& + & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + if (do_timings) call psb_toc(mv_phase3) + if (do_timings) call psb_tic(mv_phase4) + call a%and%spmm(alpha,x%v,zone,y%v,info) + if (do_timings) call psb_toc(mv_phase4) + end block + + else + block + logical, parameter :: do_timings=.true. + real(psb_dpk_) :: t1, t2, t3, t4, t5 + if (do_timings) call psb_barrier(ctxt) + + if (do_timings) call psb_tic(mv_phase11) + if (doswap_) then + call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& + & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) + end if + if (do_timings) call psb_toc(mv_phase11) + if (do_timings) call psb_tic(mv_phase12) + call psb_csmm(alpha,a,x,beta,y,info) + if (do_timings) call psb_toc(mv_phase12) + end block end if - - call psb_csmm(alpha,a,x,beta,y,info) - + if(info /= psb_success_) then info = psb_err_from_subroutine_non_ call psb_errpush(info,name) diff --git a/base/serial/impl/psb_c_base_mat_impl.F90 b/base/serial/impl/psb_c_base_mat_impl.F90 index 17f2cdc8..fc170a6a 100644 --- a/base/serial/impl/psb_c_base_mat_impl.F90 +++ b/base/serial/impl/psb_c_base_mat_impl.F90 @@ -2006,8 +2006,8 @@ subroutine psb_c_base_vect_mv(alpha,a,x,beta,y,info,trans) ! For the time being we just throw everything back ! onto the normal routines. - call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%spmm(alpha,x%v,beta,y%v,info,trans) call y%set_host() end subroutine psb_c_base_vect_mv @@ -2060,8 +2060,8 @@ subroutine psb_c_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if - call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2082,6 +2082,7 @@ subroutine psb_c_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) if (info == psb_success_)& & call a%inner_spsm(alpha,tmpv,beta,y,info,trans) + call y%set_host() if (info == psb_success_) then call tmpv%free(info) if (info == psb_success_) deallocate(tmpv,stat=info) @@ -2161,8 +2162,11 @@ subroutine psb_c_base_inner_vect_sv(alpha,a,x,beta,y,info,trans) info = psb_success_ call psb_erractionsave(err_act) + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) + call y%set_host() if (info /= psb_success_) then info = psb_err_from_subroutine_ diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90 index 5c90e287..ccf7e42d 100644 --- a/base/serial/impl/psb_c_coo_impl.F90 +++ b/base/serial/impl/psb_c_coo_impl.F90 @@ -165,7 +165,8 @@ subroutine psb_c_coo_scals(d,a,info) if (a%is_unit()) then call a%make_nonunit() end if - + + !$omp parallel do private(i) do i=1,a%get_nzeros() a%val(i) = a%val(i) * d enddo @@ -4174,7 +4175,6 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) #endif info = psb_success_ @@ -4205,7 +4205,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) ! 'iaux' has to allow the threads to have an exclusive group ! of indices as work space. Since each thread handles one ! row/column at the time, we allocate this way. - allocate(iaux(MAX((nc+2),(nr+2))*maxthreads),stat=info) + allocate(iaux(MAX((nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) @@ -4214,7 +4214,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(nzin+2),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) @@ -4268,7 +4268,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !locals integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) complex(psb_spk_), allocatable :: vs(:) - integer(psb_ipk_) :: nza, nzl,iret + integer(psb_ipk_) :: nza, nzl,iret, maxnzr integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' @@ -4277,7 +4277,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) + integer(psb_ipk_), allocatable :: kaux(:),idxaux(:) #endif info = psb_success_ @@ -4301,11 +4301,14 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf use_buffers = .false. end if - if (use_buffers) then - iaux(:) = 0 + !if (use_buffers) then #if defined(OPENMP) + !$omp workshare + iaux(:) = 0 + !$omp end workshare + maxnzr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nzin,ia,nr,iaux) & + !$OMP shared(nzin,ia,nr,iaux,maxnzr) & !$OMP private(i) & !$OMP reduction(.and.:use_buffers) do i=1,nzin @@ -4319,7 +4322,16 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO + maxnzr = 0 + !$OMP PARALLEL DO default(none) schedule(STATIC) & + !$OMP private(i) shared(nr,iaux)& + !$OMP reduction(max:maxnzr) + do i=1,nr + maxnzr = max(maxnzr,iaux(i)) + end do + !$OMP END PARALLEL DO #else + iaux(:) = 0 !srt_inp = .true. do i=1,nzin if ((ia(i) < 1).or.(ia(i) > nr)) then @@ -4333,8 +4345,12 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) end do + maxnzr = 0 + do i=1,nr + maxnzr = max(maxnzr,iaux(i)) + end do #endif - end if + !end if ! Check again use_buffers. We enter here if nzin >= nr and ! all the indices are valid @@ -4342,22 +4358,21 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf if (use_buffers) then #if defined(OPENMP) maxthreads = omp_get_max_threads() - allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) + allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if + !$omp workshare kaux(:) = 0 - sum(:) = 0 - sum(1) = 1 + !$omp end workshare err = 0 - ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! index for each row. We do the same on 'kaux' !$OMP PARALLEL default(none) & - !$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP shared(maxnzr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) @@ -4382,60 +4397,67 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !write(0,*) 'fix_coo_inner: trying with exscan' call psi_exscan(nr+1,iaux,info,shift=ione) !$OMP BARRIER - !$OMP SINGLE - !t0 = omp_get_wtime() - !$OMP END SINGLE - + + ! ------------------ Sorting and buffers ------------------- ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! unmodified 'iaux' - do j=idxstart,idxend + + !$omp do private(j) + do j=1,nr+1 idxaux(j) = iaux(j) end do + !$omp end do + ! Here we sort data inside the auxiliary buffers + !$omp do private(act_row,i,i1) do i=1,nzin act_row = ia(i) - if ((act_row >= idxstart) .and. (act_row <= idxend)) then - ias(idxaux(act_row)) = ia(i) - jas(idxaux(act_row)) = ja(i) - vs(idxaux(act_row)) = val(i) - idxaux(act_row) = idxaux(act_row) + 1 - end if + !$omp atomic capture + i1 =idxaux(act_row) + idxaux(act_row) = idxaux(act_row) + 1 + !$omp end atomic + ias(i1) = ia(i) + jas(i1) = ja(i) + vs(i1) = val(i) end do + !$omp end do - !$OMP BARRIER - !$OMP SINGLE - !t1 = omp_get_wtime() - !write(0,*) ithread,'Srt&Cpy :',t1-t0 - !$OMP END SINGLE ! Let's sort column indices and values. After that we will store ! the number of unique values in 'kaux' - do j=idxstart,idxend - first_elem = iaux(j) - last_elem = iaux(j+1) - 1 - nzl = last_elem - first_elem + 1 - - ! The row has elements? - if (nzl > 0) then - call psi_msort_up(nzl,jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) then - call psb_ip_reord(nzl,vs(first_elem:last_elem),& - & ias(first_elem:last_elem),jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(2*maxnzr+2)) + !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + nzl = last_elem - first_elem + 1 - ! Over each row we count the unique values - kaux(j) = 1 - do i=first_elem+1,last_elem - if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then - cycle + ! The row has elements? + if (nzl > 0) then + call psi_msort_up(nzl,jas(first_elem:last_elem), & + & ixt,iret) + if (iret == 0) then + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & ixt) end if - kaux(j) = kaux(j) + 1 - end do - end if - end do + + ! Over each row we count the unique values + kaux(j) = 1 + do i=first_elem+1,last_elem + if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then + cycle + end if + kaux(j) = kaux(j) + 1 + end do + end if + end do + !$omp end do + deallocate(ixt) + end block ! -------------------------------------------------- ! ---------------- kaux composition ---------------- @@ -4553,7 +4575,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf nzout = kaux(nr+1) - 1 - deallocate(sum,kaux,idxaux,stat=info) + deallocate(kaux,idxaux,stat=info) #else !if (.not.srt_inp) then @@ -4710,7 +4732,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf & call psb_ip_reord(nzin,val,ia,ja,iaux) #if defined(OPENMP) !$OMP PARALLEL default(none) & - !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP work,first_elem,last_elem) @@ -4732,38 +4754,41 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf idxend = idxstart + work - 1 - ! --------------------------------------------------- + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(2*maxnzr+2)) + ! --------------------------------------------------- - first_elem = 0 - last_elem = -1 - act_row = idxstart - do j=1,nzin - if (ia(j) < act_row) then - cycle - else if ((ia(j) > idxend) .or. (work < 1)) then - exit - else if (ia(j) > act_row) then - nzl = last_elem - first_elem + 1 + first_elem = 0 + last_elem = -1 + act_row = idxstart + do j=1,nzin + if (ia(j) < act_row) then + cycle + else if ((ia(j) > idxend) .or. (work < 1)) then + exit + else if (ia(j) > act_row) then + nzl = last_elem - first_elem + 1 - if (nzl > 0) then - call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) & - & call psb_ip_reord(nzl,val(first_elem:last_elem),& - & ia(first_elem:last_elem),ja(first_elem:last_elem),& - & iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + if (nzl > 0) then + call psi_msort_up(nzl,ja(first_elem:last_elem),ixt,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),ixt) + end if - act_row = act_row + 1 - first_elem = 0 - last_elem = -1 - else - if (first_elem == 0) then - first_elem = j - end if + act_row = act_row + 1 + first_elem = 0 + last_elem = -1 + else + if (first_elem == 0) then + first_elem = j + end if - last_elem = j - end if - end do + last_elem = j + end if + end do + end block !$OMP END PARALLEL #else i = 1 diff --git a/base/serial/impl/psb_c_csc_impl.F90 b/base/serial/impl/psb_c_csc_impl.F90 index 54332d06..fe7227dd 100644 --- a/base/serial/impl/psb_c_csc_impl.F90 +++ b/base/serial/impl/psb_c_csc_impl.F90 @@ -2163,7 +2163,7 @@ subroutine psb_c_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -2328,7 +2328,7 @@ subroutine psb_c_cp_csc_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat nc = a%get_ncols() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) @@ -2461,7 +2461,7 @@ subroutine psb_c_cp_csc_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat nc = b%get_ncols() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) @@ -4058,7 +4058,7 @@ subroutine psb_lc_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat call b%set_nzeros(a%get_nzeros()) diff --git a/base/serial/impl/psb_c_csr_impl.F90 b/base/serial/impl/psb_c_csr_impl.F90 index 2028300d..276d3d1c 100644 --- a/base/serial/impl/psb_c_csr_impl.F90 +++ b/base/serial/impl/psb_c_csr_impl.F90 @@ -3318,7 +3318,7 @@ subroutine psb_c_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -3489,7 +3489,7 @@ subroutine psb_c_cp_csr_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat nr = a%get_nrows() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) @@ -3594,7 +3594,7 @@ subroutine psb_c_cp_csr_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat nr = b%get_nrows() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) @@ -3805,6 +3805,7 @@ contains integer(psb_ipk_) :: ma, nb integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx + integer(psb_ipk_) :: nth, lth,ith ma = a%get_nrows() nb = b%get_ncols() @@ -3815,12 +3816,19 @@ contains ! dense accumulator ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf call psb_realloc(nb, acc, info) + !$omp parallel shared(nth,lth) + !$omp single + nth = omp_get_num_threads() + lth = min(nth, ma) + !$omp end single + !$omp end parallel allocate(offsets(omp_get_max_threads())) !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & - !$omp shared(a,b,c,offsets) + !$omp num_threads(lth) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma if (start_idx == 0) then @@ -3876,15 +3884,14 @@ contains !$omp end single !$omp barrier - - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -3892,9 +3899,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson @@ -3930,6 +3938,7 @@ contains !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma do jj = a%irp(irw), a%irp(irw + 1) - 1 @@ -3996,14 +4005,14 @@ contains !$omp barrier - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -4011,9 +4020,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson_1d @@ -4312,6 +4322,266 @@ contains end subroutine psb_ccsrspspmm #endif +subroutine psb_c_ecsr_mold(a,b,info) + use psb_c_csr_mat_mod, psb_protect_name => psb_c_ecsr_mold + use psb_error_mod + implicit none + class(psb_c_ecsr_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='ecsr_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_ecsr_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ecsr_mold + +subroutine psb_c_ecsr_csmv(alpha,a,x,beta,y,info,trans) + use psb_error_mod + use psb_string_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_ecsr_csmv + implicit none + class(psb_c_ecsr_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: m, n + logical :: tra, ctra + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='c_csr_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_ecsr_cmp_nerwp + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: nnerws, i, nr, nzr + info = psb_success_ + nr = a%get_nrows() + call psb_realloc(nr,a%nerwp,info) + nnerws = 0 + do i=1, nr + nzr = a%irp(i+1)-a%irp(i) + if (nzr>0) then + nnerws = nnerws + 1 + a%nerwp(nnerws) = i + end if + end do + call psb_realloc(nnerws,a%nerwp,info) + a%nnerws = nnerws +end subroutine psb_c_ecsr_cmp_nerwp + +subroutine psb_c_cp_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_c_base_mat_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_ecsr_from_coo + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + call a%psb_c_csr_sparse_mat%cp_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_c_cp_ecsr_from_coo + +subroutine psb_c_mv_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_error_mod + use psb_c_base_mat_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_ecsr_from_coo + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_c_csr_sparse_mat%mv_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_c_mv_ecsr_from_coo + +subroutine psb_c_mv_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_c_base_mat_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_ecsr_from_fmt + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_c_csr_sparse_mat%mv_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_c_mv_ecsr_from_fmt + +subroutine psb_c_cp_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_c_base_mat_mod + use psb_realloc_mod + use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_ecsr_from_fmt + implicit none + + class(psb_c_ecsr_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_c_cp_ecsr_from_fmt + ! ! ! lc version @@ -6021,7 +6291,7 @@ subroutine psb_lc_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat call b%set_nzeros(a%get_nzeros()) diff --git a/base/serial/impl/psb_c_mat_impl.F90 b/base/serial/impl/psb_c_mat_impl.F90 index df5c4cd9..532cb9b8 100644 --- a/base/serial/impl/psb_c_mat_impl.F90 +++ b/base/serial/impl/psb_c_mat_impl.F90 @@ -1213,6 +1213,106 @@ subroutine psb_c_b_csclip(a,b,info,& end subroutine psb_c_b_csclip +subroutine psb_c_split_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_c_mat_mod, psb_protect_name => psb_c_split_nd + implicit none + class(psb_cspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_c_base_sparse_mat), intent(in), optional :: mold + type(psb_c_coo_sparse_mat) :: acoo + type(psb_c_csr_sparse_mat), allocatable :: aclip + type(psb_c_ecsr_sparse_mat), allocatable :: andclip + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + allocate(aclip) + call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.) + if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_split_nd + +subroutine psb_c_merge_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_c_mat_mod, psb_protect_name => psb_c_merge_nd + implicit none + class(psb_cspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_c_base_sparse_mat), intent(in), optional :: mold + type(psb_c_coo_sparse_mat) :: acoo1,acoo2 + integer(psb_ipk_) :: nz + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + + call a%ad%mv_to_coo(acoo1,info) + call acoo1%set_bld() + call acoo1%set_nrows(n_rows) + call acoo1%set_ncols(n_cols) + call a%and%mv_to_coo(acoo2,info) + nz=acoo2%get_nzeros() + call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info) + if (allocated(a%a)) then + call a%a%free() + deallocate(a%a) + end if + allocate(a%a,mold=a%ad) + call a%a%mv_from_coo(acoo1,info) + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_merge_nd + subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod @@ -1246,54 +1346,65 @@ subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl) goto 9999 end if - if (present(mold)) then - - allocate(altmp, mold=mold,stat=info) - - else if (present(type)) then + if (.false.) then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_c_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_c_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_c_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_c_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_c_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) goto 9999 - end select - else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) - end if + end if - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if - if (present(dupl)) then - call altmp%set_dupl(dupl) - else if (a%is_bld()) then - ! Does this make sense at all?? Who knows.. - call altmp%set_dupl(psb_dupl_def_) - end if + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - if (debug) write(psb_err_unit,*) 'Converting from ',& - & a%get_fmt(),' to ',altmp%get_fmt() + call altmp%cp_from_fmt(a%a, info) - call altmp%cp_from_fmt(a%a, info) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if - if (info /= psb_success_) then - info = psb_err_from_subroutine_ - call psb_errpush(info,name,a_err="mv_from") - goto 9999 + call move_alloc(altmp,b%a) + else + call inner_cp_fmt(a%a,b%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_cp_fmt(a%and,b%and,info,type,mold,dupl) + end if end if - call move_alloc(altmp,b%a) call b%trim() call b%set_asb() call psb_erractionrestore(err_act) @@ -1303,7 +1414,79 @@ subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_cp_fmt(a,b,info,type,mold,dupl) + class(psb_c_base_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_c_base_sparse_mat), intent(in), optional :: mold + + class(psb_c_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_c_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_c_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%cp_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,b) + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_cp_fmt end subroutine psb_c_cscnv subroutine psb_c_cscnv_ip(a,info,type,mold,dupl) @@ -1312,13 +1495,12 @@ subroutine psb_c_cscnv_ip(a,info,type,mold,dupl) use psb_c_mat_mod, psb_protect_name => psb_c_cscnv_ip implicit none - class(psb_cspmat_type), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_),optional, intent(in) :: dupl - character(len=*), optional, intent(in) :: type + class(psb_cspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type class(psb_c_base_sparse_mat), intent(in), optional :: mold - class(psb_c_base_sparse_mat), allocatable :: altmp integer(psb_ipk_) :: err_act character(len=20) :: name='cscnv_ip' @@ -1345,46 +1527,55 @@ subroutine psb_c_cscnv_ip(a,info,type,mold,dupl) goto 9999 end if - if (present(mold)) then + if (.false.) then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_c_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_c_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - allocate(altmp, mold=mold,stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if - else if (present(type)) then + if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_c_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_c_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_c_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) - goto 9999 - end select + call altmp%mv_from_fmt(a%a, info) + call move_alloc(altmp,a%a) else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) - end if - - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 + call inner_mv_fmt(a%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_mv_fmt(a%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_mv_fmt(a%and,info,type,mold,dupl) + end if end if - - if (debug) write(psb_err_unit,*) 'Converting in-place from ',& - & a%get_fmt(),' to ',altmp%get_fmt() - - call altmp%mv_from_fmt(a%a, info) - if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err="mv_from") goto 9999 end if - call move_alloc(altmp,a%a) call a%trim() call a%set_asb() call psb_erractionrestore(err_act) @@ -1394,6 +1585,77 @@ subroutine psb_c_cscnv_ip(a,info,type,mold,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_mv_fmt(a,info,type,mold,dupl) + class(psb_c_base_sparse_mat), intent(inout), allocatable :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_c_base_sparse_mat), intent(in), optional :: mold + class(psb_c_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_c_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_c_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_c_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%mv_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,a) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_mv_fmt end subroutine psb_c_cscnv_ip diff --git a/base/serial/impl/psb_d_base_mat_impl.F90 b/base/serial/impl/psb_d_base_mat_impl.F90 index 69112529..01a942d1 100644 --- a/base/serial/impl/psb_d_base_mat_impl.F90 +++ b/base/serial/impl/psb_d_base_mat_impl.F90 @@ -2006,8 +2006,8 @@ subroutine psb_d_base_vect_mv(alpha,a,x,beta,y,info,trans) ! For the time being we just throw everything back ! onto the normal routines. - call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%spmm(alpha,x%v,beta,y%v,info,trans) call y%set_host() end subroutine psb_d_base_vect_mv @@ -2060,8 +2060,8 @@ subroutine psb_d_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if - call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2082,6 +2082,7 @@ subroutine psb_d_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) if (info == psb_success_)& & call a%inner_spsm(alpha,tmpv,beta,y,info,trans) + call y%set_host() if (info == psb_success_) then call tmpv%free(info) if (info == psb_success_) deallocate(tmpv,stat=info) @@ -2161,8 +2162,11 @@ subroutine psb_d_base_inner_vect_sv(alpha,a,x,beta,y,info,trans) info = psb_success_ call psb_erractionsave(err_act) + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) + call y%set_host() if (info /= psb_success_) then info = psb_err_from_subroutine_ diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90 index f6a173d1..beb438aa 100644 --- a/base/serial/impl/psb_d_coo_impl.F90 +++ b/base/serial/impl/psb_d_coo_impl.F90 @@ -165,7 +165,8 @@ subroutine psb_d_coo_scals(d,a,info) if (a%is_unit()) then call a%make_nonunit() end if - + + !$omp parallel do private(i) do i=1,a%get_nzeros() a%val(i) = a%val(i) * d enddo @@ -4174,7 +4175,6 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) #endif info = psb_success_ @@ -4205,7 +4205,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) ! 'iaux' has to allow the threads to have an exclusive group ! of indices as work space. Since each thread handles one ! row/column at the time, we allocate this way. - allocate(iaux(MAX((nc+2),(nr+2))*maxthreads),stat=info) + allocate(iaux(MAX((nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) @@ -4214,7 +4214,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(nzin+2),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) @@ -4268,7 +4268,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !locals integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) real(psb_dpk_), allocatable :: vs(:) - integer(psb_ipk_) :: nza, nzl,iret + integer(psb_ipk_) :: nza, nzl,iret, maxnzr integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' @@ -4277,7 +4277,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) + integer(psb_ipk_), allocatable :: kaux(:),idxaux(:) #endif info = psb_success_ @@ -4301,11 +4301,14 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf use_buffers = .false. end if - if (use_buffers) then - iaux(:) = 0 + !if (use_buffers) then #if defined(OPENMP) + !$omp workshare + iaux(:) = 0 + !$omp end workshare + maxnzr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nzin,ia,nr,iaux) & + !$OMP shared(nzin,ia,nr,iaux,maxnzr) & !$OMP private(i) & !$OMP reduction(.and.:use_buffers) do i=1,nzin @@ -4319,7 +4322,16 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO + maxnzr = 0 + !$OMP PARALLEL DO default(none) schedule(STATIC) & + !$OMP private(i) shared(nr,iaux)& + !$OMP reduction(max:maxnzr) + do i=1,nr + maxnzr = max(maxnzr,iaux(i)) + end do + !$OMP END PARALLEL DO #else + iaux(:) = 0 !srt_inp = .true. do i=1,nzin if ((ia(i) < 1).or.(ia(i) > nr)) then @@ -4333,8 +4345,12 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) end do + maxnzr = 0 + do i=1,nr + maxnzr = max(maxnzr,iaux(i)) + end do #endif - end if + !end if ! Check again use_buffers. We enter here if nzin >= nr and ! all the indices are valid @@ -4342,22 +4358,21 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf if (use_buffers) then #if defined(OPENMP) maxthreads = omp_get_max_threads() - allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) + allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if + !$omp workshare kaux(:) = 0 - sum(:) = 0 - sum(1) = 1 + !$omp end workshare err = 0 - ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! index for each row. We do the same on 'kaux' !$OMP PARALLEL default(none) & - !$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP shared(maxnzr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) @@ -4382,60 +4397,67 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !write(0,*) 'fix_coo_inner: trying with exscan' call psi_exscan(nr+1,iaux,info,shift=ione) !$OMP BARRIER - !$OMP SINGLE - !t0 = omp_get_wtime() - !$OMP END SINGLE - + + ! ------------------ Sorting and buffers ------------------- ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! unmodified 'iaux' - do j=idxstart,idxend + + !$omp do private(j) + do j=1,nr+1 idxaux(j) = iaux(j) end do + !$omp end do + ! Here we sort data inside the auxiliary buffers + !$omp do private(act_row,i,i1) do i=1,nzin act_row = ia(i) - if ((act_row >= idxstart) .and. (act_row <= idxend)) then - ias(idxaux(act_row)) = ia(i) - jas(idxaux(act_row)) = ja(i) - vs(idxaux(act_row)) = val(i) - idxaux(act_row) = idxaux(act_row) + 1 - end if + !$omp atomic capture + i1 =idxaux(act_row) + idxaux(act_row) = idxaux(act_row) + 1 + !$omp end atomic + ias(i1) = ia(i) + jas(i1) = ja(i) + vs(i1) = val(i) end do + !$omp end do - !$OMP BARRIER - !$OMP SINGLE - !t1 = omp_get_wtime() - !write(0,*) ithread,'Srt&Cpy :',t1-t0 - !$OMP END SINGLE ! Let's sort column indices and values. After that we will store ! the number of unique values in 'kaux' - do j=idxstart,idxend - first_elem = iaux(j) - last_elem = iaux(j+1) - 1 - nzl = last_elem - first_elem + 1 - - ! The row has elements? - if (nzl > 0) then - call psi_msort_up(nzl,jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) then - call psb_ip_reord(nzl,vs(first_elem:last_elem),& - & ias(first_elem:last_elem),jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(2*maxnzr+2)) + !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + nzl = last_elem - first_elem + 1 - ! Over each row we count the unique values - kaux(j) = 1 - do i=first_elem+1,last_elem - if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then - cycle + ! The row has elements? + if (nzl > 0) then + call psi_msort_up(nzl,jas(first_elem:last_elem), & + & ixt,iret) + if (iret == 0) then + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & ixt) end if - kaux(j) = kaux(j) + 1 - end do - end if - end do + + ! Over each row we count the unique values + kaux(j) = 1 + do i=first_elem+1,last_elem + if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then + cycle + end if + kaux(j) = kaux(j) + 1 + end do + end if + end do + !$omp end do + deallocate(ixt) + end block ! -------------------------------------------------- ! ---------------- kaux composition ---------------- @@ -4553,7 +4575,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf nzout = kaux(nr+1) - 1 - deallocate(sum,kaux,idxaux,stat=info) + deallocate(kaux,idxaux,stat=info) #else !if (.not.srt_inp) then @@ -4710,7 +4732,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf & call psb_ip_reord(nzin,val,ia,ja,iaux) #if defined(OPENMP) !$OMP PARALLEL default(none) & - !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP work,first_elem,last_elem) @@ -4732,38 +4754,41 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf idxend = idxstart + work - 1 - ! --------------------------------------------------- + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(2*maxnzr+2)) + ! --------------------------------------------------- - first_elem = 0 - last_elem = -1 - act_row = idxstart - do j=1,nzin - if (ia(j) < act_row) then - cycle - else if ((ia(j) > idxend) .or. (work < 1)) then - exit - else if (ia(j) > act_row) then - nzl = last_elem - first_elem + 1 + first_elem = 0 + last_elem = -1 + act_row = idxstart + do j=1,nzin + if (ia(j) < act_row) then + cycle + else if ((ia(j) > idxend) .or. (work < 1)) then + exit + else if (ia(j) > act_row) then + nzl = last_elem - first_elem + 1 - if (nzl > 0) then - call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) & - & call psb_ip_reord(nzl,val(first_elem:last_elem),& - & ia(first_elem:last_elem),ja(first_elem:last_elem),& - & iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + if (nzl > 0) then + call psi_msort_up(nzl,ja(first_elem:last_elem),ixt,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),ixt) + end if - act_row = act_row + 1 - first_elem = 0 - last_elem = -1 - else - if (first_elem == 0) then - first_elem = j - end if + act_row = act_row + 1 + first_elem = 0 + last_elem = -1 + else + if (first_elem == 0) then + first_elem = j + end if - last_elem = j - end if - end do + last_elem = j + end if + end do + end block !$OMP END PARALLEL #else i = 1 diff --git a/base/serial/impl/psb_d_csc_impl.F90 b/base/serial/impl/psb_d_csc_impl.F90 index 1761b051..d9fa2874 100644 --- a/base/serial/impl/psb_d_csc_impl.F90 +++ b/base/serial/impl/psb_d_csc_impl.F90 @@ -2163,7 +2163,7 @@ subroutine psb_d_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -2328,7 +2328,7 @@ subroutine psb_d_cp_csc_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat nc = a%get_ncols() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) @@ -2461,7 +2461,7 @@ subroutine psb_d_cp_csc_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat nc = b%get_ncols() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) @@ -4058,7 +4058,7 @@ subroutine psb_ld_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_ld_base_sparse_mat = a%psb_ld_base_sparse_mat call b%set_nzeros(a%get_nzeros()) diff --git a/base/serial/impl/psb_d_csr_impl.F90 b/base/serial/impl/psb_d_csr_impl.F90 index 10f99bc9..a0aaeeee 100644 --- a/base/serial/impl/psb_d_csr_impl.F90 +++ b/base/serial/impl/psb_d_csr_impl.F90 @@ -3318,7 +3318,7 @@ subroutine psb_d_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -3489,7 +3489,7 @@ subroutine psb_d_cp_csr_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat nr = a%get_nrows() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) @@ -3594,7 +3594,7 @@ subroutine psb_d_cp_csr_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat nr = b%get_nrows() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) @@ -3805,6 +3805,7 @@ contains integer(psb_ipk_) :: ma, nb integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx + integer(psb_ipk_) :: nth, lth,ith ma = a%get_nrows() nb = b%get_ncols() @@ -3815,12 +3816,19 @@ contains ! dense accumulator ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf call psb_realloc(nb, acc, info) + !$omp parallel shared(nth,lth) + !$omp single + nth = omp_get_num_threads() + lth = min(nth, ma) + !$omp end single + !$omp end parallel allocate(offsets(omp_get_max_threads())) !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & - !$omp shared(a,b,c,offsets) + !$omp num_threads(lth) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma if (start_idx == 0) then @@ -3876,15 +3884,14 @@ contains !$omp end single !$omp barrier - - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -3892,9 +3899,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson @@ -3930,6 +3938,7 @@ contains !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma do jj = a%irp(irw), a%irp(irw + 1) - 1 @@ -3996,14 +4005,14 @@ contains !$omp barrier - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -4011,9 +4020,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson_1d @@ -4312,6 +4322,266 @@ contains end subroutine psb_dcsrspspmm #endif +subroutine psb_d_ecsr_mold(a,b,info) + use psb_d_csr_mat_mod, psb_protect_name => psb_d_ecsr_mold + use psb_error_mod + implicit none + class(psb_d_ecsr_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='ecsr_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_ecsr_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ecsr_mold + +subroutine psb_d_ecsr_csmv(alpha,a,x,beta,y,info,trans) + use psb_error_mod + use psb_string_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_ecsr_csmv + implicit none + class(psb_d_ecsr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: m, n + logical :: tra, ctra + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='d_csr_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_ecsr_cmp_nerwp + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: nnerws, i, nr, nzr + info = psb_success_ + nr = a%get_nrows() + call psb_realloc(nr,a%nerwp,info) + nnerws = 0 + do i=1, nr + nzr = a%irp(i+1)-a%irp(i) + if (nzr>0) then + nnerws = nnerws + 1 + a%nerwp(nnerws) = i + end if + end do + call psb_realloc(nnerws,a%nerwp,info) + a%nnerws = nnerws +end subroutine psb_d_ecsr_cmp_nerwp + +subroutine psb_d_cp_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_d_base_mat_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_cp_ecsr_from_coo + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + call a%psb_d_csr_sparse_mat%cp_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_d_cp_ecsr_from_coo + +subroutine psb_d_mv_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_error_mod + use psb_d_base_mat_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_mv_ecsr_from_coo + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_d_csr_sparse_mat%mv_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_d_mv_ecsr_from_coo + +subroutine psb_d_mv_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_d_base_mat_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_mv_ecsr_from_fmt + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_d_csr_sparse_mat%mv_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_d_mv_ecsr_from_fmt + +subroutine psb_d_cp_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_d_base_mat_mod + use psb_realloc_mod + use psb_d_csr_mat_mod, psb_protect_name => psb_d_cp_ecsr_from_fmt + implicit none + + class(psb_d_ecsr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_d_csr_sparse_mat%cp_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_d_cp_ecsr_from_fmt + ! ! ! ld version @@ -6021,7 +6291,7 @@ subroutine psb_ld_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_ld_base_sparse_mat = a%psb_ld_base_sparse_mat call b%set_nzeros(a%get_nzeros()) diff --git a/base/serial/impl/psb_d_mat_impl.F90 b/base/serial/impl/psb_d_mat_impl.F90 index 2a6fb9a5..e48654ce 100644 --- a/base/serial/impl/psb_d_mat_impl.F90 +++ b/base/serial/impl/psb_d_mat_impl.F90 @@ -1213,6 +1213,106 @@ subroutine psb_d_b_csclip(a,b,info,& end subroutine psb_d_b_csclip +subroutine psb_d_split_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_d_mat_mod, psb_protect_name => psb_d_split_nd + implicit none + class(psb_dspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_d_base_sparse_mat), intent(in), optional :: mold + type(psb_d_coo_sparse_mat) :: acoo + type(psb_d_csr_sparse_mat), allocatable :: aclip + type(psb_d_ecsr_sparse_mat), allocatable :: andclip + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + allocate(aclip) + call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.) + if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_split_nd + +subroutine psb_d_merge_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_d_mat_mod, psb_protect_name => psb_d_merge_nd + implicit none + class(psb_dspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_d_base_sparse_mat), intent(in), optional :: mold + type(psb_d_coo_sparse_mat) :: acoo1,acoo2 + integer(psb_ipk_) :: nz + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + + call a%ad%mv_to_coo(acoo1,info) + call acoo1%set_bld() + call acoo1%set_nrows(n_rows) + call acoo1%set_ncols(n_cols) + call a%and%mv_to_coo(acoo2,info) + nz=acoo2%get_nzeros() + call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info) + if (allocated(a%a)) then + call a%a%free() + deallocate(a%a) + end if + allocate(a%a,mold=a%ad) + call a%a%mv_from_coo(acoo1,info) + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_merge_nd + subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod @@ -1246,54 +1346,65 @@ subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl) goto 9999 end if - if (present(mold)) then - - allocate(altmp, mold=mold,stat=info) - - else if (present(type)) then + if (.false.) then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_d_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_d_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_d_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_d_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_d_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) goto 9999 - end select - else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) - end if + end if - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if - if (present(dupl)) then - call altmp%set_dupl(dupl) - else if (a%is_bld()) then - ! Does this make sense at all?? Who knows.. - call altmp%set_dupl(psb_dupl_def_) - end if + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - if (debug) write(psb_err_unit,*) 'Converting from ',& - & a%get_fmt(),' to ',altmp%get_fmt() + call altmp%cp_from_fmt(a%a, info) - call altmp%cp_from_fmt(a%a, info) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if - if (info /= psb_success_) then - info = psb_err_from_subroutine_ - call psb_errpush(info,name,a_err="mv_from") - goto 9999 + call move_alloc(altmp,b%a) + else + call inner_cp_fmt(a%a,b%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_cp_fmt(a%and,b%and,info,type,mold,dupl) + end if end if - call move_alloc(altmp,b%a) call b%trim() call b%set_asb() call psb_erractionrestore(err_act) @@ -1303,7 +1414,79 @@ subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_cp_fmt(a,b,info,type,mold,dupl) + class(psb_d_base_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_d_base_sparse_mat), intent(in), optional :: mold + + class(psb_d_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_d_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_d_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%cp_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,b) + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_cp_fmt end subroutine psb_d_cscnv subroutine psb_d_cscnv_ip(a,info,type,mold,dupl) @@ -1312,13 +1495,12 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl) use psb_d_mat_mod, psb_protect_name => psb_d_cscnv_ip implicit none - class(psb_dspmat_type), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_),optional, intent(in) :: dupl - character(len=*), optional, intent(in) :: type + class(psb_dspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type class(psb_d_base_sparse_mat), intent(in), optional :: mold - class(psb_d_base_sparse_mat), allocatable :: altmp integer(psb_ipk_) :: err_act character(len=20) :: name='cscnv_ip' @@ -1345,46 +1527,55 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl) goto 9999 end if - if (present(mold)) then + if (.false.) then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_d_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_d_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - allocate(altmp, mold=mold,stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if - else if (present(type)) then + if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_d_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_d_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_d_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) - goto 9999 - end select + call altmp%mv_from_fmt(a%a, info) + call move_alloc(altmp,a%a) else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) - end if - - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 + call inner_mv_fmt(a%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_mv_fmt(a%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_mv_fmt(a%and,info,type,mold,dupl) + end if end if - - if (debug) write(psb_err_unit,*) 'Converting in-place from ',& - & a%get_fmt(),' to ',altmp%get_fmt() - - call altmp%mv_from_fmt(a%a, info) - if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err="mv_from") goto 9999 end if - call move_alloc(altmp,a%a) call a%trim() call a%set_asb() call psb_erractionrestore(err_act) @@ -1394,6 +1585,77 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_mv_fmt(a,info,type,mold,dupl) + class(psb_d_base_sparse_mat), intent(inout), allocatable :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_d_base_sparse_mat), intent(in), optional :: mold + class(psb_d_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_d_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_d_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_d_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%mv_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,a) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_mv_fmt end subroutine psb_d_cscnv_ip diff --git a/base/serial/impl/psb_s_base_mat_impl.F90 b/base/serial/impl/psb_s_base_mat_impl.F90 index 4a99a684..205d0358 100644 --- a/base/serial/impl/psb_s_base_mat_impl.F90 +++ b/base/serial/impl/psb_s_base_mat_impl.F90 @@ -2006,8 +2006,8 @@ subroutine psb_s_base_vect_mv(alpha,a,x,beta,y,info,trans) ! For the time being we just throw everything back ! onto the normal routines. - call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%spmm(alpha,x%v,beta,y%v,info,trans) call y%set_host() end subroutine psb_s_base_vect_mv @@ -2060,8 +2060,8 @@ subroutine psb_s_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if - call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2082,6 +2082,7 @@ subroutine psb_s_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) if (info == psb_success_)& & call a%inner_spsm(alpha,tmpv,beta,y,info,trans) + call y%set_host() if (info == psb_success_) then call tmpv%free(info) if (info == psb_success_) deallocate(tmpv,stat=info) @@ -2161,8 +2162,11 @@ subroutine psb_s_base_inner_vect_sv(alpha,a,x,beta,y,info,trans) info = psb_success_ call psb_erractionsave(err_act) + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) + call y%set_host() if (info /= psb_success_) then info = psb_err_from_subroutine_ diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90 index 4c12d8fc..d833cf96 100644 --- a/base/serial/impl/psb_s_coo_impl.F90 +++ b/base/serial/impl/psb_s_coo_impl.F90 @@ -165,7 +165,8 @@ subroutine psb_s_coo_scals(d,a,info) if (a%is_unit()) then call a%make_nonunit() end if - + + !$omp parallel do private(i) do i=1,a%get_nzeros() a%val(i) = a%val(i) * d enddo @@ -4174,7 +4175,6 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) #endif info = psb_success_ @@ -4205,7 +4205,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) ! 'iaux' has to allow the threads to have an exclusive group ! of indices as work space. Since each thread handles one ! row/column at the time, we allocate this way. - allocate(iaux(MAX((nc+2),(nr+2))*maxthreads),stat=info) + allocate(iaux(MAX((nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) @@ -4214,7 +4214,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(nzin+2),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) @@ -4268,7 +4268,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !locals integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) real(psb_spk_), allocatable :: vs(:) - integer(psb_ipk_) :: nza, nzl,iret + integer(psb_ipk_) :: nza, nzl,iret, maxnzr integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' @@ -4277,7 +4277,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) + integer(psb_ipk_), allocatable :: kaux(:),idxaux(:) #endif info = psb_success_ @@ -4301,11 +4301,14 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf use_buffers = .false. end if - if (use_buffers) then - iaux(:) = 0 + !if (use_buffers) then #if defined(OPENMP) + !$omp workshare + iaux(:) = 0 + !$omp end workshare + maxnzr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nzin,ia,nr,iaux) & + !$OMP shared(nzin,ia,nr,iaux,maxnzr) & !$OMP private(i) & !$OMP reduction(.and.:use_buffers) do i=1,nzin @@ -4319,7 +4322,16 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO + maxnzr = 0 + !$OMP PARALLEL DO default(none) schedule(STATIC) & + !$OMP private(i) shared(nr,iaux)& + !$OMP reduction(max:maxnzr) + do i=1,nr + maxnzr = max(maxnzr,iaux(i)) + end do + !$OMP END PARALLEL DO #else + iaux(:) = 0 !srt_inp = .true. do i=1,nzin if ((ia(i) < 1).or.(ia(i) > nr)) then @@ -4333,8 +4345,12 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) end do + maxnzr = 0 + do i=1,nr + maxnzr = max(maxnzr,iaux(i)) + end do #endif - end if + !end if ! Check again use_buffers. We enter here if nzin >= nr and ! all the indices are valid @@ -4342,22 +4358,21 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf if (use_buffers) then #if defined(OPENMP) maxthreads = omp_get_max_threads() - allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) + allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if + !$omp workshare kaux(:) = 0 - sum(:) = 0 - sum(1) = 1 + !$omp end workshare err = 0 - ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! index for each row. We do the same on 'kaux' !$OMP PARALLEL default(none) & - !$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP shared(maxnzr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) @@ -4382,60 +4397,67 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !write(0,*) 'fix_coo_inner: trying with exscan' call psi_exscan(nr+1,iaux,info,shift=ione) !$OMP BARRIER - !$OMP SINGLE - !t0 = omp_get_wtime() - !$OMP END SINGLE - + + ! ------------------ Sorting and buffers ------------------- ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! unmodified 'iaux' - do j=idxstart,idxend + + !$omp do private(j) + do j=1,nr+1 idxaux(j) = iaux(j) end do + !$omp end do + ! Here we sort data inside the auxiliary buffers + !$omp do private(act_row,i,i1) do i=1,nzin act_row = ia(i) - if ((act_row >= idxstart) .and. (act_row <= idxend)) then - ias(idxaux(act_row)) = ia(i) - jas(idxaux(act_row)) = ja(i) - vs(idxaux(act_row)) = val(i) - idxaux(act_row) = idxaux(act_row) + 1 - end if + !$omp atomic capture + i1 =idxaux(act_row) + idxaux(act_row) = idxaux(act_row) + 1 + !$omp end atomic + ias(i1) = ia(i) + jas(i1) = ja(i) + vs(i1) = val(i) end do + !$omp end do - !$OMP BARRIER - !$OMP SINGLE - !t1 = omp_get_wtime() - !write(0,*) ithread,'Srt&Cpy :',t1-t0 - !$OMP END SINGLE ! Let's sort column indices and values. After that we will store ! the number of unique values in 'kaux' - do j=idxstart,idxend - first_elem = iaux(j) - last_elem = iaux(j+1) - 1 - nzl = last_elem - first_elem + 1 - - ! The row has elements? - if (nzl > 0) then - call psi_msort_up(nzl,jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) then - call psb_ip_reord(nzl,vs(first_elem:last_elem),& - & ias(first_elem:last_elem),jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(2*maxnzr+2)) + !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + nzl = last_elem - first_elem + 1 - ! Over each row we count the unique values - kaux(j) = 1 - do i=first_elem+1,last_elem - if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then - cycle + ! The row has elements? + if (nzl > 0) then + call psi_msort_up(nzl,jas(first_elem:last_elem), & + & ixt,iret) + if (iret == 0) then + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & ixt) end if - kaux(j) = kaux(j) + 1 - end do - end if - end do + + ! Over each row we count the unique values + kaux(j) = 1 + do i=first_elem+1,last_elem + if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then + cycle + end if + kaux(j) = kaux(j) + 1 + end do + end if + end do + !$omp end do + deallocate(ixt) + end block ! -------------------------------------------------- ! ---------------- kaux composition ---------------- @@ -4553,7 +4575,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf nzout = kaux(nr+1) - 1 - deallocate(sum,kaux,idxaux,stat=info) + deallocate(kaux,idxaux,stat=info) #else !if (.not.srt_inp) then @@ -4710,7 +4732,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf & call psb_ip_reord(nzin,val,ia,ja,iaux) #if defined(OPENMP) !$OMP PARALLEL default(none) & - !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP work,first_elem,last_elem) @@ -4732,38 +4754,41 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf idxend = idxstart + work - 1 - ! --------------------------------------------------- + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(2*maxnzr+2)) + ! --------------------------------------------------- - first_elem = 0 - last_elem = -1 - act_row = idxstart - do j=1,nzin - if (ia(j) < act_row) then - cycle - else if ((ia(j) > idxend) .or. (work < 1)) then - exit - else if (ia(j) > act_row) then - nzl = last_elem - first_elem + 1 + first_elem = 0 + last_elem = -1 + act_row = idxstart + do j=1,nzin + if (ia(j) < act_row) then + cycle + else if ((ia(j) > idxend) .or. (work < 1)) then + exit + else if (ia(j) > act_row) then + nzl = last_elem - first_elem + 1 - if (nzl > 0) then - call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) & - & call psb_ip_reord(nzl,val(first_elem:last_elem),& - & ia(first_elem:last_elem),ja(first_elem:last_elem),& - & iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + if (nzl > 0) then + call psi_msort_up(nzl,ja(first_elem:last_elem),ixt,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),ixt) + end if - act_row = act_row + 1 - first_elem = 0 - last_elem = -1 - else - if (first_elem == 0) then - first_elem = j - end if + act_row = act_row + 1 + first_elem = 0 + last_elem = -1 + else + if (first_elem == 0) then + first_elem = j + end if - last_elem = j - end if - end do + last_elem = j + end if + end do + end block !$OMP END PARALLEL #else i = 1 diff --git a/base/serial/impl/psb_s_csc_impl.F90 b/base/serial/impl/psb_s_csc_impl.F90 index a66b7dc0..c135c9ef 100644 --- a/base/serial/impl/psb_s_csc_impl.F90 +++ b/base/serial/impl/psb_s_csc_impl.F90 @@ -2163,7 +2163,7 @@ subroutine psb_s_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -2328,7 +2328,7 @@ subroutine psb_s_cp_csc_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat nc = a%get_ncols() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) @@ -2461,7 +2461,7 @@ subroutine psb_s_cp_csc_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat nc = b%get_ncols() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) @@ -4058,7 +4058,7 @@ subroutine psb_ls_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_ls_base_sparse_mat = a%psb_ls_base_sparse_mat call b%set_nzeros(a%get_nzeros()) diff --git a/base/serial/impl/psb_s_csr_impl.F90 b/base/serial/impl/psb_s_csr_impl.F90 index f3d5c669..9d5dc145 100644 --- a/base/serial/impl/psb_s_csr_impl.F90 +++ b/base/serial/impl/psb_s_csr_impl.F90 @@ -3318,7 +3318,7 @@ subroutine psb_s_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -3489,7 +3489,7 @@ subroutine psb_s_cp_csr_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat nr = a%get_nrows() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) @@ -3594,7 +3594,7 @@ subroutine psb_s_cp_csr_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat nr = b%get_nrows() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) @@ -3805,6 +3805,7 @@ contains integer(psb_ipk_) :: ma, nb integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx + integer(psb_ipk_) :: nth, lth,ith ma = a%get_nrows() nb = b%get_ncols() @@ -3815,12 +3816,19 @@ contains ! dense accumulator ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf call psb_realloc(nb, acc, info) + !$omp parallel shared(nth,lth) + !$omp single + nth = omp_get_num_threads() + lth = min(nth, ma) + !$omp end single + !$omp end parallel allocate(offsets(omp_get_max_threads())) !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & - !$omp shared(a,b,c,offsets) + !$omp num_threads(lth) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma if (start_idx == 0) then @@ -3876,15 +3884,14 @@ contains !$omp end single !$omp barrier - - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -3892,9 +3899,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson @@ -3930,6 +3938,7 @@ contains !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma do jj = a%irp(irw), a%irp(irw + 1) - 1 @@ -3996,14 +4005,14 @@ contains !$omp barrier - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -4011,9 +4020,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson_1d @@ -4312,6 +4322,266 @@ contains end subroutine psb_scsrspspmm #endif +subroutine psb_s_ecsr_mold(a,b,info) + use psb_s_csr_mat_mod, psb_protect_name => psb_s_ecsr_mold + use psb_error_mod + implicit none + class(psb_s_ecsr_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='ecsr_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_ecsr_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ecsr_mold + +subroutine psb_s_ecsr_csmv(alpha,a,x,beta,y,info,trans) + use psb_error_mod + use psb_string_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_ecsr_csmv + implicit none + class(psb_s_ecsr_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: m, n + logical :: tra, ctra + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='s_csr_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_ecsr_cmp_nerwp + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: nnerws, i, nr, nzr + info = psb_success_ + nr = a%get_nrows() + call psb_realloc(nr,a%nerwp,info) + nnerws = 0 + do i=1, nr + nzr = a%irp(i+1)-a%irp(i) + if (nzr>0) then + nnerws = nnerws + 1 + a%nerwp(nnerws) = i + end if + end do + call psb_realloc(nnerws,a%nerwp,info) + a%nnerws = nnerws +end subroutine psb_s_ecsr_cmp_nerwp + +subroutine psb_s_cp_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_s_base_mat_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_cp_ecsr_from_coo + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + call a%psb_s_csr_sparse_mat%cp_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_s_cp_ecsr_from_coo + +subroutine psb_s_mv_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_error_mod + use psb_s_base_mat_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_mv_ecsr_from_coo + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_s_csr_sparse_mat%mv_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_s_mv_ecsr_from_coo + +subroutine psb_s_mv_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_s_base_mat_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_mv_ecsr_from_fmt + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_s_csr_sparse_mat%mv_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_s_mv_ecsr_from_fmt + +subroutine psb_s_cp_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_s_base_mat_mod + use psb_realloc_mod + use psb_s_csr_mat_mod, psb_protect_name => psb_s_cp_ecsr_from_fmt + implicit none + + class(psb_s_ecsr_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_s_csr_sparse_mat%cp_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_s_cp_ecsr_from_fmt + ! ! ! ls version @@ -6021,7 +6291,7 @@ subroutine psb_ls_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_ls_base_sparse_mat = a%psb_ls_base_sparse_mat call b%set_nzeros(a%get_nzeros()) diff --git a/base/serial/impl/psb_s_mat_impl.F90 b/base/serial/impl/psb_s_mat_impl.F90 index ce7ce653..77ac81e6 100644 --- a/base/serial/impl/psb_s_mat_impl.F90 +++ b/base/serial/impl/psb_s_mat_impl.F90 @@ -1213,6 +1213,106 @@ subroutine psb_s_b_csclip(a,b,info,& end subroutine psb_s_b_csclip +subroutine psb_s_split_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_s_mat_mod, psb_protect_name => psb_s_split_nd + implicit none + class(psb_sspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_s_base_sparse_mat), intent(in), optional :: mold + type(psb_s_coo_sparse_mat) :: acoo + type(psb_s_csr_sparse_mat), allocatable :: aclip + type(psb_s_ecsr_sparse_mat), allocatable :: andclip + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + allocate(aclip) + call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.) + if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_split_nd + +subroutine psb_s_merge_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_s_mat_mod, psb_protect_name => psb_s_merge_nd + implicit none + class(psb_sspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_s_base_sparse_mat), intent(in), optional :: mold + type(psb_s_coo_sparse_mat) :: acoo1,acoo2 + integer(psb_ipk_) :: nz + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + + call a%ad%mv_to_coo(acoo1,info) + call acoo1%set_bld() + call acoo1%set_nrows(n_rows) + call acoo1%set_ncols(n_cols) + call a%and%mv_to_coo(acoo2,info) + nz=acoo2%get_nzeros() + call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info) + if (allocated(a%a)) then + call a%a%free() + deallocate(a%a) + end if + allocate(a%a,mold=a%ad) + call a%a%mv_from_coo(acoo1,info) + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_merge_nd + subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod @@ -1246,54 +1346,65 @@ subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl) goto 9999 end if - if (present(mold)) then - - allocate(altmp, mold=mold,stat=info) - - else if (present(type)) then + if (.false.) then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_s_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_s_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_s_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_s_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_s_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) goto 9999 - end select - else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) - end if + end if - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if - if (present(dupl)) then - call altmp%set_dupl(dupl) - else if (a%is_bld()) then - ! Does this make sense at all?? Who knows.. - call altmp%set_dupl(psb_dupl_def_) - end if + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - if (debug) write(psb_err_unit,*) 'Converting from ',& - & a%get_fmt(),' to ',altmp%get_fmt() + call altmp%cp_from_fmt(a%a, info) - call altmp%cp_from_fmt(a%a, info) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if - if (info /= psb_success_) then - info = psb_err_from_subroutine_ - call psb_errpush(info,name,a_err="mv_from") - goto 9999 + call move_alloc(altmp,b%a) + else + call inner_cp_fmt(a%a,b%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_cp_fmt(a%and,b%and,info,type,mold,dupl) + end if end if - call move_alloc(altmp,b%a) call b%trim() call b%set_asb() call psb_erractionrestore(err_act) @@ -1303,7 +1414,79 @@ subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_cp_fmt(a,b,info,type,mold,dupl) + class(psb_s_base_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_s_base_sparse_mat), intent(in), optional :: mold + + class(psb_s_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_s_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_s_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%cp_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,b) + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_cp_fmt end subroutine psb_s_cscnv subroutine psb_s_cscnv_ip(a,info,type,mold,dupl) @@ -1312,13 +1495,12 @@ subroutine psb_s_cscnv_ip(a,info,type,mold,dupl) use psb_s_mat_mod, psb_protect_name => psb_s_cscnv_ip implicit none - class(psb_sspmat_type), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_),optional, intent(in) :: dupl - character(len=*), optional, intent(in) :: type + class(psb_sspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type class(psb_s_base_sparse_mat), intent(in), optional :: mold - class(psb_s_base_sparse_mat), allocatable :: altmp integer(psb_ipk_) :: err_act character(len=20) :: name='cscnv_ip' @@ -1345,46 +1527,55 @@ subroutine psb_s_cscnv_ip(a,info,type,mold,dupl) goto 9999 end if - if (present(mold)) then + if (.false.) then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_s_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_s_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - allocate(altmp, mold=mold,stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if - else if (present(type)) then + if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_s_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_s_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_s_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) - goto 9999 - end select + call altmp%mv_from_fmt(a%a, info) + call move_alloc(altmp,a%a) else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) - end if - - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 + call inner_mv_fmt(a%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_mv_fmt(a%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_mv_fmt(a%and,info,type,mold,dupl) + end if end if - - if (debug) write(psb_err_unit,*) 'Converting in-place from ',& - & a%get_fmt(),' to ',altmp%get_fmt() - - call altmp%mv_from_fmt(a%a, info) - if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err="mv_from") goto 9999 end if - call move_alloc(altmp,a%a) call a%trim() call a%set_asb() call psb_erractionrestore(err_act) @@ -1394,6 +1585,77 @@ subroutine psb_s_cscnv_ip(a,info,type,mold,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_mv_fmt(a,info,type,mold,dupl) + class(psb_s_base_sparse_mat), intent(inout), allocatable :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_s_base_sparse_mat), intent(in), optional :: mold + class(psb_s_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_s_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_s_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_s_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%mv_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,a) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_mv_fmt end subroutine psb_s_cscnv_ip diff --git a/base/serial/impl/psb_z_base_mat_impl.F90 b/base/serial/impl/psb_z_base_mat_impl.F90 index 404027c5..915993f0 100644 --- a/base/serial/impl/psb_z_base_mat_impl.F90 +++ b/base/serial/impl/psb_z_base_mat_impl.F90 @@ -2006,8 +2006,8 @@ subroutine psb_z_base_vect_mv(alpha,a,x,beta,y,info,trans) ! For the time being we just throw everything back ! onto the normal routines. - call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%spmm(alpha,x%v,beta,y%v,info,trans) call y%set_host() end subroutine psb_z_base_vect_mv @@ -2060,8 +2060,8 @@ subroutine psb_z_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) goto 9999 end if - call x%sync() - call y%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() if (present(d)) then call d%sync() if (present(scale)) then @@ -2082,6 +2082,7 @@ subroutine psb_z_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d) if (info == psb_success_)& & call a%inner_spsm(alpha,tmpv,beta,y,info,trans) + call y%set_host() if (info == psb_success_) then call tmpv%free(info) if (info == psb_success_) deallocate(tmpv,stat=info) @@ -2161,8 +2162,11 @@ subroutine psb_z_base_inner_vect_sv(alpha,a,x,beta,y,info,trans) info = psb_success_ call psb_erractionsave(err_act) + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) + call y%set_host() if (info /= psb_success_) then info = psb_err_from_subroutine_ diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90 index 44ee89b5..ea6b2492 100644 --- a/base/serial/impl/psb_z_coo_impl.F90 +++ b/base/serial/impl/psb_z_coo_impl.F90 @@ -165,7 +165,8 @@ subroutine psb_z_coo_scals(d,a,info) if (a%is_unit()) then call a%make_nonunit() end if - + + !$omp parallel do private(i) do i=1,a%get_nzeros() a%val(i) = a%val(i) * d enddo @@ -4174,7 +4175,6 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) #endif info = psb_success_ @@ -4205,7 +4205,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) ! 'iaux' has to allow the threads to have an exclusive group ! of indices as work space. Since each thread handles one ! row/column at the time, we allocate this way. - allocate(iaux(MAX((nc+2),(nr+2))*maxthreads),stat=info) + allocate(iaux(MAX((nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) @@ -4214,7 +4214,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir) #else - allocate(iaux(nzin+2),stat=info) + allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) @@ -4268,7 +4268,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !locals integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) complex(psb_dpk_), allocatable :: vs(:) - integer(psb_ipk_) :: nza, nzl,iret + integer(psb_ipk_) :: nza, nzl,iret, maxnzr integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name = 'psb_fixcoo' @@ -4277,7 +4277,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf #if defined(OPENMP) integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads - integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) + integer(psb_ipk_), allocatable :: kaux(:),idxaux(:) #endif info = psb_success_ @@ -4301,11 +4301,14 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf use_buffers = .false. end if - if (use_buffers) then - iaux(:) = 0 + !if (use_buffers) then #if defined(OPENMP) + !$omp workshare + iaux(:) = 0 + !$omp end workshare + maxnzr = 0 !$OMP PARALLEL DO default(none) schedule(STATIC) & - !$OMP shared(nzin,ia,nr,iaux) & + !$OMP shared(nzin,ia,nr,iaux,maxnzr) & !$OMP private(i) & !$OMP reduction(.and.:use_buffers) do i=1,nzin @@ -4319,7 +4322,16 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf end if end do !$OMP END PARALLEL DO + maxnzr = 0 + !$OMP PARALLEL DO default(none) schedule(STATIC) & + !$OMP private(i) shared(nr,iaux)& + !$OMP reduction(max:maxnzr) + do i=1,nr + maxnzr = max(maxnzr,iaux(i)) + end do + !$OMP END PARALLEL DO #else + iaux(:) = 0 !srt_inp = .true. do i=1,nzin if ((ia(i) < 1).or.(ia(i) > nr)) then @@ -4333,8 +4345,12 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) end do + maxnzr = 0 + do i=1,nr + maxnzr = max(maxnzr,iaux(i)) + end do #endif - end if + !end if ! Check again use_buffers. We enter here if nzin >= nr and ! all the indices are valid @@ -4342,22 +4358,21 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf if (use_buffers) then #if defined(OPENMP) maxthreads = omp_get_max_threads() - allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) + allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info) if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info,name) goto 9999 end if + !$omp workshare kaux(:) = 0 - sum(:) = 0 - sum(1) = 1 + !$omp end workshare err = 0 - ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! index for each row. We do the same on 'kaux' !$OMP PARALLEL default(none) & - !$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & + !$OMP shared(maxnzr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) @@ -4382,60 +4397,67 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf !write(0,*) 'fix_coo_inner: trying with exscan' call psi_exscan(nr+1,iaux,info,shift=ione) !$OMP BARRIER - !$OMP SINGLE - !t0 = omp_get_wtime() - !$OMP END SINGLE - + + ! ------------------ Sorting and buffers ------------------- ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! unmodified 'iaux' - do j=idxstart,idxend + + !$omp do private(j) + do j=1,nr+1 idxaux(j) = iaux(j) end do + !$omp end do + ! Here we sort data inside the auxiliary buffers + !$omp do private(act_row,i,i1) do i=1,nzin act_row = ia(i) - if ((act_row >= idxstart) .and. (act_row <= idxend)) then - ias(idxaux(act_row)) = ia(i) - jas(idxaux(act_row)) = ja(i) - vs(idxaux(act_row)) = val(i) - idxaux(act_row) = idxaux(act_row) + 1 - end if + !$omp atomic capture + i1 =idxaux(act_row) + idxaux(act_row) = idxaux(act_row) + 1 + !$omp end atomic + ias(i1) = ia(i) + jas(i1) = ja(i) + vs(i1) = val(i) end do + !$omp end do - !$OMP BARRIER - !$OMP SINGLE - !t1 = omp_get_wtime() - !write(0,*) ithread,'Srt&Cpy :',t1-t0 - !$OMP END SINGLE ! Let's sort column indices and values. After that we will store ! the number of unique values in 'kaux' - do j=idxstart,idxend - first_elem = iaux(j) - last_elem = iaux(j+1) - 1 - nzl = last_elem - first_elem + 1 - - ! The row has elements? - if (nzl > 0) then - call psi_msort_up(nzl,jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) then - call psb_ip_reord(nzl,vs(first_elem:last_elem),& - & ias(first_elem:last_elem),jas(first_elem:last_elem), & - & idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(2*maxnzr+2)) + !$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256) + do j=1,nr + first_elem = iaux(j) + last_elem = iaux(j+1) - 1 + nzl = last_elem - first_elem + 1 - ! Over each row we count the unique values - kaux(j) = 1 - do i=first_elem+1,last_elem - if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then - cycle + ! The row has elements? + if (nzl > 0) then + call psi_msort_up(nzl,jas(first_elem:last_elem), & + & ixt,iret) + if (iret == 0) then + call psb_ip_reord(nzl,vs(first_elem:last_elem),& + & ias(first_elem:last_elem),jas(first_elem:last_elem), & + & ixt) end if - kaux(j) = kaux(j) + 1 - end do - end if - end do + + ! Over each row we count the unique values + kaux(j) = 1 + do i=first_elem+1,last_elem + if (ias(i) == ias(i-1) .and. jas(i) == jas(i-1)) then + cycle + end if + kaux(j) = kaux(j) + 1 + end do + end if + end do + !$omp end do + deallocate(ixt) + end block ! -------------------------------------------------- ! ---------------- kaux composition ---------------- @@ -4553,7 +4575,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf nzout = kaux(nr+1) - 1 - deallocate(sum,kaux,idxaux,stat=info) + deallocate(kaux,idxaux,stat=info) #else !if (.not.srt_inp) then @@ -4710,7 +4732,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf & call psb_ip_reord(nzin,val,ia,ja,iaux) #if defined(OPENMP) !$OMP PARALLEL default(none) & - !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & + !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP work,first_elem,last_elem) @@ -4732,38 +4754,41 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf idxend = idxstart + work - 1 - ! --------------------------------------------------- + block + integer(psb_ipk_), allocatable :: ixt(:) + allocate(ixt(2*maxnzr+2)) + ! --------------------------------------------------- - first_elem = 0 - last_elem = -1 - act_row = idxstart - do j=1,nzin - if (ia(j) < act_row) then - cycle - else if ((ia(j) > idxend) .or. (work < 1)) then - exit - else if (ia(j) > act_row) then - nzl = last_elem - first_elem + 1 + first_elem = 0 + last_elem = -1 + act_row = idxstart + do j=1,nzin + if (ia(j) < act_row) then + cycle + else if ((ia(j) > idxend) .or. (work < 1)) then + exit + else if (ia(j) > act_row) then + nzl = last_elem - first_elem + 1 - if (nzl > 0) then - call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) - if (iret == 0) & - & call psb_ip_reord(nzl,val(first_elem:last_elem),& - & ia(first_elem:last_elem),ja(first_elem:last_elem),& - & iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) - end if + if (nzl > 0) then + call psi_msort_up(nzl,ja(first_elem:last_elem),ixt,iret) + if (iret == 0) & + & call psb_ip_reord(nzl,val(first_elem:last_elem),& + & ia(first_elem:last_elem),ja(first_elem:last_elem),ixt) + end if - act_row = act_row + 1 - first_elem = 0 - last_elem = -1 - else - if (first_elem == 0) then - first_elem = j - end if + act_row = act_row + 1 + first_elem = 0 + last_elem = -1 + else + if (first_elem == 0) then + first_elem = j + end if - last_elem = j - end if - end do + last_elem = j + end if + end do + end block !$OMP END PARALLEL #else i = 1 diff --git a/base/serial/impl/psb_z_csc_impl.F90 b/base/serial/impl/psb_z_csc_impl.F90 index e5516bd9..becb7003 100644 --- a/base/serial/impl/psb_z_csc_impl.F90 +++ b/base/serial/impl/psb_z_csc_impl.F90 @@ -2163,7 +2163,7 @@ subroutine psb_z_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -2328,7 +2328,7 @@ subroutine psb_z_cp_csc_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat nc = a%get_ncols() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) @@ -2461,7 +2461,7 @@ subroutine psb_z_cp_csc_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat nc = b%get_ncols() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) @@ -4058,7 +4058,7 @@ subroutine psb_lz_mv_csc_to_coo(a,b,info) nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_lz_base_sparse_mat = a%psb_lz_base_sparse_mat call b%set_nzeros(a%get_nzeros()) diff --git a/base/serial/impl/psb_z_csr_impl.F90 b/base/serial/impl/psb_z_csr_impl.F90 index 5cf1c72d..7f11c3bd 100644 --- a/base/serial/impl/psb_z_csr_impl.F90 +++ b/base/serial/impl/psb_z_csr_impl.F90 @@ -3318,7 +3318,7 @@ subroutine psb_z_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat call b%set_nzeros(a%get_nzeros()) @@ -3489,7 +3489,7 @@ subroutine psb_z_cp_csr_to_fmt(a,b,info) if (a%is_dev()) call a%sync() b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat nr = a%get_nrows() - nz = a%get_nzeros() + nz = max(a%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) @@ -3594,7 +3594,7 @@ subroutine psb_z_cp_csr_from_fmt(a,b,info) if (b%is_dev()) call b%sync() a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat nr = b%get_nrows() - nz = b%get_nzeros() + nz = max(b%get_nzeros(),ione) if (.false.) then if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) @@ -3805,6 +3805,7 @@ contains integer(psb_ipk_) :: ma, nb integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx + integer(psb_ipk_) :: nth, lth,ith ma = a%get_nrows() nb = b%get_ncols() @@ -3815,12 +3816,19 @@ contains ! dense accumulator ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf call psb_realloc(nb, acc, info) + !$omp parallel shared(nth,lth) + !$omp single + nth = omp_get_num_threads() + lth = min(nth, ma) + !$omp end single + !$omp end parallel allocate(offsets(omp_get_max_threads())) !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & - !$omp shared(a,b,c,offsets) + !$omp num_threads(lth) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma if (start_idx == 0) then @@ -3876,15 +3884,14 @@ contains !$omp end single !$omp barrier - - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -3892,9 +3899,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson @@ -3930,6 +3938,7 @@ contains !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) thread_upperbound = 0 start_idx = 0 + end_idx = 0 !$omp do schedule(static) private(irw, jj, j) do irw = 1, ma do jj = a%irp(irw), a%irp(irw + 1) - 1 @@ -3996,14 +4005,14 @@ contains !$omp barrier - if (omp_get_thread_num() /= 0) then - c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + if (omp_get_thread_num() /= 0) then + c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 + end if + do irw = start_idx, end_idx - 1 + c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) + end do end if - - do irw = start_idx, end_idx - 1 - c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) - end do - !$omp barrier !$omp single @@ -4011,9 +4020,10 @@ contains call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%ja, info) !$omp end single - - c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) - c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + if ((start_idx /= 0).and.(start_idx <= end_idx) ) then + c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) + c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) + end if !$omp end parallel end subroutine spmm_omp_gustavson_1d @@ -4312,6 +4322,266 @@ contains end subroutine psb_zcsrspspmm #endif +subroutine psb_z_ecsr_mold(a,b,info) + use psb_z_csr_mat_mod, psb_protect_name => psb_z_ecsr_mold + use psb_error_mod + implicit none + class(psb_z_ecsr_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='ecsr_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_ecsr_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ecsr_mold + +subroutine psb_z_ecsr_csmv(alpha,a,x,beta,y,info,trans) + use psb_error_mod + use psb_string_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_ecsr_csmv + implicit none + class(psb_z_ecsr_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: m, n + logical :: tra, ctra + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: ierr(5) + character(len=20) :: name='z_csr_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_ecsr_cmp_nerwp + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: nnerws, i, nr, nzr + info = psb_success_ + nr = a%get_nrows() + call psb_realloc(nr,a%nerwp,info) + nnerws = 0 + do i=1, nr + nzr = a%irp(i+1)-a%irp(i) + if (nzr>0) then + nnerws = nnerws + 1 + a%nerwp(nnerws) = i + end if + end do + call psb_realloc(nnerws,a%nerwp,info) + a%nnerws = nnerws +end subroutine psb_z_ecsr_cmp_nerwp + +subroutine psb_z_cp_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_z_base_mat_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_cp_ecsr_from_coo + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + call a%psb_z_csr_sparse_mat%cp_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_z_cp_ecsr_from_coo + +subroutine psb_z_mv_ecsr_from_coo(a,b,info) + use psb_const_mod + use psb_realloc_mod + use psb_error_mod + use psb_z_base_mat_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_mv_ecsr_from_coo + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_z_csr_sparse_mat%mv_from_coo(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_z_mv_ecsr_from_coo + +subroutine psb_z_mv_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_z_base_mat_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_mv_ecsr_from_fmt + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_z_csr_sparse_mat%mv_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_z_mv_ecsr_from_fmt + +subroutine psb_z_cp_ecsr_from_fmt(a,b,info) + use psb_const_mod + use psb_z_base_mat_mod + use psb_realloc_mod + use psb_z_csr_mat_mod, psb_protect_name => psb_z_cp_ecsr_from_fmt + implicit none + + class(psb_z_ecsr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + call a%psb_z_csr_sparse_mat%cp_from_fmt(b,info) + if (info == psb_success_) call a%cmp_nerwp(info) + +end subroutine psb_z_cp_ecsr_from_fmt + ! ! ! lz version @@ -6021,7 +6291,7 @@ subroutine psb_lz_mv_csr_to_coo(a,b,info) if (a%is_dev()) call a%sync() nr = a%get_nrows() nc = a%get_ncols() - nza = a%get_nzeros() + nza = max(a%get_nzeros(),ione) b%psb_lz_base_sparse_mat = a%psb_lz_base_sparse_mat call b%set_nzeros(a%get_nzeros()) diff --git a/base/serial/impl/psb_z_mat_impl.F90 b/base/serial/impl/psb_z_mat_impl.F90 index 2cebf9e7..18d6c03d 100644 --- a/base/serial/impl/psb_z_mat_impl.F90 +++ b/base/serial/impl/psb_z_mat_impl.F90 @@ -1213,6 +1213,106 @@ subroutine psb_z_b_csclip(a,b,info,& end subroutine psb_z_b_csclip +subroutine psb_z_split_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_z_mat_mod, psb_protect_name => psb_z_split_nd + implicit none + class(psb_zspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_z_base_sparse_mat), intent(in), optional :: mold + type(psb_z_coo_sparse_mat) :: acoo + type(psb_z_csr_sparse_mat), allocatable :: aclip + type(psb_z_ecsr_sparse_mat), allocatable :: andclip + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + allocate(aclip) + call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.) + allocate(a%ad,mold=a%a) + call a%ad%mv_from_coo(acoo,info) + call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.) + if (use_ecsr) then + allocate(andclip) + call andclip%mv_from_coo(acoo,info) + call move_alloc(andclip,a%and) + else + allocate(a%and,mold=a%a) + call a%and%mv_from_coo(acoo,info) + end if + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_split_nd + +subroutine psb_z_merge_nd(a,n_rows,n_cols,info) + use psb_error_mod + use psb_string_mod + use psb_z_mat_mod, psb_protect_name => psb_z_merge_nd + implicit none + class(psb_zspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n_rows, n_cols + integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_),optional, intent(in) :: dupl +!!$ character(len=*), optional, intent(in) :: type +!!$ class(psb_z_base_sparse_mat), intent(in), optional :: mold + type(psb_z_coo_sparse_mat) :: acoo1,acoo2 + integer(psb_ipk_) :: nz + logical, parameter :: use_ecsr=.true. + character(len=20) :: name, ch_err + integer(psb_ipk_) :: err_act + + info = psb_success_ + name = 'psb_split' + call psb_erractionsave(err_act) + + call a%ad%mv_to_coo(acoo1,info) + call acoo1%set_bld() + call acoo1%set_nrows(n_rows) + call acoo1%set_ncols(n_cols) + call a%and%mv_to_coo(acoo2,info) + nz=acoo2%get_nzeros() + call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info) + if (allocated(a%a)) then + call a%a%free() + deallocate(a%a) + end if + allocate(a%a,mold=a%ad) + call a%a%mv_from_coo(acoo1,info) + + if (psb_errstatus_fatal()) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='cscnv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_merge_nd + subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl) use psb_error_mod use psb_string_mod @@ -1246,54 +1346,65 @@ subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl) goto 9999 end if - if (present(mold)) then - - allocate(altmp, mold=mold,stat=info) - - else if (present(type)) then + if (.false.) then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_z_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_z_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_z_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_z_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_z_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) goto 9999 - end select - else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) - end if + end if - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - end if + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if - if (present(dupl)) then - call altmp%set_dupl(dupl) - else if (a%is_bld()) then - ! Does this make sense at all?? Who knows.. - call altmp%set_dupl(psb_dupl_def_) - end if + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - if (debug) write(psb_err_unit,*) 'Converting from ',& - & a%get_fmt(),' to ',altmp%get_fmt() + call altmp%cp_from_fmt(a%a, info) - call altmp%cp_from_fmt(a%a, info) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if - if (info /= psb_success_) then - info = psb_err_from_subroutine_ - call psb_errpush(info,name,a_err="mv_from") - goto 9999 + call move_alloc(altmp,b%a) + else + call inner_cp_fmt(a%a,b%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_cp_fmt(a%and,b%and,info,type,mold,dupl) + end if end if - call move_alloc(altmp,b%a) call b%trim() call b%set_asb() call psb_erractionrestore(err_act) @@ -1303,7 +1414,79 @@ subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_cp_fmt(a,b,info,type,mold,dupl) + class(psb_z_base_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_z_base_sparse_mat), intent(in), optional :: mold + + class(psb_z_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_z_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_z_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%cp_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,b) + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_cp_fmt end subroutine psb_z_cscnv subroutine psb_z_cscnv_ip(a,info,type,mold,dupl) @@ -1312,13 +1495,12 @@ subroutine psb_z_cscnv_ip(a,info,type,mold,dupl) use psb_z_mat_mod, psb_protect_name => psb_z_cscnv_ip implicit none - class(psb_zspmat_type), intent(inout) :: a - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_),optional, intent(in) :: dupl - character(len=*), optional, intent(in) :: type + class(psb_zspmat_type), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type class(psb_z_base_sparse_mat), intent(in), optional :: mold - class(psb_z_base_sparse_mat), allocatable :: altmp integer(psb_ipk_) :: err_act character(len=20) :: name='cscnv_ip' @@ -1345,46 +1527,55 @@ subroutine psb_z_cscnv_ip(a,info,type,mold,dupl) goto 9999 end if - if (present(mold)) then + if (.false.) then + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_z_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_z_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if - allocate(altmp, mold=mold,stat=info) + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if - else if (present(type)) then + if (debug) write(psb_err_unit,*) 'Converting in-place from ',& + & a%get_fmt(),' to ',altmp%get_fmt() - select case (psb_toupper(type)) - case ('CSR') - allocate(psb_z_csr_sparse_mat :: altmp, stat=info) - case ('COO') - allocate(psb_z_coo_sparse_mat :: altmp, stat=info) - case ('CSC') - allocate(psb_z_csc_sparse_mat :: altmp, stat=info) - case default - info = psb_err_format_unknown_ - call psb_errpush(info,name,a_err=type) - goto 9999 - end select + call altmp%mv_from_fmt(a%a, info) + call move_alloc(altmp,a%a) else - allocate(altmp, mold=psb_get_mat_default(a),stat=info) - end if - - if (info /= psb_success_) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 + call inner_mv_fmt(a%a,info,type,mold,dupl) + if (allocated(a%ad)) then + call inner_mv_fmt(a%ad,info,type,mold,dupl) + end if + if (allocated(a%and)) then + call inner_mv_fmt(a%and,info,type,mold,dupl) + end if end if - - if (debug) write(psb_err_unit,*) 'Converting in-place from ',& - & a%get_fmt(),' to ',altmp%get_fmt() - - call altmp%mv_from_fmt(a%a, info) - if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info,name,a_err="mv_from") goto 9999 end if - call move_alloc(altmp,a%a) call a%trim() call a%set_asb() call psb_erractionrestore(err_act) @@ -1394,6 +1585,77 @@ subroutine psb_z_cscnv_ip(a,info,type,mold,dupl) 9999 call psb_error_handler(err_act) return +contains + subroutine inner_mv_fmt(a,info,type,mold,dupl) + class(psb_z_base_sparse_mat), intent(inout), allocatable :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_),optional, intent(in) :: dupl + character(len=*), optional, intent(in) :: type + class(psb_z_base_sparse_mat), intent(in), optional :: mold + class(psb_z_base_sparse_mat), allocatable :: altmp + integer(psb_ipk_) :: err_act + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(mold)) then + + allocate(altmp, mold=mold,stat=info) + + else if (present(type)) then + + select case (psb_toupper(type)) + case ('CSR') + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + case ('COO') + allocate(psb_z_coo_sparse_mat :: altmp, stat=info) + case ('CSC') + allocate(psb_z_csc_sparse_mat :: altmp, stat=info) + case default + info = psb_err_format_unknown_ + call psb_errpush(info,name,a_err=type) + goto 9999 + end select + else + allocate(psb_z_csr_sparse_mat :: altmp, stat=info) + !allocate(altmp, mold=psb_get_mat_default(a),stat=info) + end if + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + + if (present(dupl)) then + call altmp%set_dupl(dupl) + else if (a%is_bld()) then + ! Does this make sense at all?? Who knows.. + call altmp%set_dupl(psb_dupl_def_) + end if + + if (debug) write(psb_err_unit,*) 'Converting from ',& + & a%get_fmt(),' to ',altmp%get_fmt() + + call altmp%mv_from_fmt(a, info) + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="mv_from") + goto 9999 + end if + + call move_alloc(altmp,a) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + end subroutine inner_mv_fmt end subroutine psb_z_cscnv_ip diff --git a/base/serial/psi_c_serial_impl.F90 b/base/serial/psi_c_serial_impl.F90 index a3898349..e230a1e0 100644 --- a/base/serial/psi_c_serial_impl.F90 +++ b/base/serial/psi_c_serial_impl.F90 @@ -1567,3 +1567,300 @@ subroutine caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine caxpbyv2 + +subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_spk_), intent (in) :: x(:) + complex(psb_spk_), intent (inout) :: y(:) + complex(psb_spk_), intent (inout) :: z(:) + complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='cabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == czero) then + if (gamma == czero) then + if (alpha == czero) then + if (delta == czero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = czero + z(i) = czero + end do + else if (delta /= czero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = czero + z(i) = delta*z(i) + end do + end if + else if (alpha /= czero) then + if (delta == czero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = czero + end do + else if (delta /= czero) then + ! a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= czero) then + + if (alpha == czero) then + + if (delta == czero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = czero + z(i) = czero ! gamma*y(i) + end do + + else if (delta /= czero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = czero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= czero) then + + if (delta == czero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= czero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= czero) then + + if (gamma == czero) then + if (alpha == czero) then + if (delta == czero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = czero + end do + + else if (delta /= czero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= czero) then + if (delta == czero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = czero + end do + + else if (delta /= czero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= czero) then + if (alpha == czero) then + if (delta == czero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= czero) then + ! a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= czero) then + if (delta == czero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= czero) then + ! a n b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_cabgdxyz + +subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_spk_), intent (in) :: x(:) + complex(psb_spk_), intent (inout) :: y(:) + complex(psb_spk_), intent (inout) :: z(:) + complex(psb_spk_), intent (inout) :: w(:) + complex(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='cabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==czero).or.(b==czero).or. & + & (c==czero).or.(d==czero).or.& + & (e==czero).or.(f==czero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_cxyzw diff --git a/base/serial/psi_d_serial_impl.F90 b/base/serial/psi_d_serial_impl.F90 index 1b5b1442..bf1b2917 100644 --- a/base/serial/psi_d_serial_impl.F90 +++ b/base/serial/psi_d_serial_impl.F90 @@ -1567,3 +1567,300 @@ subroutine daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine daxpbyv2 + +subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_dpk_), intent (in) :: x(:) + real(psb_dpk_), intent (inout) :: y(:) + real(psb_dpk_), intent (inout) :: z(:) + real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='dabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == dzero) then + if (gamma == dzero) then + if (alpha == dzero) then + if (delta == dzero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = dzero + z(i) = dzero + end do + else if (delta /= dzero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = dzero + z(i) = delta*z(i) + end do + end if + else if (alpha /= dzero) then + if (delta == dzero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = dzero + end do + else if (delta /= dzero) then + ! a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= dzero) then + + if (alpha == dzero) then + + if (delta == dzero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = dzero + z(i) = dzero ! gamma*y(i) + end do + + else if (delta /= dzero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = dzero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= dzero) then + + if (delta == dzero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= dzero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= dzero) then + + if (gamma == dzero) then + if (alpha == dzero) then + if (delta == dzero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = dzero + end do + + else if (delta /= dzero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= dzero) then + if (delta == dzero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = dzero + end do + + else if (delta /= dzero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= dzero) then + if (alpha == dzero) then + if (delta == dzero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= dzero) then + ! a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= dzero) then + if (delta == dzero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= dzero) then + ! a n b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_dabgdxyz + +subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_dpk_), intent (in) :: x(:) + real(psb_dpk_), intent (inout) :: y(:) + real(psb_dpk_), intent (inout) :: z(:) + real(psb_dpk_), intent (inout) :: w(:) + real(psb_dpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='dabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==dzero).or.(b==dzero).or. & + & (c==dzero).or.(d==dzero).or.& + & (e==dzero).or.(f==dzero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_dxyzw diff --git a/base/serial/psi_e_serial_impl.F90 b/base/serial/psi_e_serial_impl.F90 index 9cdcdf0e..911ab4ec 100644 --- a/base/serial/psi_e_serial_impl.F90 +++ b/base/serial/psi_e_serial_impl.F90 @@ -1567,3 +1567,300 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine eaxpbyv2 + +subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_epk_), intent (in) :: x(:) + integer(psb_epk_), intent (inout) :: y(:) + integer(psb_epk_), intent (inout) :: z(:) + integer(psb_epk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='eabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == ezero) then + if (gamma == ezero) then + if (alpha == ezero) then + if (delta == ezero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = ezero + z(i) = ezero + end do + else if (delta /= ezero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = ezero + z(i) = delta*z(i) + end do + end if + else if (alpha /= ezero) then + if (delta == ezero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = ezero + end do + else if (delta /= ezero) then + ! a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= ezero) then + + if (alpha == ezero) then + + if (delta == ezero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = ezero + z(i) = ezero ! gamma*y(i) + end do + + else if (delta /= ezero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = ezero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= ezero) then + + if (delta == ezero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= ezero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= ezero) then + + if (gamma == ezero) then + if (alpha == ezero) then + if (delta == ezero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = ezero + end do + + else if (delta /= ezero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= ezero) then + if (delta == ezero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = ezero + end do + + else if (delta /= ezero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= ezero) then + if (alpha == ezero) then + if (delta == ezero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= ezero) then + ! a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= ezero) then + if (delta == ezero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= ezero) then + ! a n b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_eabgdxyz + +subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_epk_), intent (in) :: x(:) + integer(psb_epk_), intent (inout) :: y(:) + integer(psb_epk_), intent (inout) :: z(:) + integer(psb_epk_), intent (inout) :: w(:) + integer(psb_epk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='eabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==ezero).or.(b==ezero).or. & + & (c==ezero).or.(d==ezero).or.& + & (e==ezero).or.(f==ezero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_exyzw diff --git a/base/serial/psi_i2_serial_impl.F90 b/base/serial/psi_i2_serial_impl.F90 index d25617a9..fb42dfcd 100644 --- a/base/serial/psi_i2_serial_impl.F90 +++ b/base/serial/psi_i2_serial_impl.F90 @@ -1567,3 +1567,300 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine i2axpbyv2 + +subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_i2pk_), intent (in) :: x(:) + integer(psb_i2pk_), intent (inout) :: y(:) + integer(psb_i2pk_), intent (inout) :: z(:) + integer(psb_i2pk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='i2abgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == i2zero) then + if (gamma == i2zero) then + if (alpha == i2zero) then + if (delta == i2zero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = i2zero + z(i) = i2zero + end do + else if (delta /= i2zero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = i2zero + z(i) = delta*z(i) + end do + end if + else if (alpha /= i2zero) then + if (delta == i2zero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = i2zero + end do + else if (delta /= i2zero) then + ! a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= i2zero) then + + if (alpha == i2zero) then + + if (delta == i2zero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = i2zero + z(i) = i2zero ! gamma*y(i) + end do + + else if (delta /= i2zero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = i2zero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= i2zero) then + + if (delta == i2zero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= i2zero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= i2zero) then + + if (gamma == i2zero) then + if (alpha == i2zero) then + if (delta == i2zero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = i2zero + end do + + else if (delta /= i2zero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= i2zero) then + if (delta == i2zero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = i2zero + end do + + else if (delta /= i2zero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= i2zero) then + if (alpha == i2zero) then + if (delta == i2zero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= i2zero) then + ! a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= i2zero) then + if (delta == i2zero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= i2zero) then + ! a n b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_i2abgdxyz + +subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_i2pk_), intent (in) :: x(:) + integer(psb_i2pk_), intent (inout) :: y(:) + integer(psb_i2pk_), intent (inout) :: z(:) + integer(psb_i2pk_), intent (inout) :: w(:) + integer(psb_i2pk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='i2abgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==i2zero).or.(b==i2zero).or. & + & (c==i2zero).or.(d==i2zero).or.& + & (e==i2zero).or.(f==i2zero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_i2xyzw diff --git a/base/serial/psi_m_serial_impl.F90 b/base/serial/psi_m_serial_impl.F90 index 05c8e60f..346fd897 100644 --- a/base/serial/psi_m_serial_impl.F90 +++ b/base/serial/psi_m_serial_impl.F90 @@ -1567,3 +1567,300 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine maxpbyv2 + +subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_mpk_), intent (in) :: x(:) + integer(psb_mpk_), intent (inout) :: y(:) + integer(psb_mpk_), intent (inout) :: z(:) + integer(psb_mpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='mabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == mzero) then + if (gamma == mzero) then + if (alpha == mzero) then + if (delta == mzero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = mzero + z(i) = mzero + end do + else if (delta /= mzero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = mzero + z(i) = delta*z(i) + end do + end if + else if (alpha /= mzero) then + if (delta == mzero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = mzero + end do + else if (delta /= mzero) then + ! a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= mzero) then + + if (alpha == mzero) then + + if (delta == mzero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = mzero + z(i) = mzero ! gamma*y(i) + end do + + else if (delta /= mzero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = mzero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= mzero) then + + if (delta == mzero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= mzero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= mzero) then + + if (gamma == mzero) then + if (alpha == mzero) then + if (delta == mzero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = mzero + end do + + else if (delta /= mzero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= mzero) then + if (delta == mzero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = mzero + end do + + else if (delta /= mzero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= mzero) then + if (alpha == mzero) then + if (delta == mzero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= mzero) then + ! a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= mzero) then + if (delta == mzero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= mzero) then + ! a n b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_mabgdxyz + +subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + integer(psb_mpk_), intent (in) :: x(:) + integer(psb_mpk_), intent (inout) :: y(:) + integer(psb_mpk_), intent (inout) :: z(:) + integer(psb_mpk_), intent (inout) :: w(:) + integer(psb_mpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='mabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==mzero).or.(b==mzero).or. & + & (c==mzero).or.(d==mzero).or.& + & (e==mzero).or.(f==mzero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_mxyzw diff --git a/base/serial/psi_s_serial_impl.F90 b/base/serial/psi_s_serial_impl.F90 index 26a57e68..52f86bcd 100644 --- a/base/serial/psi_s_serial_impl.F90 +++ b/base/serial/psi_s_serial_impl.F90 @@ -1567,3 +1567,300 @@ subroutine saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine saxpbyv2 + +subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_spk_), intent (in) :: x(:) + real(psb_spk_), intent (inout) :: y(:) + real(psb_spk_), intent (inout) :: z(:) + real(psb_spk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='sabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == szero) then + if (gamma == szero) then + if (alpha == szero) then + if (delta == szero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = szero + z(i) = szero + end do + else if (delta /= szero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = szero + z(i) = delta*z(i) + end do + end if + else if (alpha /= szero) then + if (delta == szero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = szero + end do + else if (delta /= szero) then + ! a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= szero) then + + if (alpha == szero) then + + if (delta == szero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = szero + z(i) = szero ! gamma*y(i) + end do + + else if (delta /= szero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = szero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= szero) then + + if (delta == szero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= szero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= szero) then + + if (gamma == szero) then + if (alpha == szero) then + if (delta == szero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = szero + end do + + else if (delta /= szero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= szero) then + if (delta == szero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = szero + end do + + else if (delta /= szero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= szero) then + if (alpha == szero) then + if (delta == szero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= szero) then + ! a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= szero) then + if (delta == szero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= szero) then + ! a n b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_sabgdxyz + +subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_spk_), intent (in) :: x(:) + real(psb_spk_), intent (inout) :: y(:) + real(psb_spk_), intent (inout) :: z(:) + real(psb_spk_), intent (inout) :: w(:) + real(psb_spk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='sabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==szero).or.(b==szero).or. & + & (c==szero).or.(d==szero).or.& + & (e==szero).or.(f==szero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_sxyzw diff --git a/base/serial/psi_z_serial_impl.F90 b/base/serial/psi_z_serial_impl.F90 index 0b15b2d6..7e680273 100644 --- a/base/serial/psi_z_serial_impl.F90 +++ b/base/serial/psi_z_serial_impl.F90 @@ -1567,3 +1567,300 @@ subroutine zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) return end subroutine zaxpbyv2 + +subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_dpk_), intent (in) :: x(:) + complex(psb_dpk_), intent (inout) :: y(:) + complex(psb_dpk_), intent (inout) :: z(:) + complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='zabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if (beta == zzero) then + if (gamma == zzero) then + if (alpha == zzero) then + if (delta == zzero) then + ! a 0 b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = zzero + z(i) = zzero + end do + else if (delta /= zzero) then + ! a 0 b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = zzero + z(i) = delta*z(i) + end do + end if + else if (alpha /= zzero) then + if (delta == zzero) then + ! a n b 0 g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = zzero + end do + else if (delta /= zzero) then + ! a n b 0 g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = delta*z(i) + end do + + end if + + end if + + else if (gamma /= zzero) then + + if (alpha == zzero) then + + if (delta == zzero) then + ! a 0 b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = zzero + z(i) = zzero ! gamma*y(i) + end do + + else if (delta /= zzero) then + ! a 0 b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = zzero + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= zzero) then + + if (delta == zzero) then + ! a n b 0 g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i) + end do + + else if (delta /= zzero) then + ! a n b 0 g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + end if + + end if + + else if (beta /= zzero) then + + if (gamma == zzero) then + if (alpha == zzero) then + if (delta == zzero) then + ! a 0 b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = zzero + end do + + else if (delta /= zzero) then + ! a 0 b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = delta*z(i) + end do + + end if + + else if (alpha /= zzero) then + if (delta == zzero) then + ! a n b n g 0 d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = zzero + end do + + else if (delta /= zzero) then + ! a n b n g 0 d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = delta*z(i) + end do + + end if + + end if + else if (gamma /= zzero) then + if (alpha == zzero) then + if (delta == zzero) then + ! a 0 b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= zzero) then + ! a 0 b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + + else if (alpha /= zzero) then + if (delta == zzero) then + ! a n b n g n d 0 + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i) + end do + + else if (delta /= zzero) then + ! a n b n g n d n + !$omp parallel do private(i) + do i=1,m + y(i) = alpha*x(i)+beta*y(i) + z(i) = gamma*y(i)+delta*z(i) + end do + + end if + end if + end if + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_zabgdxyz + +subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info) + use psb_const_mod + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_dpk_), intent (in) :: x(:) + complex(psb_dpk_), intent (inout) :: y(:) + complex(psb_dpk_), intent (inout) :: z(:) + complex(psb_dpk_), intent (inout) :: w(:) + complex(psb_dpk_), intent (in) :: a,b,c,d,e,f + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + integer(psb_ipk_) :: int_err(5) + character name*20 + name='zabgdxyz' + + info = psb_success_ + if (m.lt.0) then + info=psb_err_iarg_neg_ + int_err(1)=1 + int_err(2)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(x).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=6 + int_err(2)=1 + int_err(3)=size(x) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(y).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=7 + int_err(2)=1 + int_err(3)=size(y) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + else if (size(z).lt.max(1,m)) then + info=psb_err_iarg_not_gtia_ii_ + int_err(1)=8 + int_err(2)=1 + int_err(3)=size(z) + int_err(4)=m + call fcpsb_errpush(info,name,int_err) + goto 9999 + endif + + if ((a==zzero).or.(b==zzero).or. & + & (c==zzero).or.(d==zzero).or.& + & (e==zzero).or.(f==zzero)) then + write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero' + else + !$omp parallel do private(i) + do i=1,m + y(i) = a*x(i)+b*y(i) + z(i) = c*y(i)+d*z(i) + w(i) = e*z(i)+f*w(i) + end do + + end if + + return + +9999 continue + call fcpsb_serror() + return + +end subroutine psi_zxyzw diff --git a/base/tools/psb_cspasb.f90 b/base/tools/psb_cspasb.f90 index 0c5f14ab..db8af75a 100644 --- a/base/tools/psb_cspasb.f90 +++ b/base/tools/psb_cspasb.f90 @@ -44,7 +44,7 @@ ! psb_upd_perm_ Permutation(more memory) ! ! -subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) +subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold, bld_and) use psb_base_mod, psb_protect_name => psb_cspasb use psb_sort_mod use psi_mod @@ -58,6 +58,7 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_), optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_c_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and !....Locals.... type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me, err_act @@ -65,6 +66,7 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name, ch_err class(psb_i_base_vect_type), allocatable :: ivm + logical :: bld_and_ info = psb_success_ name = 'psb_spasb' @@ -93,7 +95,11 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) if (debug_level >= psb_debug_ext_)& & write(debug_unit, *) me,' ',trim(name),& & ' Begin matrix assembly...' - + if (present(bld_and)) then + bld_and_ = bld_and + else + bld_and_ = .false. + end if !check on errors encountered in psdspins if (a%is_bld()) then @@ -171,7 +177,49 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) end if - + if (bld_and_) then +!!$ allocate(a%ad,mold=a%a) +!!$ allocate(a%and,mold=a%a)o + call a%split_nd(n_row,n_col,info) +!!$ block +!!$ character(len=1024) :: fname +!!$ type(psb_c_coo_sparse_mat) :: acoo +!!$ type(psb_c_csr_sparse_mat), allocatable :: aclip +!!$ type(psb_c_ecsr_sparse_mat), allocatable :: andclip +!!$ logical, parameter :: use_ecsr=.true. +!!$ allocate(aclip) +!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) +!!$ allocate(a%ad,mold=a%a) +!!$ call a%ad%mv_from_coo(acoo,info) +!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) +!!$ if (use_ecsr) then +!!$ allocate(andclip) +!!$ call andclip%mv_from_coo(acoo,info) +!!$ call move_alloc(andclip,a%and) +!!$ else +!!$ allocate(a%and,mold=a%a) +!!$ call a%and%mv_from_coo(acoo,info) +!!$ end if +!!$ if (.false.) then +!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%ad%print(25) +!!$ close(25) +!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%and%print(25) +!!$ close(25) +!!$ !call andclip%set_cols(n_col) +!!$ write(*,*) me,' ',trim(name),' ad ',& +!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col +!!$ write(*,*) me,' ',trim(name),' and ',& +!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col +!!$ end if +!!$ end block + else + if (allocated(a%ad)) deallocate(a%ad) + if (allocated(a%and)) deallocate(a%and) + end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() write(debug_unit, *) me,' ',trim(name),': From SPCNV',& diff --git a/base/tools/psb_dspasb.f90 b/base/tools/psb_dspasb.f90 index 3132f249..236568a1 100644 --- a/base/tools/psb_dspasb.f90 +++ b/base/tools/psb_dspasb.f90 @@ -44,7 +44,7 @@ ! psb_upd_perm_ Permutation(more memory) ! ! -subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) +subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold, bld_and) use psb_base_mod, psb_protect_name => psb_dspasb use psb_sort_mod use psi_mod @@ -58,6 +58,7 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_), optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_d_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and !....Locals.... type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me, err_act @@ -65,6 +66,7 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name, ch_err class(psb_i_base_vect_type), allocatable :: ivm + logical :: bld_and_ info = psb_success_ name = 'psb_spasb' @@ -93,7 +95,11 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) if (debug_level >= psb_debug_ext_)& & write(debug_unit, *) me,' ',trim(name),& & ' Begin matrix assembly...' - + if (present(bld_and)) then + bld_and_ = bld_and + else + bld_and_ = .false. + end if !check on errors encountered in psdspins if (a%is_bld()) then @@ -171,7 +177,49 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) end if - + if (bld_and_) then +!!$ allocate(a%ad,mold=a%a) +!!$ allocate(a%and,mold=a%a)o + call a%split_nd(n_row,n_col,info) +!!$ block +!!$ character(len=1024) :: fname +!!$ type(psb_d_coo_sparse_mat) :: acoo +!!$ type(psb_d_csr_sparse_mat), allocatable :: aclip +!!$ type(psb_d_ecsr_sparse_mat), allocatable :: andclip +!!$ logical, parameter :: use_ecsr=.true. +!!$ allocate(aclip) +!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) +!!$ allocate(a%ad,mold=a%a) +!!$ call a%ad%mv_from_coo(acoo,info) +!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) +!!$ if (use_ecsr) then +!!$ allocate(andclip) +!!$ call andclip%mv_from_coo(acoo,info) +!!$ call move_alloc(andclip,a%and) +!!$ else +!!$ allocate(a%and,mold=a%a) +!!$ call a%and%mv_from_coo(acoo,info) +!!$ end if +!!$ if (.false.) then +!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%ad%print(25) +!!$ close(25) +!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%and%print(25) +!!$ close(25) +!!$ !call andclip%set_cols(n_col) +!!$ write(*,*) me,' ',trim(name),' ad ',& +!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col +!!$ write(*,*) me,' ',trim(name),' and ',& +!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col +!!$ end if +!!$ end block + else + if (allocated(a%ad)) deallocate(a%ad) + if (allocated(a%and)) deallocate(a%and) + end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() write(debug_unit, *) me,' ',trim(name),': From SPCNV',& diff --git a/base/tools/psb_icdasb.F90 b/base/tools/psb_icdasb.F90 index 31d92133..04f95738 100644 --- a/base/tools/psb_icdasb.F90 +++ b/base/tools/psb_icdasb.F90 @@ -67,7 +67,7 @@ subroutine psb_icdasb(desc,info,ext_hv,mold) integer(psb_mpk_) :: icomm integer(psb_ipk_) :: np,me logical :: ext_hv_ - logical, parameter :: do_timings=.true. + logical, parameter :: do_timings=.false. integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1 integer(psb_ipk_), save :: idx_phase11=-1, idx_phase12=-1, idx_phase13=-1 integer(psb_ipk_), save :: idx_total=-1 diff --git a/base/tools/psb_sspasb.f90 b/base/tools/psb_sspasb.f90 index cfa316eb..110097c5 100644 --- a/base/tools/psb_sspasb.f90 +++ b/base/tools/psb_sspasb.f90 @@ -44,7 +44,7 @@ ! psb_upd_perm_ Permutation(more memory) ! ! -subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) +subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold, bld_and) use psb_base_mod, psb_protect_name => psb_sspasb use psb_sort_mod use psi_mod @@ -58,6 +58,7 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_), optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_s_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and !....Locals.... type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me, err_act @@ -65,6 +66,7 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name, ch_err class(psb_i_base_vect_type), allocatable :: ivm + logical :: bld_and_ info = psb_success_ name = 'psb_spasb' @@ -93,7 +95,11 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) if (debug_level >= psb_debug_ext_)& & write(debug_unit, *) me,' ',trim(name),& & ' Begin matrix assembly...' - + if (present(bld_and)) then + bld_and_ = bld_and + else + bld_and_ = .false. + end if !check on errors encountered in psdspins if (a%is_bld()) then @@ -171,7 +177,49 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) end if - + if (bld_and_) then +!!$ allocate(a%ad,mold=a%a) +!!$ allocate(a%and,mold=a%a)o + call a%split_nd(n_row,n_col,info) +!!$ block +!!$ character(len=1024) :: fname +!!$ type(psb_s_coo_sparse_mat) :: acoo +!!$ type(psb_s_csr_sparse_mat), allocatable :: aclip +!!$ type(psb_s_ecsr_sparse_mat), allocatable :: andclip +!!$ logical, parameter :: use_ecsr=.true. +!!$ allocate(aclip) +!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) +!!$ allocate(a%ad,mold=a%a) +!!$ call a%ad%mv_from_coo(acoo,info) +!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) +!!$ if (use_ecsr) then +!!$ allocate(andclip) +!!$ call andclip%mv_from_coo(acoo,info) +!!$ call move_alloc(andclip,a%and) +!!$ else +!!$ allocate(a%and,mold=a%a) +!!$ call a%and%mv_from_coo(acoo,info) +!!$ end if +!!$ if (.false.) then +!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%ad%print(25) +!!$ close(25) +!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%and%print(25) +!!$ close(25) +!!$ !call andclip%set_cols(n_col) +!!$ write(*,*) me,' ',trim(name),' ad ',& +!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col +!!$ write(*,*) me,' ',trim(name),' and ',& +!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col +!!$ end if +!!$ end block + else + if (allocated(a%ad)) deallocate(a%ad) + if (allocated(a%and)) deallocate(a%and) + end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() write(debug_unit, *) me,' ',trim(name),': From SPCNV',& diff --git a/base/tools/psb_zspasb.f90 b/base/tools/psb_zspasb.f90 index aeeef94d..2cb53368 100644 --- a/base/tools/psb_zspasb.f90 +++ b/base/tools/psb_zspasb.f90 @@ -44,7 +44,7 @@ ! psb_upd_perm_ Permutation(more memory) ! ! -subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) +subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold, bld_and) use psb_base_mod, psb_protect_name => psb_zspasb use psb_sort_mod use psi_mod @@ -58,6 +58,7 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_), optional, intent(in) :: upd character(len=*), optional, intent(in) :: afmt class(psb_z_base_sparse_mat), intent(in), optional :: mold + logical, intent(in), optional :: bld_and !....Locals.... type(psb_ctxt_type) :: ctxt integer(psb_ipk_) :: np,me, err_act @@ -65,6 +66,7 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) integer(psb_ipk_) :: debug_level, debug_unit character(len=20) :: name, ch_err class(psb_i_base_vect_type), allocatable :: ivm + logical :: bld_and_ info = psb_success_ name = 'psb_spasb' @@ -93,7 +95,11 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) if (debug_level >= psb_debug_ext_)& & write(debug_unit, *) me,' ',trim(name),& & ' Begin matrix assembly...' - + if (present(bld_and)) then + bld_and_ = bld_and + else + bld_and_ = .false. + end if !check on errors encountered in psdspins if (a%is_bld()) then @@ -171,7 +177,49 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) end if - + if (bld_and_) then +!!$ allocate(a%ad,mold=a%a) +!!$ allocate(a%and,mold=a%a)o + call a%split_nd(n_row,n_col,info) +!!$ block +!!$ character(len=1024) :: fname +!!$ type(psb_z_coo_sparse_mat) :: acoo +!!$ type(psb_z_csr_sparse_mat), allocatable :: aclip +!!$ type(psb_z_ecsr_sparse_mat), allocatable :: andclip +!!$ logical, parameter :: use_ecsr=.true. +!!$ allocate(aclip) +!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.) +!!$ allocate(a%ad,mold=a%a) +!!$ call a%ad%mv_from_coo(acoo,info) +!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.) +!!$ if (use_ecsr) then +!!$ allocate(andclip) +!!$ call andclip%mv_from_coo(acoo,info) +!!$ call move_alloc(andclip,a%and) +!!$ else +!!$ allocate(a%and,mold=a%a) +!!$ call a%and%mv_from_coo(acoo,info) +!!$ end if +!!$ if (.false.) then +!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%ad%print(25) +!!$ close(25) +!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx' +!!$ open(25,file=fname) +!!$ call a%and%print(25) +!!$ close(25) +!!$ !call andclip%set_cols(n_col) +!!$ write(*,*) me,' ',trim(name),' ad ',& +!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col +!!$ write(*,*) me,' ',trim(name),' and ',& +!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col +!!$ end if +!!$ end block + else + if (allocated(a%ad)) deallocate(a%ad) + if (allocated(a%and)) deallocate(a%and) + end if if (debug_level >= psb_debug_ext_) then ch_err=a%get_fmt() write(debug_unit, *) me,' ',trim(name),': From SPCNV',& diff --git a/config/pac.m4 b/config/pac.m4 index 69d2f863..72ad3d6a 100644 --- a/config/pac.m4 +++ b/config/pac.m4 @@ -2018,3 +2018,321 @@ CPPFLAGS="$SAVE_CPPFLAGS"; ])dnl +dnl @synopsis PAC_ARG_WITH_LIBRSB +dnl +dnl Test for --with-librsb="pathname". +dnl +dnl Defines the path to LIBRSB build dir. +dnl +dnl note: Renamed after PAC_ARG_WITH_LIBS as in the Trilinos package. +dnl +dnl Example use: +dnl +dnl PAC_ARG_WITH_LIBRSB +dnl +dnl tests for --with-librsb and pre-pends to LIBRSB_PATH +dnl +dnl @author Salvatore Filippone +dnl + +AC_DEFUN(PAC_ARG_WITH_LIBRSB, + [SAVE_LIBS="$LIBS" + SAVE_CPPFLAGS="$CPPFLAGS" + + AC_ARG_WITH(librsb, + AC_HELP_STRING([--with-librsb], [The directory for LIBRSB, for example, + --with-librsb=/opt/packages/librsb]), + [pac_cv_librsb_dir=$withval], + [pac_cv_librsb_dir='']) + + if test "x$pac_cv_librsb_dir" != "x"; then + LIBS="-L$pac_cv_librsb_dir $LIBS" + RSB_INCLUDES="-I$pac_cv_librsb_dir" + # CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS" + RSB_LIBDIR="-L$pac_cv_librsb_dir" + fi + #AC_MSG_CHECKING([librsb dir $pac_cv_librsb_dir]) + AC_CHECK_HEADER([$pac_cv_librsb_dir/rsb.h], + [pac_rsb_header_ok=yes], + [pac_rsb_header_ok=no; RSB_INCLUDES=""]) + + if test "x$pac_rsb_header_ok" == "xyes" ; then + RSB_LIBS="-lrsb $RSB_LIBDIR" + # LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS"; + # AC_MSG_CHECKING([for spgpuCreate in $GPU_LIBS]) + # AC_TRY_LINK_FUNC(spgpuCreate, + # [pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; ], + # [pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS=""]) + # AC_MSG_RESULT($pac_gpu_lib_ok) + # if test "x$pac_cv_have_spgpu" == "xyes" ; then + # AC_MSG_NOTICE([Have found SPGPU]) + RSBLIBNAME="librsb.a"; + LIBRSB_DIR="$pac_cv_librsb_dir"; + # SPGPU_DEFINES="-DHAVE_SPGPU"; + LIBRSB_INCDIR="$LIBRSB_DIR"; + LIBRSB_INCLUDES="-I$LIBRSB_INCDIR"; + LIBRSB_LIBS="-lrsb -L$LIBRSB_DIR"; + # CUDA_DIR="$pac_cv_cuda_dir"; + LIBRSB_DEFINES="-DHAVE_RSB"; + LRSB=-lpsb_rsb + # CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + # CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + FDEFINES="$LIBRSB_DEFINES $psblas_cv_define_prepend $FDEFINES"; + CDEFINES="$LIBRSB_DEFINES $CDEFINES";#CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES"; + fi +# fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" +]) +dnl + + +dnl @synopsis PAC_CHECK_SPGPU +dnl +dnl Will try to find the spgpu library and headers. +dnl +dnl Will use $CC +dnl +dnl If the test passes, will execute ACTION-IF-FOUND. Otherwise, ACTION-IF-NOT-FOUND. +dnl Note : This file will be likely to induce the compiler to create a module file +dnl (for a module called conftest). +dnl Depending on the compiler flags, this could cause a conftest.mod file to appear +dnl in the present directory, or in another, or with another name. So be warned! +dnl +dnl @author Salvatore Filippone +dnl +AC_DEFUN(PAC_CHECK_SPGPU, + [SAVE_LIBS="$LIBS" + SAVE_CPPFLAGS="$CPPFLAGS" + if test "x$pac_cv_have_cuda" == "x"; then + PAC_CHECK_CUDA() + fi +dnl AC_MSG_NOTICE([From CUDA: $pac_cv_have_cuda ]) + if test "x$pac_cv_have_cuda" == "xyes"; then + AC_ARG_WITH(spgpu, AC_HELP_STRING([--with-spgpu=DIR], [Specify the directory for SPGPU library and includes.]), + [pac_cv_spgpudir=$withval], + [pac_cv_spgpudir='']) + + AC_LANG([C]) + if test "x$pac_cv_spgpudir" != "x"; then + LIBS="-L$pac_cv_spgpudir/lib $LIBS" + GPU_INCLUDES="-I$pac_cv_spgpudir/include" + CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS" + GPU_LIBDIR="-L$pac_cv_spgpudir/lib" + fi + AC_MSG_CHECKING([spgpu dir $pac_cv_spgpudir]) + AC_CHECK_HEADER([core.h], + [pac_gpu_header_ok=yes], + [pac_gpu_header_ok=no; GPU_INCLUDES=""]) + + if test "x$pac_gpu_header_ok" == "xyes" ; then + GPU_LIBS="-lspgpu $GPU_LIBDIR" + LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS"; + AC_MSG_CHECKING([for spgpuCreate in $GPU_LIBS]) + AC_TRY_LINK_FUNC(spgpuCreate, + [pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; ], + [pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS=""]) + AC_MSG_RESULT($pac_gpu_lib_ok) + if test "x$pac_cv_have_spgpu" == "xyes" ; then + AC_MSG_NOTICE([Have found SPGPU]) + SPGPULIBNAME="libpsbgpu.a"; + SPGPU_DIR="$pac_cv_spgpudir"; + SPGPU_DEFINES="-DHAVE_SPGPU"; + SPGPU_INCDIR="$SPGPU_DIR/include"; + SPGPU_INCLUDES="-I$SPGPU_INCDIR"; + SPGPU_LIBS="-lspgpu -L$SPGPU_DIR/lib"; + LGPU=-lpsb_gpu + CUDA_DIR="$pac_cv_cuda_dir"; + CUDA_DEFINES="-DHAVE_CUDA"; + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + FDEFINES="$psblas_cv_define_prepend-DHAVE_GPU $psblas_cv_define_prepend-DHAVE_SPGPU $psblas_cv_define_prepend-DHAVE_CUDA $FDEFINES"; + CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES" ; + fi + fi +fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" +])dnl + + + + +dnl @synopsis PAC_CHECK_CUDA +dnl +dnl Will try to find the cuda library and headers. +dnl +dnl Will use $CC +dnl +dnl If the test passes, will execute ACTION-IF-FOUND. Otherwise, ACTION-IF-NOT-FOUND. +dnl Note : This file will be likely to induce the compiler to create a module file +dnl (for a module called conftest). +dnl Depending on the compiler flags, this could cause a conftest.mod file to appear +dnl in the present directory, or in another, or with another name. So be warned! +dnl +dnl @author Salvatore Filippone +dnl +AC_DEFUN(PAC_CHECK_CUDA, +[AC_ARG_WITH(cuda, AC_HELP_STRING([--with-cuda=DIR], [Specify the CUDA install directory.]), + [pac_cv_cuda_dir=$withval], + [pac_cv_cuda_dir='']) + +AC_LANG([C]) +SAVE_LIBS="$LIBS" +SAVE_CPPFLAGS="$CPPFLAGS" +if test "x$pac_cv_cuda_dir" != "x"; then + CUDA_DIR="$pac_cv_cuda_dir" + LIBS="-L$pac_cv_cuda_dir/lib $LIBS" + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_DEFINES="-DHAVE_CUDA" + CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + if test -f "$pac_cv_cuda_dir/bin/nvcc"; then + CUDA_NVCC="$pac_cv_cuda_dir/bin/nvcc" + else + CUDA_NVCC="nvcc" + fi +fi +AC_MSG_CHECKING([cuda dir $pac_cv_cuda_dir]) +AC_CHECK_HEADER([cuda_runtime.h], + [pac_cuda_header_ok=yes], + [pac_cuda_header_ok=no; CUDA_INCLUDES=""]) + +if test "x$pac_cuda_header_ok" == "xyes" ; then + CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" + LIBS="$CUDA_LIBS -lm $LIBS"; + AC_MSG_CHECKING([for cudaMemcpy in $CUDA_LIBS]) + AC_TRY_LINK_FUNC(cudaMemcpy, + [pac_cv_have_cuda=yes;pac_cuda_lib_ok=yes; ], + [pac_cv_have_cuda=no;pac_cuda_lib_ok=no; CUDA_LIBS=""]) + AC_MSG_RESULT($pac_cuda_lib_ok) + +fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" +])dnl + +dnl @synopsis PAC_ARG_WITH_CUDACC +dnl +dnl Test for --with-cudacc="set_of_cc". +dnl +dnl Defines the CC to compile for +dnl +dnl +dnl Example use: +dnl +dnl PAC_ARG_WITH_CUDACC +dnl +dnl @author Salvatore Filippone +dnl +AC_DEFUN([PAC_ARG_WITH_CUDACC], +[ +AC_ARG_WITH(cudacc, +AC_HELP_STRING([--with-cudacc], [A comma-separated list of CCs to compile to, for example, + --with-cudacc=50,60,70,75]), +[pac_cv_cudacc=$withval], +[pac_cv_cudacc='']) +]) + +AC_DEFUN(PAC_ARG_WITH_LIBRSB, + [SAVE_LIBS="$LIBS" + SAVE_CPPFLAGS="$CPPFLAGS" + + AC_ARG_WITH(librsb, + AC_HELP_STRING([--with-librsb], [The directory for LIBRSB, for example, + --with-librsb=/opt/packages/librsb]), + [pac_cv_librsb_dir=$withval], + [pac_cv_librsb_dir='']) + + if test "x$pac_cv_librsb_dir" != "x"; then + LIBS="-L$pac_cv_librsb_dir $LIBS" + RSB_INCLUDES="-I$pac_cv_librsb_dir" + # CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS" + RSB_LIBDIR="-L$pac_cv_librsb_dir" + fi + #AC_MSG_CHECKING([librsb dir $pac_cv_librsb_dir]) + AC_CHECK_HEADER([$pac_cv_librsb_dir/rsb.h], + [pac_rsb_header_ok=yes], + [pac_rsb_header_ok=no; RSB_INCLUDES=""]) + + if test "x$pac_rsb_header_ok" == "xyes" ; then + RSB_LIBS="-lrsb $RSB_LIBDIR" + # LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS"; + # AC_MSG_CHECKING([for spgpuCreate in $GPU_LIBS]) + # AC_TRY_LINK_FUNC(spgpuCreate, + # [pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; ], + # [pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS=""]) + # AC_MSG_RESULT($pac_gpu_lib_ok) + # if test "x$pac_cv_have_spgpu" == "xyes" ; then + # AC_MSG_NOTICE([Have found SPGPU]) + RSBLIBNAME="librsb.a"; + LIBRSB_DIR="$pac_cv_librsb_dir"; + # SPGPU_DEFINES="-DHAVE_SPGPU"; + LIBRSB_INCDIR="$LIBRSB_DIR"; + LIBRSB_INCLUDES="-I$LIBRSB_INCDIR"; + LIBRSB_LIBS="-lrsb -L$LIBRSB_DIR"; + # CUDA_DIR="$pac_cv_cuda_dir"; + LIBRSB_DEFINES="-DHAVE_RSB"; + LRSB=-lpsb_rsb + # CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + # CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + FDEFINES="$LIBRSB_DEFINES $psblas_cv_define_prepend $FDEFINES"; + CDEFINES="$LIBRSB_DEFINES $CDEFINES";#CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES"; + fi +# fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" +]) +dnl + +dnl @synopsis PAC_CHECK_CUDA_VERSION +dnl +dnl Will try to find the cuda version +dnl +dnl Will use $CC +dnl +dnl If the test passes, will execute ACTION-IF-FOUND. Otherwise, ACTION-IF-NOT-FOUND. +dnl Note : This file will be likely to induce the compiler to create a module file +dnl (for a module called conftest). +dnl Depending on the compiler flags, this could cause a conftest.mod file to appear +dnl in the present directory, or in another, or with another name. So be warned! +dnl +dnl @author Salvatore Filippone +dnl +AC_DEFUN(PAC_CHECK_CUDA_VERSION, +[AC_LANG_PUSH([C]) +SAVE_LIBS="$LIBS" +SAVE_CPPFLAGS="$CPPFLAGS" +if test "x$pac_cv_have_cuda" == "x"; then + PAC_CHECK_CUDA() +fi +if test "x$pac_cv_have_cuda" == "xyes"; then + CUDA_DIR="$pac_cv_cuda_dir" + LIBS="-L$pac_cv_cuda_dir/lib $LIBS" + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_DEFINES="-DHAVE_CUDA" + CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" + LIBS="$CUDA_LIBS -lm $LIBS"; + AC_MSG_CHECKING([for CUDA version]) + AC_LINK_IFELSE([AC_LANG_SOURCE([ +#include +#include + +int main(int argc, char **argv) +{ + printf("%d",CUDA_VERSION); + return(0); +} ])], + [pac_cv_cuda_version=`./conftest${ac_exeext} | sed 's/^ *//'`;], + [pac_cv_cuda_version="unknown";]) + + AC_MSG_RESULT($pac_cv_cuda_version) + fi +AC_LANG_POP([C]) +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" +])dnl + + diff --git a/configure b/configure index 504dbf4c..0c59d9fd 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for PSBLAS 3.7.0. +# Generated by GNU Autoconf 2.71 for PSBLAS 3.8.1. # # Report bugs to . # @@ -611,8 +611,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='PSBLAS' PACKAGE_TARNAME='psblas' -PACKAGE_VERSION='3.7.0' -PACKAGE_STRING='PSBLAS 3.7.0' +PACKAGE_VERSION='3.8.1' +PACKAGE_STRING='PSBLAS 3.8.1' PACKAGE_BUGREPORT='https://github.com/sfilippone/psblas3/issues' PACKAGE_URL='' @@ -653,6 +653,20 @@ ac_subst_vars='am__EXEEXT_FALSE am__EXEEXT_TRUE LTLIBOBJS LIBOBJS +LCUDA +CUDALD +CUDAD +CUDEFINES +CUDA_NVCC +CUDA_SHORT_VERSION +CUDA_VERSION +CUDA_LIBS +CUDA_INCLUDES +CUDA_DEFINES +CUDA_DIR +EXTRALDLIBS +SPGPU_LIBS +SPGPU_FLAGS METISINCFILE UTILLIBNAME METHDLIBNAME @@ -815,7 +829,6 @@ enable_openmp with_blas with_blasdir with_lapack -with_rsb with_metis with_metisincfile with_metisdir @@ -825,6 +838,8 @@ with_amd with_amddir with_amdincdir with_amdlibdir +with_cuda +with_cudacc ' ac_precious_vars='build_alias host_alias @@ -1390,7 +1405,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures PSBLAS 3.7.0 to adapt to many kinds of systems. +\`configure' configures PSBLAS 3.8.1 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1457,7 +1472,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of PSBLAS 3.7.0:";; + short | recursive ) echo "Configuration of PSBLAS 3.8.1:";; esac cat <<\_ACEOF @@ -1505,12 +1520,6 @@ Optional Packages: --with-blas= use BLAS library --with-blasdir= search for BLAS library in --with-lapack= use LAPACK library - --with-rsb Specify Recursive Sparse BLAS library linkage info - (that is, the output of librsb-config --static - --ldflags, or a directory where the usual - bin/include/lib subdirs with a regular RSB - installation resides, or nothing to make the - configure script invoke librsb-config) --with-metis=LIBNAME Specify the library name for METIS library. Default: "-lmetis" --with-metisincfile=DIR Specify the name for METIS include file. @@ -1523,6 +1532,9 @@ Optional Packages: --with-amddir=DIR Specify the directory for AMD library and includes. --with-amdincdir=DIR Specify the directory for AMD includes. --with-amdlibdir=DIR Specify the directory for AMD library. + --with-cuda=DIR Specify the CUDA install directory. + --with-cudacc A comma-separated list of CCs to compile to, for + example, --with-cudacc=50,60,70,75 Some influential environment variables: FC Fortran compiler command @@ -1607,7 +1619,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -PSBLAS configure 3.7.0 +PSBLAS configure 3.8.1 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. @@ -2291,7 +2303,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by PSBLAS $as_me 3.7.0, which was +It was created by PSBLAS $as_me 3.8.1, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -3265,7 +3277,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu # VERSION is the file containing the PSBLAS version code # FIXME -psblas_cv_version="3.7.0" +psblas_cv_version="3.8.1" # A sample source file @@ -3280,7 +3292,7 @@ psblas_cv_version="3.7.0" documentation, you can make your own by hand for your needs. Be sure to specify the library paths of your interest. Examples: - ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS + ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS FC=mpif90 CC=mpicc ./configure <- will force FC,CC See ./configure --help=short fore more info. @@ -3294,7 +3306,7 @@ printf "%s\n" "$as_me: documentation, you can make your own by hand for your needs. Be sure to specify the library paths of your interest. Examples: - ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS + ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS FC=mpif90 CC=mpicc ./configure <- will force FC,CC See ./configure --help=short fore more info. @@ -3440,7 +3452,7 @@ ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_fc_compiler_gnu if test -n "$ac_tool_prefix"; then - for ac_prog in ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 ifort ifc nagfor gfortran + for ac_prog in ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 flang-new ifort ifc nagfor gfortran do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 @@ -3489,7 +3501,7 @@ fi fi if test -z "$FC"; then ac_ct_FC=$FC - for ac_prog in ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 ifort ifc nagfor gfortran + for ac_prog in ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 flang-new ifort ifc nagfor gfortran do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 @@ -3916,7 +3928,7 @@ ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu if test -n "$ac_tool_prefix"; then - for ac_prog in xlc pgcc icc gcc cc + for ac_prog in xlc pgcc clang icc gcc cc do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 @@ -3965,7 +3977,7 @@ fi fi if test -z "$CC"; then ac_ct_CC=$CC - for ac_prog in xlc pgcc icc gcc cc + for ac_prog in xlc pgcc clang icc gcc cc do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 @@ -4412,7 +4424,7 @@ if test -z "$CXX"; then CXX=$CCC else if test -n "$ac_tool_prefix"; then - for ac_prog in CC xlc++ icpc g++ + for ac_prog in CC xlc++ clang++ icpc g++ do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 @@ -4461,7 +4473,7 @@ fi fi if test -z "$CXX"; then ac_ct_CXX=$CXX - for ac_prog in CC xlc++ icpc g++ + for ac_prog in CC xlc++ clang++ icpc g++ do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 @@ -6393,7 +6405,7 @@ fi # Define the identity of the package. PACKAGE='psblas' - VERSION='3.7.0' + VERSION='3.8.1' printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h @@ -6862,10 +6874,12 @@ if test x"$psblas_cv_fc" == "x" ; then # Sun compiler detection psblas_cv_fc="sun" - elif eval "$MPIFC -V 2>&1 | grep Portland 2>/dev/null" ; then + elif eval "$MPIFC --version 2>&1 | grep flang-new 2>/dev/null" ; then # Portland group compiler detection - psblas_cv_fc="pg" + psblas_cv_fc="flang" + psblas_cv_define_prepend="" + FDEFINES="$psblas_cv_define_prepend-DFLANG $FDEFINES" elif eval "$MPIFC -V 2>&1 | grep Intel.*Fortran.*Compiler 2>/dev/null" ; then # Intel compiler identification @@ -7289,6 +7303,9 @@ if test "X$FCOPT" == "X" ; then elif test "X$psblas_cv_fc" == X"pg" ; then # other compilers .. FCOPT="-fast $FCOPT" + elif test "X$psblas_cv_fc" == X"flang" ; then + # other compilers .. + FCOPT="-O3" # NOTE : PG & Sun use -fast instead -O3 elif test "X$psblas_cv_fc" == X"sun" ; then # other compilers .. @@ -7533,7 +7550,6 @@ fi if test x"$pac_cv_serial_mpi" == x"yes" ; then FDEFINES="$psblas_cv_define_prepend-DSERIAL_MPI $psblas_cv_define_prepend-DMPI_MOD $FDEFINES"; CDEFINES="-DSERIAL_MPI $CDEFINES" - CXXDEFINES="-DSERIAL_MPI $CXXDEFINES" else { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking MPI Fortran 2008 interface" >&5 printf %s "checking MPI Fortran 2008 interface... " >&6; } @@ -10001,31 +10017,6 @@ fi #AC_CHECK_LIB(umf,umfpack_di_solve,psblas_cv_have_umfpack=yes,psblas_cv_have_umfpack=no,[amd]) -# Check whether --with-rsb was given. -if test ${with_rsb+y} -then : - withval=$with_rsb; if test x"$withval" = xno; then -want_rsb_libs= ; else if test x"$withval" = xyes ; then want_rsb_libs=yes ; else want_rsb_libs="$withval" ; fi ; fi -else $as_nop - want_rsb_libs="" -fi - -if test x"$want_rsb_libs" != x ; then - if test x"$want_rsb_libs" = xyes ; then - want_rsb_libs="`librsb-config --static --ldflags`" - else - if test -d "$want_rsb_libs" ; then - want_rsb_libs="`$want_rsb_libs/bin/librsb-config --static --ldflags`" - else - true; - # we assume want_rsb_libs are linkage parameters - fi - fi - FDEFINES="$FDEFINES $psblas_cv_define_prepend-DHAVE_LIBRSB" -fi -RSB_LIBS="$want_rsb_libs" -LIBS="$RSB_LIBS ${LIBS}" - # Check whether --with-metis was given. if test ${with_metis+y} @@ -10612,6 +10603,274 @@ fi + +# Check whether --with-cuda was given. +if test ${with_cuda+y} +then : + withval=$with_cuda; pac_cv_cuda_dir=$withval +else $as_nop + pac_cv_cuda_dir='' +fi + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +SAVE_LIBS="$LIBS" +SAVE_CPPFLAGS="$CPPFLAGS" +if test "x$pac_cv_cuda_dir" != "x"; then + CUDA_DIR="$pac_cv_cuda_dir" + LIBS="-L$pac_cv_cuda_dir/lib $LIBS" + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_DEFINES="-DHAVE_CUDA" + CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + if test -f "$pac_cv_cuda_dir/bin/nvcc"; then + CUDA_NVCC="$pac_cv_cuda_dir/bin/nvcc" + else + CUDA_NVCC="nvcc" + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking cuda dir $pac_cv_cuda_dir" >&5 +printf %s "checking cuda dir $pac_cv_cuda_dir... " >&6; } +ac_fn_c_check_header_compile "$LINENO" "cuda_runtime.h" "ac_cv_header_cuda_runtime_h" "$ac_includes_default" +if test "x$ac_cv_header_cuda_runtime_h" = xyes +then : + pac_cuda_header_ok=yes +else $as_nop + pac_cuda_header_ok=no; CUDA_INCLUDES="" +fi + + +if test "x$pac_cuda_header_ok" == "xyes" ; then + CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" + LIBS="$CUDA_LIBS -lm $LIBS"; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cudaMemcpy in $CUDA_LIBS" >&5 +printf %s "checking for cudaMemcpy in $CUDA_LIBS... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char cudaMemcpy (); +int +main (void) +{ +return cudaMemcpy (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + pac_cv_have_cuda=yes;pac_cuda_lib_ok=yes; +else $as_nop + pac_cv_have_cuda=no;pac_cuda_lib_ok=no; CUDA_LIBS="" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_cuda_lib_ok" >&5 +printf "%s\n" "$pac_cuda_lib_ok" >&6; } + +fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" + + +if test "x$pac_cv_have_cuda" == "xyes"; then + + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +SAVE_LIBS="$LIBS" +SAVE_CPPFLAGS="$CPPFLAGS" +if test "x$pac_cv_have_cuda" == "x"; then + +# Check whether --with-cuda was given. +if test ${with_cuda+y} +then : + withval=$with_cuda; pac_cv_cuda_dir=$withval +else $as_nop + pac_cv_cuda_dir='' +fi + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +SAVE_LIBS="$LIBS" +SAVE_CPPFLAGS="$CPPFLAGS" +if test "x$pac_cv_cuda_dir" != "x"; then + CUDA_DIR="$pac_cv_cuda_dir" + LIBS="-L$pac_cv_cuda_dir/lib $LIBS" + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_DEFINES="-DHAVE_CUDA" + CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + if test -f "$pac_cv_cuda_dir/bin/nvcc"; then + CUDA_NVCC="$pac_cv_cuda_dir/bin/nvcc" + else + CUDA_NVCC="nvcc" + fi +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking cuda dir $pac_cv_cuda_dir" >&5 +printf %s "checking cuda dir $pac_cv_cuda_dir... " >&6; } +ac_fn_c_check_header_compile "$LINENO" "cuda_runtime.h" "ac_cv_header_cuda_runtime_h" "$ac_includes_default" +if test "x$ac_cv_header_cuda_runtime_h" = xyes +then : + pac_cuda_header_ok=yes +else $as_nop + pac_cuda_header_ok=no; CUDA_INCLUDES="" +fi + + +if test "x$pac_cuda_header_ok" == "xyes" ; then + CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" + LIBS="$CUDA_LIBS -lm $LIBS"; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cudaMemcpy in $CUDA_LIBS" >&5 +printf %s "checking for cudaMemcpy in $CUDA_LIBS... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +char cudaMemcpy (); +int +main (void) +{ +return cudaMemcpy (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + pac_cv_have_cuda=yes;pac_cuda_lib_ok=yes; +else $as_nop + pac_cv_have_cuda=no;pac_cuda_lib_ok=no; CUDA_LIBS="" +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_cuda_lib_ok" >&5 +printf "%s\n" "$pac_cuda_lib_ok" >&6; } + +fi +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" + +fi +if test "x$pac_cv_have_cuda" == "xyes"; then + CUDA_DIR="$pac_cv_cuda_dir" + LIBS="-L$pac_cv_cuda_dir/lib $LIBS" + CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" + CUDA_DEFINES="-DHAVE_CUDA" + CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS" + CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" + CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR" + LIBS="$CUDA_LIBS -lm $LIBS"; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for CUDA version" >&5 +printf %s "checking for CUDA version... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#include +#include + +int main(int argc, char **argv) +{ + printf("%d",CUDA_VERSION); + return(0); +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + pac_cv_cuda_version=`./conftest${ac_exeext} | sed 's/^ *//'`; +else $as_nop + pac_cv_cuda_version="unknown"; +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pac_cv_cuda_version" >&5 +printf "%s\n" "$pac_cv_cuda_version" >&6; } + fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +LIBS="$SAVE_LIBS" +CPPFLAGS="$SAVE_CPPFLAGS" + + CUDA_VERSION="$pac_cv_cuda_version"; + CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); + HAVE_CUDA="yes"; + SPGPU_LIBS="-lspgpu"; + CUDAD=cudad; + CUDALD=cudald; + LCUDA="-lpsb_cuda"; + EXTRALDLIBS="-lstdc++"; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: At this point GPUTARGET is $CUDAD $CUDALD" >&5 +printf "%s\n" "$as_me: At this point GPUTARGET is $CUDAD $CUDALD" >&6;} + + + +# Check whether --with-cudacc was given. +if test ${with_cudacc+y} +then : + withval=$with_cudacc; pac_cv_cudacc=$withval +else $as_nop + pac_cv_cudacc='' +fi + + + if test "x$pac_cv_cudacc" == "x"; then + pac_cv_cudacc="50,60,70,75"; + CUDA_CC="$pac_cv_cudacc"; + fi + if (( $pac_cv_cuda_version >= 11070 )) + then + CUDEFINES="--dopt=on"; + fi + for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'` + do + CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; + done + if test "x$pac_cv_cuda_version" != "xunknown"; then + CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + fi +fi + +if test "x$pac_cv_ipk_size" != "x4"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&5 +printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&6;} + HAVE_CUDA="no"; + CUDA_CC=""; + SPGPU_LIBS=""; + CUDAD=""; + CUDALD=""; + CUDEFINES=""; + CUDA_INCLUDES=""; + CUDA_LIBS=""; +fi + + + + ############################################################################### # Library target directory and archive files. ############################################################################### @@ -10669,9 +10928,9 @@ UTILLIBNAME=libpsb_util.a PSBLASRULES=' PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS) -CXXDEFINES=$(PSBCXXDEFINES) -CDEFINES=$(PSBCDEFINES) -FDEFINES=$(PSBFDEFINES) +CXXDEFINES=$(PSBCXXDEFINES) $(CUDA_DEFINES) +CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES) +FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES) # These should be portable rules, arent they? @@ -10690,6 +10949,19 @@ FDEFINES=$(PSBFDEFINES) + + + + + + + + + + + + + @@ -11264,7 +11536,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by PSBLAS $as_me 3.7.0, which was +This file was extended by PSBLAS $as_me 3.8.1, which was generated by GNU Autoconf 2.71. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -11323,7 +11595,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -PSBLAS config.status 3.7.0 +PSBLAS config.status 3.8.1 configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" @@ -12024,6 +12296,9 @@ fi FCOPT : ${FCOPT} CCOPT : ${CCOPT} + CUDA : ${HAVE_CUDA} + CUDA_CC : ${pac_cv_cudacc} + BLAS : ${BLAS_LIBS} METIS usable : ${psblas_cv_have_metis} @@ -12054,6 +12329,9 @@ printf "%s\n" "$as_me: FCOPT : ${FCOPT} CCOPT : ${CCOPT} + CUDA : ${HAVE_CUDA} + CUDA_CC : ${pac_cv_cudacc} + BLAS : ${BLAS_LIBS} METIS usable : ${psblas_cv_have_metis} diff --git a/configure.ac b/configure.ac index 47b94e7a..27cb9f38 100755 --- a/configure.ac +++ b/configure.ac @@ -36,11 +36,11 @@ dnl NOTE : There is no cross compilation support. ############################################################################### # NOTE: the literal for version (the second argument to AC_INIT should be a literal!) -AC_INIT([PSBLAS],3.7.0, [https://github.com/sfilippone/psblas3/issues]) +AC_INIT([PSBLAS],3.8.1, [https://github.com/sfilippone/psblas3/issues]) # VERSION is the file containing the PSBLAS version code # FIXME -psblas_cv_version="3.7.0" +psblas_cv_version="3.8.1" # A sample source file AC_CONFIG_SRCDIR([base/modules/psb_base_mod.f90]) @@ -56,7 +56,7 @@ AC_MSG_NOTICE([ documentation, you can make your own by hand for your needs. Be sure to specify the library paths of your interest. Examples: - ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS + ./configure --with-libs=-L/some/directory/LIB <- will append to LIBS FC=mpif90 CC=mpicc ./configure <- will force FC,CC See ./configure --help=short fore more info. @@ -100,17 +100,17 @@ dnl We set our own FC flags, ignore those from AC_PROG_FC but not those from the dnl environment variable. Same for C dnl save_FCFLAGS="$FCFLAGS"; -AC_PROG_FC([ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 ifort ifc nagfor gfortran]) +AC_PROG_FC([ftn xlf2003_r xlf2003 xlf95_r xlf95 xlf90 xlf pgf95 pgf90 flang-new ifort ifc nagfor gfortran]) FCFLAGS="$save_FCFLAGS"; save_CFLAGS="$CFLAGS"; -AC_PROG_CC([xlc pgcc icc gcc cc ]) +AC_PROG_CC([xlc pgcc clang icc gcc cc ]) if test "x$ac_cv_prog_cc_stdc" == "xno" ; then AC_MSG_ERROR([Problem : Need a C99 compiler ! ]) else C99OPT="$ac_cv_prog_cc_stdc"; fi CFLAGS="$save_CFLAGS"; -AC_PROG_CXX([CC xlc++ icpc g++]) +AC_PROG_CXX([CC xlc++ clang++ icpc g++]) dnl AC_PROG_F90 doesn't exist, at the time of writing this ! dnl AC_PROG_F90 @@ -204,7 +204,7 @@ PAC_ARG_WITH_FLAGS(module-path,MODULE_PATH) # we just gave the user the chance to append values to these variables -############################################################################### + dnl Library oriented Autotools facilities (we don't care about this for now) @@ -248,10 +248,12 @@ if test x"$psblas_cv_fc" == "x" ; then # Sun compiler detection psblas_cv_fc="sun" - elif eval "$MPIFC -V 2>&1 | grep Portland 2>/dev/null" ; then + elif eval "$MPIFC --version 2>&1 | grep flang-new 2>/dev/null" ; then # Portland group compiler detection - psblas_cv_fc="pg" + psblas_cv_fc="flang" + psblas_cv_define_prepend="" + FDEFINES="$psblas_cv_define_prepend-DFLANG $FDEFINES" elif eval "$MPIFC -V 2>&1 | grep Intel.*Fortran.*Compiler 2>/dev/null" ; then # Intel compiler identification @@ -427,6 +429,9 @@ if test "X$FCOPT" == "X" ; then elif test "X$psblas_cv_fc" == X"pg" ; then # other compilers .. FCOPT="-fast $FCOPT" + elif test "X$psblas_cv_fc" == X"flang" ; then + # other compilers .. + FCOPT="-O3" # NOTE : PG & Sun use -fast instead -O3 elif test "X$psblas_cv_fc" == X"sun" ; then # other compilers .. @@ -730,23 +735,23 @@ PAC_MAKE_IS_GNUMAKE # Note : also umfdi_local_search, ... #AC_CHECK_LIB(umf,umfpack_di_solve,psblas_cv_have_umfpack=yes,psblas_cv_have_umfpack=no,[amd]) -AC_ARG_WITH(rsb, AS_HELP_STRING([--with-rsb], [Specify Recursive Sparse BLAS library linkage info (that is, the output of librsb-config --static --ldflags, or a directory where the usual bin/include/lib subdirs with a regular RSB installation resides, or nothing to make the configure script invoke librsb-config)]), [if test x"$withval" = xno; then -want_rsb_libs= ; else if test x"$withval" = xyes ; then want_rsb_libs=yes ; else want_rsb_libs="$withval" ; fi ; fi], [want_rsb_libs=""]) -if test x"$want_rsb_libs" != x ; then - if test x"$want_rsb_libs" = xyes ; then - want_rsb_libs="`librsb-config --static --ldflags`" - else - if test -d "$want_rsb_libs" ; then - want_rsb_libs="`$want_rsb_libs/bin/librsb-config --static --ldflags`" - else - true; - # we assume want_rsb_libs are linkage parameters - fi - fi - FDEFINES="$FDEFINES $psblas_cv_define_prepend-DHAVE_LIBRSB" -fi -RSB_LIBS="$want_rsb_libs" -LIBS="$RSB_LIBS ${LIBS}" +dnl AC_ARG_WITH(rsb, AS_HELP_STRING([--with-rsb], [Specify Recursive Sparse BLAS library linkage info (that is, the output of librsb-config --static --ldflags, or a directory where the usual bin/include/lib subdirs with a regular RSB installation resides, or nothing to make the configure script invoke librsb-config)]), [if test x"$withval" = xno; then +dnl want_rsb_libs= ; else if test x"$withval" = xyes ; then want_rsb_libs=yes ; else want_rsb_libs="$withval" ; fi ; fi], [want_rsb_libs=""]) +dnl if test x"$want_rsb_libs" != x ; then +dnl if test x"$want_rsb_libs" = xyes ; then +dnl want_rsb_libs="`librsb-config --static --ldflags`" +dnl else +dnl if test -d "$want_rsb_libs" ; then +dnl want_rsb_libs="`$want_rsb_libs/bin/librsb-config --static --ldflags`" +dnl else +dnl true; +dnl # we assume want_rsb_libs are linkage parameters +dnl fi +dnl fi +dnl FDEFINES="$FDEFINES $psblas_cv_define_prepend-DHAVE_LIBRSB" +dnl fi +dnl RSB_LIBS="$want_rsb_libs" +dnl LIBS="$RSB_LIBS ${LIBS}" dnl AC_CHECK_HEADERS([rsb.h], [ LIBS="${LIBS} $want_rsb_libs"], []) PAC_CHECK_METIS @@ -791,6 +796,81 @@ fi +PAC_CHECK_CUDA() + +if test "x$pac_cv_have_cuda" == "xyes"; then + + PAC_CHECK_CUDA_VERSION() + CUDA_VERSION="$pac_cv_cuda_version"; + CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); + dnl PAC_CHECK_SPGPU() + HAVE_CUDA="yes"; + SPGPU_LIBS="-lspgpu"; + CUDAD=cudad; + CUDALD=cudald; + LCUDA="-lpsb_cuda"; + EXTRALDLIBS="-lstdc++"; + AC_MSG_NOTICE([At this point GPUTARGET is $CUDAD $CUDALD]) + + PAC_ARG_WITH_CUDACC() + if test "x$pac_cv_cudacc" == "x"; then + pac_cv_cudacc="50,60,70,75"; + CUDA_CC="$pac_cv_cudacc"; + fi + if (( $pac_cv_cuda_version >= 11070 )) + then + CUDEFINES="--dopt=on"; + fi + for cc in `echo $pac_cv_cudacc|sed 's/,/ /gi'` + do + CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; + done + if test "x$pac_cv_cuda_version" != "xunknown"; then + CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" + fi +fi + +if test "x$pac_cv_ipk_size" != "x4"; then + AC_MSG_NOTICE([For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU]) + HAVE_CUDA="no"; + CUDA_CC=""; + SPGPU_LIBS=""; + CUDAD=""; + CUDALD=""; + CUDEFINES=""; + CUDA_INCLUDES=""; + CUDA_LIBS=""; +fi + + +############################################################################### +PAC_ARG_WITH_LIBRSB +LIBRSB_DIR="$pac_cv_librsb_dir"; +AC_MSG_CHECKING([for LIBRSB install dir]) +case $LIBRSB_DIR in + /*) ;; + *) dnl AC_MSG_ERROR([The LIBRSB installation dir must be an absolute pathname + dnl specified with --with-librsb=/path/to/librsb]) +esac +dnl if test ! -d "$LIBRSB_DIR" ; then +dnl AC_MSG_ERROR([Could not find LIBRSB build dir $LIBRSB_DIR!]) +dnl fi +pac_cv_status_file="$LIBRSB_DIR/librsb.a" +if test ! -f "$pac_cv_status_file" ; then + AC_MSG_RESULT([no]) + #AC_MSG_ERROR([Could not find an installation in $LIBRSB_DIR.]) +else + AC_MSG_RESULT([$LIBRSB_DIR]) + RSBTARGETLIB=rsbd; + RSBTARGETOBJ=rsbobj; +fi + + + + + ############################################################################### # Library target directory and archive files. ############################################################################### @@ -848,9 +928,9 @@ AC_SUBST(FINCLUDES) PSBLASRULES=' PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS) -CDEFINES=$(PSBCDEFINES) -FDEFINES=$(PSBFDEFINES) -CXXDEFINES=$(PSBCXXDEFINES) +CXXDEFINES=$(PSBCXXDEFINES) $(CUDA_DEFINES) +CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES) +FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES) # These should be portable rules, arent they? @@ -872,6 +952,30 @@ AC_SUBST(PRECLIBNAME) AC_SUBST(METHDLIBNAME) AC_SUBST(UTILLIBNAME) AC_SUBST(METISINCFILE) +AC_SUBST(SPGPU_FLAGS) +AC_SUBST(SPGPU_LIBS) +dnl AC_SUBST(SPGPU_DIR) +dnl AC_SUBST(SPGPU_INCLUDES) +dnl AC_SUBST(SPGPU_INCDIR) +AC_SUBST(EXTRALDLIBS) +AC_SUBST(CUDA_DIR) +AC_SUBST(CUDA_DEFINES) +AC_SUBST(CUDA_INCLUDES) +AC_SUBST(CUDA_LIBS) +AC_SUBST(CUDA_VERSION) +AC_SUBST(CUDA_SHORT_VERSION) +AC_SUBST(CUDA_NVCC) +AC_SUBST(CUDEFINES) +AC_SUBST(CUDAD) +AC_SUBST(CUDALD) +AC_SUBST(LCUDA) +AC_SUBST(LIBRSB_LIBS) +AC_SUBST(LIBRSB_INCLUDES) +AC_SUBST(LIBRSB_INCDIR) +AC_SUBST(LIBRSB_DIR) +AC_SUBST(LIBRSB_DEFINES) +AC_SUBST(LRSB) + ############################################################################### # the following files will be created by Automake @@ -897,6 +1001,9 @@ AC_MSG_NOTICE([ FCOPT : ${FCOPT} CCOPT : ${CCOPT} + CUDA : ${HAVE_CUDA} + CUDA_CC : ${pac_cv_cudacc} + BLAS : ${BLAS_LIBS} METIS usable : ${psblas_cv_have_metis} diff --git a/cuda/CUDA/Makefile b/cuda/CUDA/Makefile new file mode 100644 index 00000000..5bdfb935 --- /dev/null +++ b/cuda/CUDA/Makefile @@ -0,0 +1,37 @@ +TOPDIR=../.. +include $(TOPDIR)/Make.inc +# +# Libraries used +# +PSBLIBDIR=$(PSBLASDIR)/lib/ +PSBINCDIR=$(PSBLASDIR)/include +LIBDIR=$(TOPDIR)/lib +INCDIR=$(TOPDIR)/include +PSBLAS_LIB= -L$(PSBLIBDIR) -lpsb_util -lpsb_base +#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +LDLIBS=$(PSBLDLIBS) +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(PSBINCDIR) $(FIFLAG). +CINCLUDES=$(SPGPU_INCLUDES) $(CUDA_INCLUDES) -I.. -I$(INCDIR) +LIBNAME=libpsb_gpu.a + + +CUDAOBJS=psi_cuda_c_CopyCooToElg.o psi_cuda_c_CopyCooToHlg.o \ +psi_cuda_d_CopyCooToElg.o psi_cuda_d_CopyCooToHlg.o \ +psi_cuda_s_CopyCooToElg.o psi_cuda_s_CopyCooToHlg.o \ +psi_cuda_z_CopyCooToElg.o psi_cuda_z_CopyCooToHlg.o + + + +objs: $(CUDAOBJS) + +lib: objs + ar cur ../$(LIBNAME) $(CUDAOBJS) + +$(CUDAOBJS): psi_cuda_common.cuh psi_cuda_CopyCooToElg.cuh psi_cuda_CopyCooToHlg.cuh + +clean: + /bin/rm -f $(CUDAOBJS) diff --git a/cuda/CUDA/psi_cuda_CopyCooToElg.cuh b/cuda/CUDA/psi_cuda_CopyCooToElg.cuh new file mode 100644 index 00000000..5b723be1 --- /dev/null +++ b/cuda/CUDA/psi_cuda_CopyCooToElg.cuh @@ -0,0 +1,103 @@ +#include +#include + +#include "psi_cuda_common.cuh" + + +#undef GEN_PSI_FUNC_NAME +#define GEN_PSI_FUNC_NAME(x) CONCAT(CONCAT(psi_cuda_,x),_CopyCooToElg) + +#define THREAD_BLOCK 256 + +#ifdef __cplusplus +extern "C" { +#endif + + + void GEN_PSI_FUNC_NAME(TYPE_SYMBOL)(spgpuHandle_t handle, int nr, int nc, int nza, + int baseIdx, int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM); + + +#ifdef __cplusplus +} +#endif + + + + + +__global__ void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)(int ii, int nrws, int nr, int nza, + int baseIdx, int hacksz, int ldv, int nzm, + int *rS, int *devIdisp, int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM) +{ + int ir, k, ipnt, rsz,jc; + int ki = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + int i=ii+ki; + int idval=0; + + if (ki >= nrws) return; + if (i >= nr) return; + + ipnt=devIdisp[i]; + rsz=rS[i]; + ir = i; + for (k=0; kcurrentStream >>>(i,nrws, nr, nza, baseIdx, + hacksz, ldv, nzm, + rS,devIdisp,devJa,devVal, + idiag, rP,cM); + +} + + + + +void +GEN_PSI_FUNC_NAME(TYPE_SYMBOL) + (spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM) +{ int i, nrws; + //int maxNForACall = THREAD_BLOCK*handle->maxGridSizeX; + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + //fprintf(stderr,"Loop on j: %d\n",j); + for (i=0; i +#include + +#include "psi_cuda_common.cuh" + + +#undef GEN_PSI_FUNC_NAME +#define GEN_PSI_FUNC_NAME(x) CONCAT(CONCAT(psi_cuda_,x),_CopyCooToHlg) + +#define THREAD_BLOCK 256 + +#ifdef __cplusplus +extern "C" { +#endif + +void GEN_PSI_FUNC_NAME(TYPE_SYMBOL)(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, int hacksz, + int noffs, int isz, int *rS, int *hackOffs, int *devIdisp, + int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM); + + + +#ifdef __cplusplus +} +#endif + + +__global__ void CONCAT(GEN_PSI_FUNC_NAME(TYPE_SYMBOL),_krn)(int ii, int nrws, int nr, int nza, + int baseIdx, int hacksz, int noffs, int isz, + int *rS, int *hackOffs, int *devIdisp, + int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM) +{ + int ir, k, ipnt, rsz,jc; + int ki = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + int i=ii+ki; + + if (ki >= nrws) return; + + + if (icurrentStream >>>(i,nrws,nr, nza, baseIdx, hacksz, noffs, isz, + rS,hackOffs,devIdisp,devJa,devVal,idiag,rP,cM); + +} + + +void GEN_PSI_FUNC_NAME(TYPE_SYMBOL)(spgpuHandle_t handle, int nr, int nc, int nza, + int baseIdx, int hacksz, int noffs, int isz, + int *rS, int *hackOffs, int *devIdisp, + int *devJa, VALUE_TYPE *devVal, + int *idiag, int *rP, VALUE_TYPE *cM) +{ int i, nrws; + //int maxNForACall = THREAD_BLOCK*handle->maxGridSizeX; + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + //fprintf(stderr,"Loop on j: %d\n",j); + for (i=0; i +#include + +#include "cintrf.h" +#define VALUE_TYPE cuFloatComplex +#define TYPE_SYMBOL c +#include "psi_cuda_CopyCooToElg.cuh" + diff --git a/cuda/CUDA/psi_cuda_c_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_c_CopyCooToHlg.cu new file mode 100644 index 00000000..e36728b1 --- /dev/null +++ b/cuda/CUDA/psi_cuda_c_CopyCooToHlg.cu @@ -0,0 +1,7 @@ +#include +#include + +#include "cintrf.h" +#define VALUE_TYPE cuFloatComplex +#define TYPE_SYMBOL c +#include "psi_cuda_CopyCooToHlg.cuh" diff --git a/cuda/CUDA/psi_cuda_common.cuh b/cuda/CUDA/psi_cuda_common.cuh new file mode 100644 index 00000000..12d81f03 --- /dev/null +++ b/cuda/CUDA/psi_cuda_common.cuh @@ -0,0 +1,16 @@ +#pragma once + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) +#define MIN(A,B) ( (A)<(B) ? (A) : (B) ) +#define SQUARE(x) ((x)*(x)) +#define GET_ADDR(a,ix,iy,nc) a[(nc)*(ix)+(iy)] +#define GET_VAL(a,ix,iy,nc) (GET_ADDR(a,ix,iy,nc)) + +__device__ __host__ static float zero_float() { return 0.0f; } +__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); } + +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +#endif diff --git a/cuda/CUDA/psi_cuda_d_CopyCooToElg.cu b/cuda/CUDA/psi_cuda_d_CopyCooToElg.cu new file mode 100644 index 00000000..233bae06 --- /dev/null +++ b/cuda/CUDA/psi_cuda_d_CopyCooToElg.cu @@ -0,0 +1,7 @@ +#include +#include + +#include "cintrf.h" +#define VALUE_TYPE double +#define TYPE_SYMBOL d +#include "psi_cuda_CopyCooToElg.cuh" diff --git a/cuda/CUDA/psi_cuda_d_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_d_CopyCooToHlg.cu new file mode 100644 index 00000000..94e076ae --- /dev/null +++ b/cuda/CUDA/psi_cuda_d_CopyCooToHlg.cu @@ -0,0 +1,7 @@ +#include +#include + +#include "cintrf.h" +#define VALUE_TYPE double +#define TYPE_SYMBOL d +#include "psi_cuda_CopyCooToHlg.cuh" diff --git a/cuda/CUDA/psi_cuda_s_CopyCooToElg.cu b/cuda/CUDA/psi_cuda_s_CopyCooToElg.cu new file mode 100644 index 00000000..e083708c --- /dev/null +++ b/cuda/CUDA/psi_cuda_s_CopyCooToElg.cu @@ -0,0 +1,7 @@ +#include +#include + +#include "cintrf.h" +#define VALUE_TYPE float +#define TYPE_SYMBOL s +#include "psi_cuda_CopyCooToElg.cuh" diff --git a/cuda/CUDA/psi_cuda_s_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_s_CopyCooToHlg.cu new file mode 100644 index 00000000..90ad5fdf --- /dev/null +++ b/cuda/CUDA/psi_cuda_s_CopyCooToHlg.cu @@ -0,0 +1,7 @@ +#include +#include + +#include "cintrf.h" +#define VALUE_TYPE float +#define TYPE_SYMBOL s +#include "psi_cuda_CopyCooToHlg.cuh" diff --git a/cuda/CUDA/psi_cuda_z_CopyCooToElg.cu b/cuda/CUDA/psi_cuda_z_CopyCooToElg.cu new file mode 100644 index 00000000..b5ec817d --- /dev/null +++ b/cuda/CUDA/psi_cuda_z_CopyCooToElg.cu @@ -0,0 +1,7 @@ +#include +#include + +#include "cintrf.h" +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL z +#include "psi_cuda_CopyCooToElg.cuh" diff --git a/cuda/CUDA/psi_cuda_z_CopyCooToHlg.cu b/cuda/CUDA/psi_cuda_z_CopyCooToHlg.cu new file mode 100644 index 00000000..24d39ec4 --- /dev/null +++ b/cuda/CUDA/psi_cuda_z_CopyCooToHlg.cu @@ -0,0 +1,7 @@ +#include +#include + +#include "cintrf.h" +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL z +#include "psi_cuda_CopyCooToHlg.cuh" diff --git a/cuda/License-spgpu.md b/cuda/License-spgpu.md new file mode 100644 index 00000000..7f4b8ff4 --- /dev/null +++ b/cuda/License-spgpu.md @@ -0,0 +1,21 @@ +(c) Copyright 2011-2021 Davide Barbieri, Salvatore Filippone + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY + EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT + SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/cuda/Makefile b/cuda/Makefile new file mode 100755 index 00000000..7e428629 --- /dev/null +++ b/cuda/Makefile @@ -0,0 +1,143 @@ +include ../Make.inc +# +# Libraries used +# +LIBDIR=../lib +INCDIR=../include +MODDIR=../modules +PSBLAS_LIB= -lpsb_util -lpsb_base +#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +LDLIBS=$(PSBLDLIBS) +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FIFLAG). +CINCLUDES=$(SPGPU_INCLUDES) $(CUDA_INCLUDES) -I$(INCDIR) +LIBNAME=libpsb_cuda.a + + +FOBJS=cusparse_mod.o base_cusparse_mod.o \ + s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o \ + psb_vectordev_mod.o core_mod.o \ + psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_i_vectordev_mod.o\ + psb_c_vectordev_mod.o psb_z_vectordev_mod.o psb_base_vectordev_mod.o \ + elldev_mod.o hlldev_mod.o diagdev_mod.o hdiagdev_mod.o \ + psb_i_cuda_vect_mod.o \ + psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o\ + psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o\ + psb_d_cuda_elg_mat_mod.o psb_d_cuda_hlg_mat_mod.o \ + psb_d_cuda_hybg_mat_mod.o psb_d_cuda_csrg_mat_mod.o\ + psb_s_cuda_elg_mat_mod.o psb_s_cuda_hlg_mat_mod.o \ + psb_s_cuda_hybg_mat_mod.o psb_s_cuda_csrg_mat_mod.o\ + psb_c_cuda_elg_mat_mod.o psb_c_cuda_hlg_mat_mod.o \ + psb_c_cuda_hybg_mat_mod.o psb_c_cuda_csrg_mat_mod.o\ + psb_z_cuda_elg_mat_mod.o psb_z_cuda_hlg_mat_mod.o \ + psb_z_cuda_hybg_mat_mod.o psb_z_cuda_csrg_mat_mod.o\ + psb_cuda_env_mod.o psb_cuda_mod.o \ + psb_d_cuda_diag_mat_mod.o\ + psb_d_cuda_hdiag_mat_mod.o psb_s_cuda_hdiag_mat_mod.o\ + psb_s_cuda_dnsg_mat_mod.o psb_d_cuda_dnsg_mat_mod.o \ + psb_c_cuda_dnsg_mat_mod.o psb_z_cuda_dnsg_mat_mod.o \ + dnsdev_mod.o + +COBJS= elldev.o hlldev.o diagdev.o hdiagdev.o vectordev.o ivectordev.o dnsdev.o\ + svectordev.o dvectordev.o cvectordev.o zvectordev.o cuda_util.o \ + fcusparse.o scusparse.o dcusparse.o ccusparse.o zcusparse.o + +OBJS=$(COBJS) $(FOBJS) + +lib: objs ilib cudalib spgpulib + ar cur $(LIBNAME) $(OBJS) + /bin/cp -p $(LIBNAME) $(LIBDIR) + + +objs: spgpuinc $(OBJS) iobjs cudaobjs spgpuobjs + /bin/cp -p *$(.mod) $(MODDIR) + /bin/cp -p *.h $(INCDIR) + +spgpuinc: + $(MAKE) -C spgpu includes +spgpuobjs: + $(MAKE) -C spgpu objs +spgpulib: + $(MAKE) -C spgpu lib + + +hdiagdev_mod.o diagdev_mod.o dnsdev_mod.o hlldev_mod.o elldev_mod.o psb_base_vectordev_mod.o: core_mod.o +psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o: psb_i_cuda_vect_mod.o +psb_i_cuda_vect_mod.o : psb_vectordev_mod.o psb_cuda_env_mod.o +cusparse_mod.o: s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o +s_cusparse_mod.o d_cusparse_mod.o c_cusparse_mod.o z_cusparse_mod.o : base_cusparse_mod.o +psb_d_cuda_hlg_mat_mod.o: hlldev_mod.o psb_d_cuda_vect_mod.o psb_cuda_env_mod.o +psb_d_cuda_elg_mat_mod.o: elldev_mod.o psb_d_cuda_vect_mod.o +psb_d_cuda_diag_mat_mod.o: diagdev_mod.o psb_d_cuda_vect_mod.o +psb_d_cuda_hdiag_mat_mod.o: hdiagdev_mod.o psb_d_cuda_vect_mod.o +psb_s_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_s_cuda_vect_mod.o +psb_d_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_d_cuda_vect_mod.o +psb_c_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_c_cuda_vect_mod.o +psb_z_cuda_dnsg_mat_mod.o: dnsdev_mod.o psb_z_cuda_vect_mod.o +psb_s_cuda_hlg_mat_mod.o: hlldev_mod.o psb_s_cuda_vect_mod.o psb_cuda_env_mod.o +psb_s_cuda_elg_mat_mod.o: elldev_mod.o psb_s_cuda_vect_mod.o +psb_s_cuda_diag_mat_mod.o: diagdev_mod.o psb_s_cuda_vect_mod.o +psb_s_cuda_hdiag_mat_mod.o: hdiagdev_mod.o psb_s_cuda_vect_mod.o +psb_s_cuda_csrg_mat_mod.o psb_s_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_d_cuda_csrg_mat_mod.o psb_d_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_z_cuda_hlg_mat_mod.o: hlldev_mod.o psb_z_cuda_vect_mod.o psb_cuda_env_mod.o +psb_z_cuda_elg_mat_mod.o: elldev_mod.o psb_z_cuda_vect_mod.o +psb_c_cuda_hlg_mat_mod.o: hlldev_mod.o psb_c_cuda_vect_mod.o psb_cuda_env_mod.o +psb_c_cuda_elg_mat_mod.o: elldev_mod.o psb_c_cuda_vect_mod.o +psb_c_cuda_csrg_mat_mod.o psb_c_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_z_cuda_csrg_mat_mod.o psb_z_cuda_hybg_mat_mod.o: cusparse_mod.o psb_vectordev_mod.o +psb_vectordev_mod.o: psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_c_vectordev_mod.o psb_z_vectordev_mod.o psb_i_vectordev_mod.o +psb_i_vectordev_mod.o psb_s_vectordev_mod.o psb_d_vectordev_mod.o psb_c_vectordev_mod.o psb_z_vectordev_mod.o: psb_base_vectordev_mod.o +vectordev.o: cuda_util.o vectordev.h +elldev.o: elldev.c +dnsdev.o: dnsdev.c +fcusparse.h elldev.c: elldev.h vectordev.h +fcusparse.o scusparse.o dcusparse.o ccusparse.o zcusparse.o : fcusparse.h +fcusparse.o scusparse.o dcusparse.o ccusparse.o zcusparse.o : fcusparse_fct.h +svectordev.o: svectordev.h vectordev.h +dvectordev.o: dvectordev.h vectordev.h +cvectordev.o: cvectordev.h vectordev.h +zvectordev.o: zvectordev.h vectordev.h +psb_cuda_env_mod.o: base_cusparse_mod.o +psb_cuda_mod.o: psb_cuda_env_mod.o psb_i_cuda_vect_mod.o\ + psb_d_cuda_vect_mod.o psb_s_cuda_vect_mod.o\ + psb_z_cuda_vect_mod.o psb_c_cuda_vect_mod.o\ + psb_d_cuda_elg_mat_mod.o psb_d_cuda_hlg_mat_mod.o \ + psb_d_cuda_hybg_mat_mod.o psb_d_cuda_csrg_mat_mod.o\ + psb_s_cuda_elg_mat_mod.o psb_s_cuda_hlg_mat_mod.o \ + psb_s_cuda_hybg_mat_mod.o psb_s_cuda_csrg_mat_mod.o\ + psb_c_cuda_elg_mat_mod.o psb_c_cuda_hlg_mat_mod.o \ + psb_c_cuda_hybg_mat_mod.o psb_c_cuda_csrg_mat_mod.o\ + psb_z_cuda_elg_mat_mod.o psb_z_cuda_hlg_mat_mod.o \ + psb_z_cuda_hybg_mat_mod.o psb_z_cuda_csrg_mat_mod.o\ + psb_d_cuda_diag_mat_mod.o \ + psb_d_cuda_hdiag_mat_mod.o psb_s_cuda_hdiag_mat_mod.o\ + psb_s_cuda_dnsg_mat_mod.o psb_d_cuda_dnsg_mat_mod.o \ + psb_c_cuda_dnsg_mat_mod.o psb_z_cuda_dnsg_mat_mod.o + +iobjs: $(FOBJS) + $(MAKE) -C impl objs +cudaobjs: $(FOBJS) + $(MAKE) -C CUDA objs + +ilib: objs + $(MAKE) -C impl lib LIBNAME=$(LIBNAME) +cudalib: objs ilib + $(MAKE) -C CUDA lib LIBNAME=$(LIBNAME) + +clean: cclean iclean cudaclean spgpuclean + /bin/rm -f $(FOBJS) *$(.mod) *.a + +cclean: + /bin/rm -f $(COBJS) +iclean: + $(MAKE) -C impl clean +cudaclean: + $(MAKE) -C CUDA clean +spgpuclean: + $(MAKE) -C spgpu clean + +veryclean: clean diff --git a/cuda/base_cusparse_mod.F90 b/cuda/base_cusparse_mod.F90 new file mode 100644 index 00000000..94a8255f --- /dev/null +++ b/cuda/base_cusparse_mod.F90 @@ -0,0 +1,113 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module base_cusparse_mod + use iso_c_binding + ! Interface to CUSPARSE. + + enum, bind(c) + enumerator cusparse_status_success + enumerator cusparse_status_not_initialized + enumerator cusparse_status_alloc_failed + enumerator cusparse_status_invalid_value + enumerator cusparse_status_arch_mismatch + enumerator cusparse_status_mapping_error + enumerator cusparse_status_execution_failed + enumerator cusparse_status_internal_error + enumerator cusparse_status_matrix_type_not_supported + end enum + + enum, bind(c) + enumerator cusparse_matrix_type_general + enumerator cusparse_matrix_type_symmetric + enumerator cusparse_matrix_type_hermitian + enumerator cusparse_matrix_type_triangular + end enum + + enum, bind(c) + enumerator cusparse_fill_mode_lower + enumerator cusparse_fill_mode_upper + end enum + + enum, bind(c) + enumerator cusparse_diag_type_non_unit + enumerator cusparse_diag_type_unit + end enum + + enum, bind(c) + enumerator cusparse_index_base_zero + enumerator cusparse_index_base_one + end enum + + enum, bind(c) + enumerator cusparse_operation_non_transpose + enumerator cusparse_operation_transpose + enumerator cusparse_operation_conjugate_transpose + end enum + + enum, bind(c) + enumerator cusparse_direction_row + enumerator cusparse_direction_column + end enum + + interface + function FcusparseCreate() & + & bind(c,name="FcusparseCreate") result(res) + use iso_c_binding + integer(c_int) :: res + end function FcusparseCreate + end interface + + interface + function FcusparseDestroy() & + & bind(c,name="FcusparseDestroy") result(res) + use iso_c_binding + integer(c_int) :: res + end function FcusparseDestroy + end interface + +contains + + function initFcusparse() result(res) + implicit none + integer(c_int) :: res + + res = FcusparseCreate() + end function initFcusparse + + function closeFcusparse() result(res) + implicit none + integer(c_int) :: res + res = FcusparseDestroy() + end function closeFcusparse + +end module base_cusparse_mod diff --git a/cuda/c_cusparse_mod.F90 b/cuda/c_cusparse_mod.F90 new file mode 100644 index 00000000..07e57a55 --- /dev/null +++ b/cuda/c_cusparse_mod.F90 @@ -0,0 +1,312 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module c_cusparse_mod + use base_cusparse_mod + + type, bind(c) :: c_Cmat + type(c_ptr) :: Mat = c_null_ptr + end type c_Cmat + +#if CUDA_SHORT_VERSION <= 10 + type, bind(c) :: c_Hmat + type(c_ptr) :: Mat = c_null_ptr + end type c_Hmat +#endif + + interface CSRGDeviceFree + function c_CSRGDeviceFree(Mat) & + & bind(c,name="c_CSRGDeviceFree") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int) :: res + end function c_CSRGDeviceFree + end interface + + interface CSRGDeviceSetMatType + function c_CSRGDeviceSetMatType(Mat,type) & + & bind(c,name="c_CSRGDeviceSetMatType") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_CSRGDeviceSetMatType + end interface + + interface CSRGDeviceSetMatFillMode + function c_CSRGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="c_CSRGDeviceSetMatFillMode") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_CSRGDeviceSetMatFillMode + end interface + + interface CSRGDeviceSetMatDiagType + function c_CSRGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="c_CSRGDeviceSetMatDiagType") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_CSRGDeviceSetMatDiagType + end interface + + interface CSRGDeviceSetMatIndexBase + function c_CSRGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="c_CSRGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_CSRGDeviceSetMatIndexBase + end interface + +#if CUDA_SHORT_VERSION <= 10 + interface CSRGDeviceCsrsmAnalysis + function c_CSRGDeviceCsrsmAnalysis(Mat) & + & bind(c,name="c_CSRGDeviceCsrsmAnalysis") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int) :: res + end function c_CSRGDeviceCsrsmAnalysis + end interface +#else + interface CSRGIsNullSvBuffer + function c_CSRGIsNullSvBuffer(Mat) & + & bind(c,name="c_CSRGIsNullSvBuffer") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int) :: res + end function c_CSRGIsNullSvBuffer + end interface +#endif + + interface CSRGDeviceAlloc + function c_CSRGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="c_CSRGDeviceAlloc") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function c_CSRGDeviceAlloc + end interface + + interface CSRGDeviceGetParms + function c_CSRGDeviceGetParms(Mat,nr,nc,nz) & + & bind(c,name="c_CSRGDeviceGetParms") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int) :: nr, nc, nz + integer(c_int) :: res + end function c_CSRGDeviceGetParms + end interface + + interface spsvCSRGDevice + function c_spsvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="c_spsvCSRGDevice") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_float_complex), value :: alpha,beta + integer(c_int) :: res + end function c_spsvCSRGDevice + end interface + + interface spmvCSRGDevice + function c_spmvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="c_spmvCSRGDevice") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_float_complex), value :: alpha,beta + integer(c_int) :: res + end function c_spmvCSRGDevice + end interface + + interface CSRGHost2Device + function c_CSRGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="c_CSRGHost2Device") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + complex(c_float_complex) :: val(*) + integer(c_int) :: res + end function c_CSRGHost2Device + end interface + + interface CSRGDevice2Host + function c_CSRGDevice2Host(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="c_CSRGDevice2Host") result(res) + use iso_c_binding + import c_Cmat + type(c_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + complex(c_float_complex) :: val(*) + integer(c_int) :: res + end function c_CSRGDevice2Host + end interface + +#if CUDA_SHORT_VERSION <=10 + interface HYBGDeviceAlloc + function c_HYBGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="c_HYBGDeviceAlloc") result(res) + use iso_c_binding + import c_hmat + type(c_Hmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function c_HYBGDeviceAlloc + end interface + + interface HYBGDeviceFree + function c_HYBGDeviceFree(Mat) & + & bind(c,name="c_HYBGDeviceFree") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int) :: res + end function c_HYBGDeviceFree + end interface + + interface HYBGDeviceSetMatType + function c_HYBGDeviceSetMatType(Mat,type) & + & bind(c,name="c_HYBGDeviceSetMatType") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_HYBGDeviceSetMatType + end interface + + interface HYBGDeviceSetMatFillMode + function c_HYBGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="c_HYBGDeviceSetMatFillMode") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_HYBGDeviceSetMatFillMode + end interface + + interface HYBGDeviceSetMatDiagType + function c_HYBGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="c_HYBGDeviceSetMatDiagType") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_HYBGDeviceSetMatDiagType + end interface + + interface HYBGDeviceSetMatIndexBase + function c_HYBGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="c_HYBGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function c_HYBGDeviceSetMatIndexBase + end interface + + interface HYBGDeviceHybsmAnalysis + function c_HYBGDeviceHybsmAnalysis(Mat) & + & bind(c,name="c_HYBGDeviceHybsmAnalysis") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int) :: res + end function c_HYBGDeviceHybsmAnalysis + end interface + + interface spsvHYBGDevice + function c_spsvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="c_spsvHYBGDevice") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_float_complex), value :: alpha,beta + integer(c_int) :: res + end function c_spsvHYBGDevice + end interface + + interface spmvHYBGDevice + function c_spmvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="c_spmvHYBGDevice") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_float_complex), value :: alpha,beta + integer(c_int) :: res + end function c_spmvHYBGDevice + end interface + + interface HYBGHost2Device + function c_HYBGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="c_HYBGHost2Device") result(res) + use iso_c_binding + import c_Hmat + type(c_Hmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + complex(c_float_complex) :: val(*) + integer(c_int) :: res + end function c_HYBGHost2Device + end interface +#endif + +end module c_cusparse_mod diff --git a/cuda/ccusparse.c b/cuda/ccusparse.c new file mode 100644 index 00000000..6b5c8ea6 --- /dev/null +++ b/cuda/ccusparse.c @@ -0,0 +1,42 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include + +#include +#include +#include "fcusparse.h" + +#include "ccusparse.h" +#include "fcusparse_dat.h" +#include "fcusparse_fct.h" diff --git a/cuda/ccusparse.h b/cuda/ccusparse.h new file mode 100644 index 00000000..a5380a0a --- /dev/null +++ b/cuda/ccusparse.h @@ -0,0 +1,100 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#ifndef CCUSPARSE_ +#define CCUSPARSE_ + + +#include +#include + +#include +#include +#include "cintrf.h" + +/* Double precision real */ +#define TYPE float complex +#define CUSPARSE_BASE_TYPE CUDA_C_32F +#define T_CSRGDeviceMat c_CSRGDeviceMat +#define T_Cmat c_Cmat +#define T_spmvCSRGDevice c_spmvCSRGDevice +#define T_spsvCSRGDevice c_spsvCSRGDevice +#define T_CSRGDeviceAlloc c_CSRGDeviceAlloc +#define T_CSRGDeviceFree c_CSRGDeviceFree +#define T_CSRGHost2Device c_CSRGHost2Device +#define T_CSRGDevice2Host c_CSRGDevice2Host +#define T_CSRGDeviceSetMatFillMode c_CSRGDeviceSetMatFillMode +#define T_CSRGDeviceSetMatDiagType c_CSRGDeviceSetMatDiagType +#define T_CSRGDeviceGetParms c_CSRGDeviceGetParms + +#if CUDA_SHORT_VERSION <= 10 +#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsmAnalysis c_CSRGDeviceCsrsmAnalysis +#define cusparseTcsrmv cusparseCcsrmv +#define cusparseTcsrsv_solve cusparseCcsrsv_solve +#define cusparseTcsrsv_analysis cusparseCcsrsv_analysis +#define T_HYBGDeviceMat c_HYBGDeviceMat +#define T_Hmat c_Hmat +#define T_HYBGDeviceFree c_HYBGDeviceFree +#define T_spmvHYBGDevice c_spmvHYBGDevice +#define T_HYBGDeviceAlloc c_HYBGDeviceAlloc +#define T_HYBGDeviceSetMatDiagType c_HYBGDeviceSetMatDiagType +#define T_HYBGDeviceSetMatIndexBase c_HYBGDeviceSetMatIndexBase +#define T_HYBGDeviceSetMatType c_HYBGDeviceSetMatType +#define T_HYBGDeviceSetMatFillMode c_HYBGDeviceSetMatFillMode +#define T_HYBGDeviceHybsmAnalysis c_HYBGDeviceHybsmAnalysis +#define T_spsvHYBGDevice c_spsvHYBGDevice +#define T_HYBGHost2Device c_HYBGHost2Device +#define cusparseThybmv cusparseChybmv +#define cusparseThybsv_solve cusparseChybsv_solve +#define cusparseThybsv_analysis cusparseChybsv_analysis +#define cusparseTcsr2hyb cusparseCcsr2hyb + +#elif CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType c_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase c_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis c_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseCcsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseCcsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseCcsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer c_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr c_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr c_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr c_CSRGCreateSpMVDescr + +#endif + +#include "fcusparse.h" + +#endif diff --git a/cuda/cintrf.h b/cuda/cintrf.h new file mode 100644 index 00000000..58778e6c --- /dev/null +++ b/cuda/cintrf.h @@ -0,0 +1,44 @@ + /* Parallel Sparse BLAS SPGPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#ifndef _CINTRF_H_ +#define _CINTRF_H_ + +#include +#include + +#include "core.h" + +#define ELL_PITCH_ALIGN_S 32 +#define ELL_PITCH_ALIGN_D 16 + +#endif diff --git a/cuda/core_mod.f90 b/cuda/core_mod.f90 new file mode 100644 index 00000000..d30f8a99 --- /dev/null +++ b/cuda/core_mod.f90 @@ -0,0 +1,53 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module core_mod + use iso_c_binding + + integer(c_int), parameter :: spgpu_type_int = 0 + integer(c_int), parameter :: spgpu_type_float = 1 + integer(c_int), parameter :: spgpu_type_double = 2 + integer(c_int), parameter :: spgpu_type_complex_float = 3 + integer(c_int), parameter :: spgpu_type_complex_double = 4 + integer(c_int), parameter :: spgpu_success = 0 + integer(c_int), parameter :: spgpu_unsupported = 1 + integer(c_int), parameter :: spgpu_unspecified = 2 + integer(c_int), parameter :: spgpu_outofmem = 3 + + interface + subroutine psb_cudaSync() & + & bind(c,name='cudaSync') + use iso_c_binding + end subroutine psb_cudaSync + end interface + +end module core_mod diff --git a/cuda/cuda_util.c b/cuda/cuda_util.c new file mode 100644 index 00000000..b7d0199e --- /dev/null +++ b/cuda/cuda_util.c @@ -0,0 +1,821 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include "cuda_util.h" + + +static int hasUVA=-1; +static struct cudaDeviceProp *prop=NULL; +static spgpuHandle_t psb_cuda_handle = NULL; +static cublasHandle_t psb_cublas_handle = NULL; +#if defined(TRACK_CUDA_MALLOC) +static long long total_cuda_mem = 0; +#endif + +int allocRemoteBuffer(void** buffer, int count) +{ + cudaError_t err = cudaMalloc(buffer, count); +#if defined(TRACK_CUDA_MALLOC) + total_cuda_mem += count; + fprintf(stderr,"Tracking CUDA allocRemoteBuffer for %ld bytes total %ld address %p\n", + count, total_cuda_mem, *buffer); +#endif + + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + else + { + fprintf(stderr,"CUDA allocRemoteBuffer for %d bytes Error: %s \n", + count, cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int hostRegisterMapped(void *pointer, long size) +{ + cudaError_t err = cudaHostRegister(pointer, size, cudaHostRegisterMapped); + + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + else + { + fprintf(stderr,"CUDA hostRegisterMapped Error: %s\n", cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int getDevicePointer(void **d_p, void * h_p) +{ + cudaError_t err = cudaHostGetDevicePointer(d_p,h_p,0); + + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + else + { + fprintf(stderr,"CUDA getDevicePointer Error: %s\n", cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int registerMappedMemory(void *buffer, void **dp, int size) +{ + //cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped); + cudaError_t err = cudaHostRegister(buffer, size, cudaHostRegisterMapped); + if (err == cudaSuccess) err = cudaHostGetDevicePointer(dp,buffer,0); + + if (err == cudaSuccess) + { + err = cudaHostGetDevicePointer(dp,buffer,0); + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + else + { + fprintf(stderr,"CUDA registerMappedMemory Error: %s\n", cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } + } + else + { + fprintf(stderr,"CUDA registerMappedMemory Error: %s\n", cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int allocMappedMemory(void **buffer, void **dp, int size) +{ + cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped); + if (err == 0) err = cudaHostGetDevicePointer(dp,*buffer,0); + + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + else + { + fprintf(stderr,"CUDA allocMappedMemory Error: %s\n", cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int unregisterMappedMemory(void *buffer) +{ + //cudaError_t err = cudaHostAlloc(buffer,size,cudaHostAllocMapped); + cudaError_t err = cudaHostUnregister(buffer); + + if (err == cudaSuccess) + { + return SPGPU_SUCCESS; + } + else + { + fprintf(stderr,"CUDA unregisterMappedMemory Error: %s\n", cudaGetErrorString(err)); + if(err == cudaErrorMemoryAllocation) + return SPGPU_OUTOFMEMORY; + else + return SPGPU_UNSPECIFIED; + } +} + +int writeRemoteBuffer(void* hostSrc, void* buffer, int count) +{ + cudaError_t err = cudaMemcpy(buffer, hostSrc, count, cudaMemcpyHostToDevice); + + if (err == cudaSuccess) + return SPGPU_SUCCESS; + else { + fprintf(stderr,"CUDA Error writeRemoteBuffer: %s %p %p %d\n", + cudaGetErrorString(err),buffer, hostSrc, count); + return SPGPU_UNSPECIFIED; + } +} + +int readRemoteBuffer(void* hostDest, void* buffer, int count) +{ + + + cudaError_t err1; + cudaError_t err; +#if 0 + { + err1 =cudaGetLastError(); + fprintf(stderr,"CUDA Error prior to readRemoteBuffer: %s %d\n", + cudaGetErrorString(err1),err1); + } + +#endif + err = cudaMemcpy(hostDest, buffer, count, cudaMemcpyDeviceToHost); + + if (err == cudaSuccess) + return SPGPU_SUCCESS; + else { + fprintf(stderr,"CUDA Error readRemoteBuffer: %s %p %p %d %d\n", + cudaGetErrorString(err),hostDest,buffer,count,err); + return SPGPU_UNSPECIFIED; + } +} + +int freeRemoteBuffer(void* buffer) +{ + cudaError_t err = cudaFree(buffer); + if (err == cudaSuccess) + return SPGPU_SUCCESS; + else { + fprintf(stderr,"CUDA Error freeRemoteBuffer: %s %p\n", cudaGetErrorString(err),buffer); + return SPGPU_UNSPECIFIED; + } +} + +int gpuInit(int dev) +{ + + int count,err; + + if ((err=cudaSetDeviceFlags(cudaDeviceMapHost))!=cudaSuccess) + fprintf(stderr,"Error On SetDeviceFlags: %d '%s'\n",err,cudaGetErrorString(err)); + if ((prop=(struct cudaDeviceProp *) malloc(sizeof(struct cudaDeviceProp)))==NULL) { + fprintf(stderr,"CUDA Error gpuInit3: not malloced prop\n"); + return SPGPU_UNSPECIFIED; + } + err = setDevice(dev); + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error gpuInit2: %s\n", cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } + if (!psb_cublas_handle) + psb_cudaCreateCublasHandle(); + hasUVA=getDeviceHasUVA(); + FcusparseCreate(); + return err; + +} + +void gpuClose() +{ + cudaStream_t st1, st2; + if (! psb_cuda_handle) + st1=spgpuGetStream(psb_cuda_handle); + if (! psb_cublas_handle) + cublasGetStream(psb_cublas_handle,&st2); + FcusparseDestroy(); + psb_cudaDestroyHandle(); + if (st1 != st2) + psb_cudaDestroyCublasHandle(); + free(prop); + prop=NULL; + hasUVA=-1; +} + + +int setDevice(int dev) +{ + int count,err,idev; + + err = cudaGetDeviceCount(&count); + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error setDevice: %s\n", cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } + + if ((0<=dev)&&(devunifiedAddressing; + return(count); +} + +int getGPUMultiProcessors() +{ int count=0; + if (prop!=NULL) + count = prop->multiProcessorCount; + return(count); +} + + +int getGPUMemoryBusWidth() +{ int count=0; +#if CUDART_VERSION >= 5000 + if (prop!=NULL) + count = prop->memoryBusWidth; +#endif + return(count); +} +int getGPUMemoryClockRate() +{ int count=0; +#if CUDART_VERSION >= 5000 + if (prop!=NULL) + count = prop->memoryClockRate; +#endif + return(count); +} +int getGPUWarpSize() +{ int count=0; + if (prop!=NULL) + count = prop->warpSize; + return(count); +} +int getGPUMaxThreadsPerBlock() +{ int count=0; + if (prop!=NULL) + count = prop->maxThreadsPerBlock; + return(count); +} +int getGPUMaxThreadsPerMP() +{ int count=0; + if (prop!=NULL) + count = prop->maxThreadsPerMultiProcessor; + return(count); +} +int getGPUMaxRegistersPerBlock() +{ int count=0; + if (prop!=NULL) + count = prop->regsPerBlock; + return(count); +} + +void cpyGPUNameString(char *cstring) +{ + *cstring='\0'; + if (prop!=NULL) + strcpy(cstring,prop->name); + +} + +int DeviceHasUVA() +{ + return(hasUVA == 1); +} + + +int getDeviceCount() +{ int count; + cudaError_t err; + err = cudaGetDeviceCount(&count); + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error getDeviceCount: %s\n", cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } + return(count); +} + +void cudaSync() +{ + cudaError_t err; + err = cudaDeviceSynchronize(); +#if 0 + if (err == cudaSuccess) + return SPGPU_SUCCESS; + else { + fprintf(stderr,"CUDA Error cudaSync: %s\n", cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } +#else + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error cudaSync: %s\n", cudaGetErrorString(err)); + } + return ; +#endif +} + +void cudaReset() +{ + cudaError_t err; + err = cudaDeviceReset(); +#if 0 + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error Reset: %s\n", cudaGetErrorString(err)); + return SPGPU_UNSPECIFIED; + } +#else + if (err != cudaSuccess) { + fprintf(stderr,"CUDA Error Reset: %s\n", cudaGetErrorString(err)); + } + return ; +#endif +} + + +spgpuHandle_t psb_cudaGetHandle() +{ + return psb_cuda_handle; +} + +void psb_cudaCreateHandle() +{ + if (!psb_cuda_handle) + spgpuCreate(&psb_cuda_handle, getDevice()); + +} + +void psb_cudaDestroyHandle() +{ + if (!psb_cuda_handle) + spgpuDestroy(psb_cuda_handle); + psb_cuda_handle = NULL; +} + +cudaStream_t psb_cudaGetStream() +{ + return spgpuGetStream(psb_cuda_handle); +} + +void psb_cudaSetStream(cudaStream_t stream) +{ + spgpuSetStream(psb_cuda_handle, stream); + return ; +} + + + +cublasHandle_t psb_cudaGetCublasHandle() +{ + if (!psb_cublas_handle) + psb_cudaCreateCublasHandle(); + return psb_cublas_handle; +} +void psb_cudaCreateCublasHandle() +{ if (!psb_cublas_handle) + cublasCreate(&psb_cublas_handle); +} +void psb_cudaDestroyCublasHandle() +{ + if (!psb_cublas_handle) + cublasDestroy(psb_cublas_handle); + psb_cublas_handle=NULL; +} + +/* Simple memory tools */ + +int allocateInt(void **d_int, int n) +{ + return allocRemoteBuffer((void **)(d_int), n*sizeof(int)); +} + +int writeInt(void *d_int, int* h_int, int n) +{ + int i,j; + int *di; + i = writeRemoteBuffer((void*)h_int, (void*)d_int, n*sizeof(int)); + return i; +} + +int readInt(void* d_int, int* h_int, int n) +{ int i; + i = readRemoteBuffer((void *) h_int, (void *) d_int, n*sizeof(int)); + //cudaSync(); + return(i); +} + +int writeIntFirst(int first, void *d_int, int* h_int, int n, int IndexBase) +{ + int i,j; + int *di=(int *) d_int; + di = &(di[first-IndexBase]); + i = writeRemoteBuffer((void*)h_int, (void*)di, n*sizeof(int)); + return i; +} + +int readIntFirst(int first,void* d_int, int* h_int, int n, int IndexBase) +{ int i; + int *di=(int *) d_int; + di = &(di[first-IndexBase]); + i = readRemoteBuffer((void *) h_int, (void *) di, n*sizeof(int)); + //cudaSync(); + return(i); +} + +int allocateMultiInt(void **d_int, int m, int n) +{ + return allocRemoteBuffer((void **)(d_int), m*n*sizeof(int)); +} + +int writeMultiInt(void *d_int, int* h_int, int m, int n) +{ + int i,j; + int *di; + i = writeRemoteBuffer((void*)h_int, (void*)d_int, m*n*sizeof(int)); + return i; +} + +int readMultiInt(void* d_int, int* h_int, int m, int n) +{ int i; + i = readRemoteBuffer((void *) h_int, (void *) d_int, m*n*sizeof(int)); + //cudaSync(); + return(i); +} + +void freeInt(void *d_int) +{ + //printf("Before freeInt\n"); + freeRemoteBuffer(d_int); +} + + + + +int allocateFloat(void **d_float, int n) +{ + return allocRemoteBuffer((void **)(d_float), n*sizeof(float)); +} + +int writeFloat(void *d_float, float* h_float, int n) +{ + int i; + + i = writeRemoteBuffer((void*)h_float, (void*)d_float, n*sizeof(float)); + + return i; +} + +int readFloat(void* d_float, float* h_float, int n) +{ int i; + i = readRemoteBuffer((void *) h_float, (void *) d_float, n*sizeof(float)); + + return(i); +} + +int writeFloatFirst(int df, void *d_float, float* h_float, int n, int IndexBase) +{ + int i; + + float *dv=(float *) d_float; + dv = &dv[df-IndexBase]; + i = writeRemoteBuffer((void*)h_float, (void*)dv, n*sizeof(float)); + + return i; +} + +int readFloatFirst(int df, void* d_float, float* h_float, int n, int IndexBase) +{ int i; + float *dv=(float *) d_float; + dv = &dv[df-IndexBase]; + //fprintf(stderr,"readFloatFirst: %d %p %p %p %d \n",df,d_float,dv,h_float,n); + i = readRemoteBuffer((void *) h_float, (void *) dv, n*sizeof(float)); + + return(i); +} + + +int allocateMultiFloat(void **d_float, int m, int n) +{ + return allocRemoteBuffer((void **)(d_float), m*n*sizeof(float)); +} + +int writeMultiFloat(void *d_float, float* h_float, int m, int n) +{ + int i,j; + i = writeRemoteBuffer((void*)h_float, (void*)d_float, m*n*sizeof(float)); + return i; +} + +int readMultiFloat(void* d_float, float* h_float, int m, int n) +{ int i; + i = readRemoteBuffer((void *) h_float, (void *) d_float, m*n*sizeof(float)); + //cudaSync(); + return(i); +} + +void freeFloat(void *d_float) +{ + freeRemoteBuffer(d_float); +} + + + +int allocateDouble(void **d_double, int n) +{ + return allocRemoteBuffer((void **)(d_double), n*sizeof(double)); +} + +int writeDouble(void *d_double, double* h_double, int n) +{ + int i; + + i = writeRemoteBuffer((void*)h_double, (void*)d_double, n*sizeof(double)); + + return i; +} + +int readDouble(void* d_double, double* h_double, int n) +{ int i; + i = readRemoteBuffer((void *) h_double, (void *) d_double, n*sizeof(double)); + + return(i); +} + +int writeDoubleFirst(int df, void *d_double, double* h_double, int n, int IndexBase) +{ + int i; + + double *dv=(double *) d_double; + dv = &dv[df-IndexBase]; + i = writeRemoteBuffer((void*)h_double, (void*)dv, n*sizeof(double)); + + return i; +} + +int readDoubleFirst(int df, void* d_double, double* h_double, int n, int IndexBase) +{ int i; + double *dv=(double *) d_double; + dv = &dv[df-IndexBase]; + //fprintf(stderr,"readDoubleFirst: %d %p %p %p %d \n",df,d_double,dv,h_double,n); + i = readRemoteBuffer((void *) h_double, (void *) dv, n*sizeof(double)); + + return(i); +} + +int allocateMultiDouble(void **d_double, int m, int n) +{ + return allocRemoteBuffer((void **)(d_double), m*n*sizeof(double)); +} + +int writeMultiDouble(void *d_double, double* h_double, int m, int n) +{ + int i,j; + i = writeRemoteBuffer((void*)h_double, (void*)d_double, m*n*sizeof(double)); + return i; +} + +int readMultiDouble(void* d_double, double* h_double, int m, int n) +{ int i; + i = readRemoteBuffer((void *) h_double, (void *) d_double, m*n*sizeof(double)); + //cudaSync(); + return(i); +} + +void freeDouble(void *d_double) +{ + freeRemoteBuffer(d_double); +} + + + +int allocateFloatComplex(void **d_FloatComplex, int n) +{ + return allocRemoteBuffer((void **)(d_FloatComplex), n*sizeof(cuFloatComplex)); +} + +int writeFloatComplex(void *d_FloatComplex, cuFloatComplex* h_FloatComplex, int n) +{ + int i; + + i = writeRemoteBuffer((void*)h_FloatComplex, (void*)d_FloatComplex, n*sizeof(cuFloatComplex)); + + return i; +} + +int readFloatComplex(void* d_FloatComplex, cuFloatComplex* h_FloatComplex, int n) +{ int i; + i = readRemoteBuffer((void *) h_FloatComplex, (void *) d_FloatComplex, n*sizeof(cuFloatComplex)); + + return(i); +} + +int allocateMultiFloatComplex(void **d_FloatComplex, int m, int n) +{ + return allocRemoteBuffer((void **)(d_FloatComplex), m*n*sizeof(cuFloatComplex)); +} + +int writeMultiFloatComplex(void *d_FloatComplex, cuFloatComplex* h_FloatComplex, int m, int n) +{ + int i,j; + i = writeRemoteBuffer((void*)h_FloatComplex, (void*)d_FloatComplex, m*n*sizeof(cuFloatComplex)); + return i; +} + +int readMultiFloatComplex(void* d_FloatComplex, cuFloatComplex* h_FloatComplex, int m, int n) +{ int i; + i = readRemoteBuffer((void *) h_FloatComplex, (void *) d_FloatComplex, m*n*sizeof(cuFloatComplex)); + //cudaSync(); + return(i); +} + +int writeFloatComplexFirst(int df, void *d_floatComplex, + cuFloatComplex* h_floatComplex, int n, int IndexBase) +{ + int i; + + cuFloatComplex *dv=(cuFloatComplex *) d_floatComplex; + dv = &dv[df-IndexBase]; + i = writeRemoteBuffer((void*)h_floatComplex, (void*)dv, n*sizeof(cuFloatComplex)); + + return i; +} + +int readFloatComplexFirst(int df, void* d_floatComplex, cuFloatComplex* h_floatComplex, + int n, int IndexBase) +{ int i; + cuFloatComplex *dv=(cuFloatComplex *) d_floatComplex; + dv = &dv[df-IndexBase]; + i = readRemoteBuffer((void *) h_floatComplex, (void *) dv, n*sizeof(cuFloatComplex)); + + return(i); +} + +void freeFloatComplex(void *d_FloatComplex) +{ + freeRemoteBuffer(d_FloatComplex); +} + + + + +int allocateDoubleComplex(void **d_DoubleComplex, int n) +{ + return allocRemoteBuffer((void **)(d_DoubleComplex), n*sizeof(cuDoubleComplex)); +} + +int writeDoubleComplex(void *d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int n) +{ + int i; + + i = writeRemoteBuffer((void*)h_DoubleComplex, (void*)d_DoubleComplex, n*sizeof(cuDoubleComplex)); + + return i; +} + +int readDoubleComplex(void* d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int n) +{ int i; + i = readRemoteBuffer((void *) h_DoubleComplex, (void *) d_DoubleComplex, n*sizeof(cuDoubleComplex)); + + return(i); +} + +int writeDoubleComplexFirst(int df, void *d_doubleComplex, + cuDoubleComplex* h_doubleComplex, int n, int IndexBase) +{ + int i; + + cuDoubleComplex *dv=(cuDoubleComplex *) d_doubleComplex; + dv = &dv[df-IndexBase]; + i = writeRemoteBuffer((void*)h_doubleComplex, (void*)dv, n*sizeof(cuDoubleComplex)); + + return i; +} + +int readDoubleComplexFirst(int df, void* d_doubleComplex, cuDoubleComplex* h_doubleComplex, + int n, int IndexBase) +{ int i; + cuDoubleComplex *dv=(cuDoubleComplex *) d_doubleComplex; + dv = &dv[df-IndexBase]; + i = readRemoteBuffer((void *) h_doubleComplex, (void *) dv, n*sizeof(cuDoubleComplex)); + + return(i); +} + +int allocateMultiDoubleComplex(void **d_DoubleComplex, int m, int n) +{ + return allocRemoteBuffer((void **)(d_DoubleComplex), m*n*sizeof(cuDoubleComplex)); +} + +int writeMultiDoubleComplex(void *d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int m, int n) +{ + int i,j; + i = writeRemoteBuffer((void*)h_DoubleComplex, (void*)d_DoubleComplex, m*n*sizeof(cuDoubleComplex)); + return i; +} + +int readMultiDoubleComplex(void* d_DoubleComplex, cuDoubleComplex* h_DoubleComplex, int m, int n) +{ int i; + i = readRemoteBuffer((void *) h_DoubleComplex, (void *) d_DoubleComplex, m*n*sizeof(cuDoubleComplex)); + //cudaSync(); + return(i); +} + +void freeDoubleComplex(void *d_DoubleComplex) +{ + freeRemoteBuffer(d_DoubleComplex); +} + + + +double etime() +{ + struct timeval tt; + struct timezone tz; + double temp; + if (gettimeofday(&tt,&tz) != 0) { + fprintf(stderr,"Fatal error for gettimeofday ??? \n"); + exit(-1); + } + temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6; + return(temp); +} + + diff --git a/cuda/cuda_util.h b/cuda/cuda_util.h new file mode 100644 index 00000000..4eafb5bf --- /dev/null +++ b/cuda/cuda_util.h @@ -0,0 +1,138 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#ifndef _CUDA_UTIL_H_ +#define _CUDA_UTIL_H_ + +#include +#include +#include +#include + +#include "cuda_runtime.h" +#include "core.h" +#include "cuComplex.h" +#include "fcusparse.h" +#include "cublas_v2.h" + +int allocRemoteBuffer(void** buffer, int count); +int allocMappedMemory(void **buffer, void **dp, int size); +int registerMappedMemory(void *buffer, void **dp, int size); +int unregisterMappedMemory(void *buffer); +int writeRemoteBuffer(void* hostSrc, void* buffer, int count); +int readRemoteBuffer(void* hostDest, void* buffer, int count); +int freeRemoteBuffer(void* buffer); +int gpuInit(int dev); +int getDeviceCount(); +int getDevice(); +int getDeviceHasUVA(); +int setDevice(int dev); +int getGPUMultiProcessors(); +int getGPUMemoryBusWidth(); +int getGPUMemoryClockRate(); +int getGPUWarpSize(); +int getGPUMaxThreadsPerBlock(); +int getGPUMaxThreadsPerMP(); +int getGPUMaxRegistersPerBlock(); +void cpyGPUNameString(char *cstring); + + +void cudaSync(); +void cudaReset(); +void gpuClose(); + + +spgpuHandle_t psb_cudaGetHandle(); +void psb_cudaCreateHandle(); +void psb_cudaDestroyHandle(); +cudaStream_t psb_cudaGetStream(); +void psb_cudaSetStream(cudaStream_t stream); + +cublasHandle_t psb_cudaGetCublasHandle(); +void psb_cudaCreateCublasHandle(); +void psb_cudaDestroyCublasHandle(); + + +int allocateInt(void **, int); +int allocateMultiInt(void **, int, int); +int writeInt(void *, int *, int); +int writeMultiInt(void *, int* , int , int ); +int readInt(void *, int *, int); +int readMultiInt(void*, int*, int, int ); +int writeIntFirst(int,void *, int *, int,int); +int readIntFirst(int,void *, int *, int,int); +void freeInt(void *); + +int allocateFloat(void **, int); +int allocateMultiFloat(void **, int, int); +int writeFloat(void *, float *, int); +int writeMultiFloat(void *, float* , int , int ); +int readFloat(void *, float*, int); +int readMultiFloat(void*, float*, int, int ); +int writeFloatFirst(int, void *, float*, int, int); +int readFloatFirst(int, void *, float*, int, int); +void freeFloat(void *); + +int allocateDouble(void **, int); +int allocateMultiDouble(void **, int, int); +int writeDouble(void *, double*, int); +int writeMultiDouble(void *, double* , int , int ); +int readDouble(void *, double*, int); +int readMultiDouble(void*, double*, int, int ); +int writeDoubleFirst(int, void *, double*, int, int); +int readDoubleFirst(int, void *, double*, int, int); +void freeDouble(void *); + +int allocateFloatComplex(void **, int); +int allocateMultiFloatComplex(void **, int, int); +int writeFloatComplex(void *, cuFloatComplex*, int); +int writeMultiFloatComplex(void *, cuFloatComplex* , int , int ); +int readFloatComplex(void *, cuFloatComplex*, int); +int readMultiFloatComplex(void*, cuFloatComplex*, int, int ); +int writeFloatComplexFirst(int, void *, cuFloatComplex*, int, int); +int readFloatComplexFirst(int, void *, cuFloatComplex*, int, int); +void freeFloatComplex(void *); + +int allocateDoubleComplex(void **, int); +int allocateMultiDoubleComplex(void **, int, int); +int writeDoubleComplex(void *, cuDoubleComplex*, int); +int writeMultiDoubleComplex(void *, cuDoubleComplex* , int , int ); +int readDoubleComplex(void *, cuDoubleComplex*, int); +int readMultiDoubleComplex(void*, cuDoubleComplex*, int, int ); +int writeDoubleComplexFirst(int, void *, cuDoubleComplex*, int, int); +int readDoubleComplexFirst(int, void *, cuDoubleComplex*, int, int); +void freeDoubleComplex(void *); + +double etime(); + + +#endif diff --git a/cuda/cusparse_mod.F90 b/cuda/cusparse_mod.F90 new file mode 100644 index 00000000..4ae16cff --- /dev/null +++ b/cuda/cusparse_mod.F90 @@ -0,0 +1,38 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +module cusparse_mod + use base_cusparse_mod + use s_cusparse_mod + use d_cusparse_mod + use c_cusparse_mod + use z_cusparse_mod +end module cusparse_mod diff --git a/cuda/cvectordev.c b/cuda/cvectordev.c new file mode 100644 index 00000000..cdfda481 --- /dev/null +++ b/cuda/cvectordev.c @@ -0,0 +1,363 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include +//#include "utils.h" +//#include "common.h" +#include "cvectordev.h" + + +int registerMappedFloatComplex(void *buff, void **d_p, int n, cuFloatComplex dummy) +{ + return registerMappedMemory(buff,d_p,n*sizeof(cuFloatComplex)); +} + +int writeMultiVecDeviceFloatComplex(void* deviceVec, cuFloatComplex* hostVec) +{ int i; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + // Ex updateFromHost vector function + i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(cuFloatComplex)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i); + } + return(i); +} + +int writeMultiVecDeviceFloatComplexR2(void* deviceVec, cuFloatComplex* hostVec, int ld) +{ int i; + i = writeMultiVecDeviceFloatComplex(deviceVec, (void *) hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceFloatComplexR2",i); + } + return(i); +} + +int readMultiVecDeviceFloatComplex(void* deviceVec, cuFloatComplex* hostVec) +{ int i,j; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(cuFloatComplex)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloat",i); + } + return(i); +} + +int readMultiVecDeviceFloatComplexR2(void* deviceVec, cuFloatComplex* hostVec, int ld) +{ int i; + i = readMultiVecDeviceFloatComplex(deviceVec, hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloatComplexR2",i); + } + return(i); +} + +int setscalMultiVecDeviceFloatComplex(cuFloatComplex val, int first, int last, + int indexBase, void* devMultiVecX) +{ int i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + spgpuHandle_t handle=psb_cudaGetHandle(); + + spgpuCsetscal(handle, first, last, indexBase, val, (cuFloatComplex *) devVecX->v_); + + return(i); +} + +int geinsMultiVecDeviceFloatComplex(int n, void* devMultiVecIrl, void* devMultiVecVal, + int dupl, int indexBase, void* devMultiVecX) +{ int j=0, i=0,nmin=0,nmax=0; + int pitch = 0; + cuFloatComplex beta; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; + struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecIrl->pitch_; + if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) + return SPGPU_UNSUPPORTED; + + //fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_); + + if (dupl == INS_OVERWRITE) + beta = make_cuFloatComplex(0.0, 0.0); + else if (dupl == INS_ADD) + beta = make_cuFloatComplex(1.0, 0.0); + else + beta = make_cuFloatComplex(0.0, 0.0); + + spgpuCscat(handle, (cuFloatComplex *) devVecX->v_, n, (cuFloatComplex*)devVecVal->v_, + (int*)devVecIrl->v_, indexBase, beta); + + return(i); +} + + +int igathMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + + i= igathMultiVecDeviceFloatComplex(deviceVec, vectorId, n, + first, (void*) devIdx->v_, hfirst, host_values, indexBase); + return(i); +} + +int igathMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx =(int *) indexes;; + cuFloatComplex *hv = (cuFloatComplex *) host_values;; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_cudaGetHandle(); + + i=0; + hv = &(hv[hfirst-indexBase]); + idx = &(idx[first-indexBase]); + spgpuCgath(handle,hv, n, idx,indexBase, + (cuFloatComplex *) devVec->v_+vectorId*devVec->pitch_); + return(i); +} + +int iscatMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n, + int first, void *deviceIdx, + int hfirst, void* host_values, + int indexBase, cuFloatComplex beta) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + i= iscatMultiVecDeviceFloatComplex(deviceVec, vectorId, n, first, + (void*) devIdx->v_, hfirst,host_values, + indexBase, beta); + return(i); +} + +int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, + int first, void *indexes, + int hfirst, void* host_values, + int indexBase, cuFloatComplex beta) +{ int i=0; + cuFloatComplex *hv = (cuFloatComplex *) host_values; + int *idx=(int *) indexes; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_cudaGetHandle(); + + idx = &(idx[first-indexBase]); + hv = &(hv[hfirst-indexBase]); + spgpuCscat(handle, (cuFloatComplex *) devVec->v_, n, hv, idx, indexBase, beta); + return SPGPU_SUCCESS; + +} + + +int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuCmnrm2(handle, y_res, n,(cuFloatComplex *)devVecA->v_, + devVecA->count_, devVecA->pitch_); + return(i); +} + +int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuCmamax(handle, y_res, n,(cuFloatComplex *)devVecA->v_, + devVecA->count_, devVecA->pitch_); + return(i); +} + +int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuCmasum(handle, y_res, n,(cuFloatComplex *)devVecA->v_, + devVecA->count_, devVecA->pitch_); + + return(i); +} + +int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + // Note: inner kernel can handle aliased input/output + spgpuCscal(handle, (cuFloatComplex *)devVecA->v_, devVecA->pitch_, + alpha, (cuFloatComplex *)devVecA->v_); + return(i); +} + +int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, + void* devMultiVecA, void* devMultiVecB) +{int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; + spgpuHandle_t handle=psb_cudaGetHandle(); + + spgpuCmdot(handle, y_res, n, (cuFloatComplex*)devVecA->v_, + (cuFloatComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_); + return(i); +} + +int axpbyMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha, void* devMultiVecX, + cuFloatComplex beta, void* devMultiVecY) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + for(j=0;jcount_;j++) + spgpuCaxpby(handle,(cuFloatComplex*)devVecY->v_+pitch*j, n, beta, + (cuFloatComplex*)devVecY->v_+pitch*j, alpha, + (cuFloatComplex*) devVecX->v_+pitch*j); + return(i); +} + +int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta, + cuFloatComplex gamma, cuFloatComplex delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuCabgdxyz(handle,n, alpha,beta,gamma,delta, + (cuFloatComplex *)devVecX->v_,(cuFloatComplex *) devVecY->v_,(cuFloatComplex *) devVecZ->v_); + return(i); +} + +int xyzwMultiVecDeviceFloatComplex(int n,cuFloatComplex a,cuFloatComplex b, + cuFloatComplex c, cuFloatComplex d, + cuFloatComplex e, cuFloatComplex f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + struct MultiVectDevice *devVecW = (struct MultiVectDevice *) devMultiVecW; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuCxyzw(handle,n, a,b,c,d,e,f, + (cuFloatComplex *)devVecX->v_,(cuFloatComplex *) devVecY->v_, + (cuFloatComplex *) devVecZ->v_,(cuFloatComplex *) devVecW->v_); + return(i); +} + +int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, + void *deviceVecA, void *deviceVecB) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + spgpuHandle_t handle=psb_cudaGetHandle(); + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuCmaxy(handle, (cuFloatComplex*)devVecB->v_, n, alpha, + (cuFloatComplex*)devVecA->v_, + (cuFloatComplex*)devVecB->v_, devVecA->count_, devVecA->pitch_); + + return(i); +} + +int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, + void *deviceVecB, cuFloatComplex beta, + void *deviceVecZ) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; + spgpuHandle_t handle=psb_cudaGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) + return SPGPU_UNSUPPORTED; + spgpuCmaxypbz(handle, (cuFloatComplex*)devVecZ->v_, n, beta, + (cuFloatComplex*)devVecZ->v_, + alpha, (cuFloatComplex*) devVecA->v_, (cuFloatComplex*) devVecB->v_, + devVecB->count_, devVecB->pitch_); + return(i); +} + + +int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, void *deviceVecA, + void *deviceVecB) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + + spgpuHandle_t handle=psb_cudaGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuCabs(handle, (cuFloatComplex*)devVecB->v_, n, + alpha, (cuFloatComplex*)devVecA->v_); + + return(i); +} + +int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + spgpuHandle_t handle=psb_cudaGetHandle(); + if (n > devVecA->size_) + return SPGPU_UNSUPPORTED; + + spgpuCabs(handle, (cuFloatComplex*)devVecA->v_, n, + alpha, (cuFloatComplex*)devVecA->v_); + + return(i); +} + + diff --git a/cuda/cvectordev.h b/cuda/cvectordev.h new file mode 100644 index 00000000..62693e27 --- /dev/null +++ b/cuda/cvectordev.h @@ -0,0 +1,86 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#pragma once +//#include "utils.h" +#include +#include "cuComplex.h" +#include "vectordev.h" +#include "cuda_runtime.h" +#include "core.h" + +int registerMappedFloatComplex(void *, void **, int, cuFloatComplex); +int writeMultiVecDeviceFloatComplex(void* deviceMultiVec, cuFloatComplex* hostMultiVec); +int writeMultiVecDeviceFloatComplexR2(void* deviceMultiVec, cuFloatComplex* hostMultiVec, int ld); +int readMultiVecDeviceFloatComplex(void* deviceMultiVec, cuFloatComplex* hostMultiVec); +int readMultiVecDeviceFloatComplexR2(void* deviceMultiVec, cuFloatComplex* hostMultiVec, int ld); + +int setscalMultiVecDeviceFloatComplex(cuFloatComplex val, int first, int last, + int indexBase, void* devVecX); + +int geinsMultiVecDeviceFloatComplex(int n, void* devVecIrl, void* devVecVal, + int dupl, int indexBase, void* devVecX); + +int igathMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase); +int igathMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, + int indexBase); +int iscatMultiVecDeviceFloatComplexVecIdx(void* deviceVec, int vectorId, int n, int first, + void *deviceIdx, int hfirst, void* host_values, + int indexBase, cuFloatComplex beta); +int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, cuFloatComplex beta); + +int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA); +int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA); +int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA); +int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA); +int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA, void* devVecB); + +int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, cuFloatComplex beta, void* devVecY); +int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha,cuFloatComplex beta, + cuFloatComplex gamma, cuFloatComplex delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); +int xyzwMultiVecDeviceFloatComplex(int n,cuFloatComplex a,cuFloatComplex b, + cuFloatComplex c, cuFloatComplex d, + cuFloatComplex e, cuFloatComplex f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW); +int axyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, void *deviceVecB); +int axybzMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA, + void *deviceVecB, cuFloatComplex beta, void *deviceVecZ); +int absMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void *deviceVecA); +int absMultiVecDeviceFloatComplex2(int n, cuFloatComplex alpha, + void *deviceVecA, void *deviceVecB); + diff --git a/cuda/d_cusparse_mod.F90 b/cuda/d_cusparse_mod.F90 new file mode 100644 index 00000000..399ac085 --- /dev/null +++ b/cuda/d_cusparse_mod.F90 @@ -0,0 +1,313 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module d_cusparse_mod + use base_cusparse_mod + + type, bind(c) :: d_Cmat + type(c_ptr) :: Mat = c_null_ptr + end type d_Cmat + +#if CUDA_SHORT_VERSION <= 10 + type, bind(c) :: d_Hmat + type(c_ptr) :: Mat = c_null_ptr + end type d_Hmat +#endif + + interface CSRGDeviceFree + function d_CSRGDeviceFree(Mat) & + & bind(c,name="d_CSRGDeviceFree") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int) :: res + end function d_CSRGDeviceFree + end interface + + interface CSRGDeviceSetMatType + function d_CSRGDeviceSetMatType(Mat,type) & + & bind(c,name="d_CSRGDeviceSetMatType") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_CSRGDeviceSetMatType + end interface + + interface CSRGDeviceSetMatFillMode + function d_CSRGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="d_CSRGDeviceSetMatFillMode") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_CSRGDeviceSetMatFillMode + end interface + + interface CSRGDeviceSetMatDiagType + function d_CSRGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="d_CSRGDeviceSetMatDiagType") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_CSRGDeviceSetMatDiagType + end interface + + interface CSRGDeviceSetMatIndexBase + function d_CSRGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="d_CSRGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_CSRGDeviceSetMatIndexBase + end interface + +#if CUDA_SHORT_VERSION <= 10 + interface CSRGDeviceCsrsmAnalysis + function d_CSRGDeviceCsrsmAnalysis(Mat) & + & bind(c,name="d_CSRGDeviceCsrsmAnalysis") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int) :: res + end function d_CSRGDeviceCsrsmAnalysis + end interface +#else + interface CSRGIsNullSvBuffer + function d_CSRGIsNullSvBuffer(Mat) & + & bind(c,name="d_CSRGIsNullSvBuffer") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int) :: res + end function d_CSRGIsNullSvBuffer + end interface +#endif + + interface CSRGDeviceAlloc + function d_CSRGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="d_CSRGDeviceAlloc") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function d_CSRGDeviceAlloc + end interface + + interface CSRGDeviceGetParms + function d_CSRGDeviceGetParms(Mat,nr,nc,nz) & + & bind(c,name="d_CSRGDeviceGetParms") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int) :: nr, nc, nz + integer(c_int) :: res + end function d_CSRGDeviceGetParms + end interface + + interface spsvCSRGDevice + function d_spsvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="d_spsvCSRGDevice") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + real(c_double), value :: alpha,beta + integer(c_int) :: res + end function d_spsvCSRGDevice + end interface + + interface spmvCSRGDevice + function d_spmvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="d_spmvCSRGDevice") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + real(c_double), value :: alpha,beta + integer(c_int) :: res + end function d_spmvCSRGDevice + end interface + + interface CSRGHost2Device + function d_CSRGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="d_CSRGHost2Device") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + real(c_double) :: val(*) + integer(c_int) :: res + end function d_CSRGHost2Device + end interface + + interface CSRGDevice2Host + function d_CSRGDevice2Host(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="d_CSRGDevice2Host") result(res) + use iso_c_binding + import d_Cmat + type(d_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + real(c_double) :: val(*) + integer(c_int) :: res + end function d_CSRGDevice2Host + end interface + +#if CUDA_SHORT_VERSION <= 10 + interface HYBGDeviceAlloc + function d_HYBGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="d_HYBGDeviceAlloc") result(res) + use iso_c_binding + import d_hmat + type(d_Hmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function d_HYBGDeviceAlloc + end interface + + interface HYBGDeviceFree + function d_HYBGDeviceFree(Mat) & + & bind(c,name="d_HYBGDeviceFree") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + integer(c_int) :: res + end function d_HYBGDeviceFree + end interface + + interface HYBGDeviceSetMatType + function d_HYBGDeviceSetMatType(Mat,type) & + & bind(c,name="d_HYBGDeviceSetMatType") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_HYBGDeviceSetMatType + end interface + + interface HYBGDeviceSetMatFillMode + function d_HYBGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="d_HYBGDeviceSetMatFillMode") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_HYBGDeviceSetMatFillMode + end interface + + interface HYBGDeviceSetMatDiagType + function d_HYBGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="d_HYBGDeviceSetMatDiagType") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_HYBGDeviceSetMatDiagType + end interface + + interface HYBGDeviceSetMatIndexBase + function d_HYBGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="d_HYBGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function d_HYBGDeviceSetMatIndexBase + end interface + + interface HYBGDeviceHybsmAnalysis + function d_HYBGDeviceHybsmAnalysis(Mat) & + & bind(c,name="d_HYBGDeviceHybsmAnalysis") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + integer(c_int) :: res + end function d_HYBGDeviceHybsmAnalysis + end interface + + interface spsvHYBGDevice + function d_spsvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="d_spsvHYBGDevice") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + real(c_double), value :: alpha,beta + integer(c_int) :: res + end function d_spsvHYBGDevice + end interface + + interface spmvHYBGDevice + function d_spmvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="d_spmvHYBGDevice") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + real(c_double), value :: alpha,beta + integer(c_int) :: res + end function d_spmvHYBGDevice + end interface + + interface HYBGHost2Device + function d_HYBGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="d_HYBGHost2Device") result(res) + use iso_c_binding + import d_Hmat + type(d_Hmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + real(c_double) :: val(*) + integer(c_int) :: res + end function d_HYBGHost2Device + end interface + +#endif + +end module d_cusparse_mod diff --git a/cuda/dcusparse.c b/cuda/dcusparse.c new file mode 100644 index 00000000..9af4ce38 --- /dev/null +++ b/cuda/dcusparse.c @@ -0,0 +1,42 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include + +#include +#include +#include "fcusparse.h" + +#include "dcusparse.h" +#include "fcusparse_dat.h" +#include "fcusparse_fct.h" diff --git a/cuda/dcusparse.h b/cuda/dcusparse.h new file mode 100644 index 00000000..34849b1e --- /dev/null +++ b/cuda/dcusparse.h @@ -0,0 +1,101 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#ifndef DCUSPARSE_ +#define DCUSPARSE_ + + +#include +#include + +#include +#include +#include "cintrf.h" + +/* Double precision real */ +#define TYPE double +#define CUSPARSE_BASE_TYPE CUDA_R_64F +#define T_CSRGDeviceMat d_CSRGDeviceMat +#define T_Cmat d_Cmat +#define T_spmvCSRGDevice d_spmvCSRGDevice +#define T_spsvCSRGDevice d_spsvCSRGDevice +#define T_CSRGDeviceAlloc d_CSRGDeviceAlloc +#define T_CSRGDeviceFree d_CSRGDeviceFree +#define T_CSRGHost2Device d_CSRGHost2Device +#define T_CSRGDevice2Host d_CSRGDevice2Host +#define T_CSRGDeviceSetMatFillMode d_CSRGDeviceSetMatFillMode +#define T_CSRGDeviceSetMatDiagType d_CSRGDeviceSetMatDiagType +#define T_CSRGDeviceGetParms d_CSRGDeviceGetParms + +#if CUDA_SHORT_VERSION <= 10 +#define T_CSRGDeviceSetMatType d_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase d_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsmAnalysis d_CSRGDeviceCsrsmAnalysis +#define cusparseTcsrmv cusparseDcsrmv +#define cusparseTcsrsv_solve cusparseDcsrsv_solve +#define cusparseTcsrsv_analysis cusparseDcsrsv_analysis +#define T_HYBGDeviceMat d_HYBGDeviceMat +#define T_Hmat d_Hmat +#define T_HYBGDeviceFree d_HYBGDeviceFree +#define T_spmvHYBGDevice d_spmvHYBGDevice +#define T_HYBGDeviceAlloc d_HYBGDeviceAlloc +#define T_HYBGDeviceSetMatDiagType d_HYBGDeviceSetMatDiagType +#define T_HYBGDeviceSetMatIndexBase d_HYBGDeviceSetMatIndexBase +#define T_HYBGDeviceSetMatType d_HYBGDeviceSetMatType +#define T_HYBGDeviceSetMatFillMode d_HYBGDeviceSetMatFillMode +#define T_HYBGDeviceHybsmAnalysis d_HYBGDeviceHybsmAnalysis +#define T_spsvHYBGDevice d_spsvHYBGDevice +#define T_HYBGHost2Device d_HYBGHost2Device +#define cusparseThybmv cusparseDhybmv +#define cusparseThybsv_solve cusparseDhybsv_solve +#define cusparseThybsv_analysis cusparseDhybsv_analysis +#define cusparseTcsr2hyb cusparseDcsr2hyb + +#elif CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType d_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase d_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis d_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseDcsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseDcsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseDcsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer d_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr d_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr d_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr d_CSRGCreateSpMVDescr + +#endif + +#include "fcusparse.h" + +#endif + diff --git a/cuda/diagdev.c b/cuda/diagdev.c new file mode 100644 index 00000000..0cf78a41 --- /dev/null +++ b/cuda/diagdev.c @@ -0,0 +1,261 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + +#include "diagdev.h" +#include +#include +#include +#include +//new +DiagDeviceParams getDiagDeviceParams(unsigned int rows, unsigned int columns, unsigned int diags, unsigned int elementType) +{ + DiagDeviceParams params; + + params.elementType = elementType; + //numero di elementi di val + params.rows = rows; + params.columns = columns; + params.diags = diags; + + return params; + +} +//new +int allocDiagDevice(void ** remoteMatrix, DiagDeviceParams* params) +{ + struct DiagDevice *tmp = (struct DiagDevice *)malloc(sizeof(struct DiagDevice)); + int ret=SPGPU_SUCCESS; + *remoteMatrix = (void *)tmp; + + tmp->rows = params->rows; + + tmp->cols = params->columns; + + tmp->diags = params->diags; + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->off), tmp->diags*sizeof(int)); + + /* tmp->baseIndex = params->firstIndex; */ + + if (params->elementType == SPGPU_TYPE_INT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(int)); + } + else if (params->elementType == SPGPU_TYPE_FLOAT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(float)); + } + else if (params->elementType == SPGPU_TYPE_DOUBLE) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(double)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(cuFloatComplex)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->rows*tmp->diags*sizeof(cuDoubleComplex)); + } + else + return SPGPU_UNSUPPORTED; // Unsupported params + return ret; +} + +void freeDiagDevice(void* remoteMatrix) +{ + struct DiagDevice *devMat = (struct DiagDevice *) remoteMatrix; + //fprintf(stderr,"freeHllDevice\n"); + if (devMat != NULL) { + freeRemoteBuffer(devMat->off); + freeRemoteBuffer(devMat->cM); + free(remoteMatrix); + } +} + +//new +int FallocDiagDevice(void** deviceMat, unsigned int rows, unsigned int columns,unsigned int diags,unsigned int elementType) +{ int i; + DiagDeviceParams p; + + p = getDiagDeviceParams(rows, columns, diags,elementType); + i = allocDiagDevice(deviceMat, &p); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i); + } + return(i); +} + +int writeDiagDeviceDouble(void* deviceMat, double* a, int* off, int n) +{ int i,fo,fa; + char buf_a[255], buf_o[255],tmp[255]; + struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; + // Ex updateFromHost function + /* memset(buf_a,'\0',255); */ + /* memset(buf_o,'\0',255); */ + /* memset(tmp,'\0',255); */ + + /* strcat(buf_a,"mat_"); */ + /* strcat(buf_o,"off_"); */ + /* sprintf(tmp,"%d_%d.dat",devMat->rows,devMat->cols); */ + /* strcat(buf_a,tmp); */ + /* memset(tmp,'\0',255); */ + /* sprintf(tmp,"%d.dat",devMat->cols); */ + /* strcat(buf_o,tmp); */ + + /* fa = open(buf_a, O_CREAT | O_WRONLY | O_TRUNC, 0664); */ + /* fo = open(buf_o, O_CREAT | O_WRONLY | O_TRUNC, 0664); */ + + /* i = write(fa, a, sizeof(double)*devMat->cols*devMat->rows); */ + /* i = write(fo, off, sizeof(int)*devMat->cols); */ + + /* close(fa); */ + /* close(fo); */ + + i = writeRemoteBuffer((void*) a, (void *)devMat->cM, devMat->rows*devMat->diags*sizeof(double)); + i = writeRemoteBuffer((void*) off, (void *)devMat->off, devMat->diags*sizeof(int)); + + if(i==0) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +} + +int readDiagDeviceDouble(void* deviceMat, double* a, int* off) +{ int i; + struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; + i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(double)); + i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +//new +int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, + double beta, void* deviceY) +{ + struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_cudaGetHandle(); + +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif + /* spgpuDdiagspmv(handle, (double *)y->v_, (double *)y->v_,alpha,(double *)devMat->cM,devMat->off,devMat->rows,devMat->cols,x->v_,beta,devMat->baseIndex); */ + + spgpuDdiaspmv(handle, (double *)y->v_, (double *)y->v_,alpha,(double *)devMat->cM,devMat->off,devMat->rows,devMat->rows,devMat->cols,devMat->diags,x->v_,beta); + + //cudaSync(); + + return SPGPU_SUCCESS; +} + + +int writeDiagDeviceFloat(void* deviceMat, float* a, int* off, int n) +{ int i,fo,fa; + char buf_a[255], buf_o[255],tmp[255]; + struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; + // Ex updateFromHost function + /* memset(buf_a,'\0',255); */ + /* memset(buf_o,'\0',255); */ + /* memset(tmp,'\0',255); */ + + /* strcat(buf_a,"mat_"); */ + /* strcat(buf_o,"off_"); */ + /* sprintf(tmp,"%d_%d.dat",devMat->rows,devMat->cols); */ + /* strcat(buf_a,tmp); */ + /* memset(tmp,'\0',255); */ + /* sprintf(tmp,"%d.dat",devMat->cols); */ + /* strcat(buf_o,tmp); */ + + /* fa = open(buf_a, O_CREAT | O_WRONLY | O_TRUNC, 0664); */ + /* fo = open(buf_o, O_CREAT | O_WRONLY | O_TRUNC, 0664); */ + + /* i = write(fa, a, sizeof(float)*devMat->cols*devMat->rows); */ + /* i = write(fo, off, sizeof(int)*devMat->cols); */ + + /* close(fa); */ + /* close(fo); */ + + i = writeRemoteBuffer((void*) a, (void *)devMat->cM, devMat->rows*devMat->diags*sizeof(float)); + i = writeRemoteBuffer((void*) off, (void *)devMat->off, devMat->diags*sizeof(int)); + + if(i==0) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +} + +int readDiagDeviceFloat(void* deviceMat, float* a, int* off) +{ int i; + struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; + i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(float)); + i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); + }*/ + return SPGPU_SUCCESS; +} + +//new +int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, + float beta, void* deviceY) +{ + struct DiagDevice *devMat = (struct DiagDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_cudaGetHandle(); + +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif + /* spgpuDdiagspmv(handle, (float *)y->v_, (float *)y->v_,alpha,(float *)devMat->cM,devMat->off,devMat->rows,devMat->cols,x->v_,beta,devMat->baseIndex); */ + + spgpuSdiaspmv(handle, (float *)y->v_, (float *)y->v_,alpha,(float *)devMat->cM,devMat->off,devMat->rows,devMat->rows,devMat->cols,devMat->diags,x->v_,beta); + + //cudaSync(); + + return SPGPU_SUCCESS; +} + diff --git a/cuda/diagdev.h b/cuda/diagdev.h new file mode 100644 index 00000000..3a062fb7 --- /dev/null +++ b/cuda/diagdev.h @@ -0,0 +1,91 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + +#ifndef _DIAGDEV_H_ +#define _DIAGDEV_H_ + +#include "cintrf.h" +#include "vectordev.h" +#include "dia.h" + +struct DiagDevice +{ + // Compressed matrix + void *cM; //it can be float or double + + // offset (same size of cM) + int *off; + + int rows; + + int cols; + + int diags; + +}; + +typedef struct DiagDeviceParams +{ + + unsigned int elementType; + + // Number of rows. + // Used to allocate rS array + unsigned int rows; + //unsigned int hackOffsLength; + + // Number of columns. + // Used for error-checking + unsigned int columns; + + unsigned int diags; + +} DiagDeviceParams; +DiagDeviceParams getDiagDeviceParams(unsigned int rows, unsigned int columns, + unsigned int elementType, unsigned int firstIndex); +int FallocDiagDevice(void** deviceMat, unsigned int rows, unsigned int cols, + unsigned int elementType, unsigned int firstIndex); +int allocDiagDevice(void ** remoteMatrix, DiagDeviceParams* params); +void freeDiagDevice(void* remoteMatrix); + +int readDiagDeviceDouble(void* deviceMat, double* a, int* off); +int writeDiagDeviceDouble(void* deviceMat, double* a, int* off, int n); +int spmvDiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, + double beta, void* deviceY); + +int readDiagDeviceFloat(void* deviceMat, float* a, int* off); +int writeDiagDeviceFloat(void* deviceMat, float* a, int* off, int n); +int spmvDiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, + float beta, void* deviceY); + + + +#endif diff --git a/cuda/diagdev_mod.F90 b/cuda/diagdev_mod.F90 new file mode 100644 index 00000000..70d58d4e --- /dev/null +++ b/cuda/diagdev_mod.F90 @@ -0,0 +1,224 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module diagdev_mod + use iso_c_binding + use core_mod + + type, bind(c) :: diagdev_parms + integer(c_int) :: element_type + integer(c_int) :: rows + integer(c_int) :: columns + integer(c_int) :: firstIndex + end type diagdev_parms + + interface + function FgetDiagDeviceParams(rows, columns, elementType, firstIndex) & + & result(res) bind(c,name='getDiagDeviceParams') + use iso_c_binding + import :: diagdev_parms + type(diagdev_parms) :: res + integer(c_int), value :: rows,columns,elementType,firstIndex + end function FgetDiagDeviceParams + end interface + + + interface + function FallocDiagDevice(deviceMat,rows,columns,& + & elementType,firstIndex) & + & result(res) bind(c,name='FallocDiagDevice') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: rows,columns,elementType,firstIndex + type(c_ptr) :: deviceMat + end function FallocDiagDevice + end interface + + interface writeDiagDevice + + function writeDiagDeviceFloat(deviceMat,a,off,n) & + & result(res) bind(c,name='writeDiagDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: n + real(c_float) :: a(n,*) + integer(c_int) :: off(*)!,irn(*) + end function writeDiagDeviceFloat + + function writeDiagDeviceDouble(deviceMat,a,off,n) & + & result(res) bind(c,name='writeDiagDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int),value :: n + real(c_double) :: a(n,*) + integer(c_int) :: off(*) + end function writeDiagDeviceDouble + + function writeDiagDeviceFloatComplex(deviceMat,a,off,n) & + & result(res) bind(c,name='writeDiagDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: n + complex(c_float_complex) :: a(n,*) + integer(c_int) :: off(*)!,irn(*) + end function writeDiagDeviceFloatComplex + + function writeDiagDeviceDoubleComplex(deviceMat,a,off,n) & + & result(res) bind(c,name='writeDiagDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: n + complex(c_double_complex) :: a(n,*) + integer(c_int) :: off(*)!,irn(*) + end function writeDiagDeviceDoubleComplex + + end interface + + interface readDiagDevice + + function readDiagDeviceFloat(deviceMat,a,off,n) & + & result(res) bind(c,name='readDiagDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + real(c_float) :: a(n,*) + integer(c_int) :: off(*)!,irn(*) + end function readDiagDeviceFloat + + function readDiagDeviceDouble(deviceMat,a,off,n) & + & result(res) bind(c,name='readDiagDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int),value :: n + real(c_double) :: a(n,*) + integer(c_int) :: off(*) + end function readDiagDeviceDouble + + function readDiagDeviceFloatComplex(deviceMat,a,off,n) & + & result(res) bind(c,name='readDiagDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: n + complex(c_float_complex) :: a(n,*) + integer(c_int) :: off(*)!,irn(*) + end function readDiagDeviceFloatComplex + + function readDiagDeviceDoubleComplex(deviceMat,a,off,n) & + & result(res) bind(c,name='readDiagDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: n + complex(c_double_complex) :: a(n,*) + integer(c_int) :: off(*)!,irn(*) + end function readDiagDeviceDoubleComplex + + end interface + + interface + subroutine freeDiagDevice(deviceMat) & + & bind(c,name='freeDiagDevice') + use iso_c_binding + type(c_ptr), value :: deviceMat + end subroutine freeDiagDevice + end interface + + interface + subroutine resetDiagTimer() bind(c,name='resetDiagTimer') + use iso_c_binding + end subroutine resetDiagTimer + end interface + interface + function getDiagTimer() & + & bind(c,name='getDiagTimer') result(res) + use iso_c_binding + real(c_double) :: res + end function getDiagTimer + end interface + + interface + function getDiagDevicePitch(deviceMat) & + & bind(c,name='getDiagDevicePitch') result(res) + use iso_c_binding + type(c_ptr), value :: deviceMat + integer(c_int) :: res + end function getDiagDevicePitch + end interface + + interface + function getDiagDeviceMaxRowSize(deviceMat) & + & bind(c,name='getDiagDeviceMaxRowSize') result(res) + use iso_c_binding + type(c_ptr), value :: deviceMat + integer(c_int) :: res + end function getDiagDeviceMaxRowSize + end interface + + + interface spmvDiagDevice + function spmvDiagDeviceFloat(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvDiagDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_float),value :: alpha, beta + end function spmvDiagDeviceFloat + function spmvDiagDeviceDouble(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvDiagDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_double),value :: alpha, beta + end function spmvDiagDeviceDouble + function spmvDiagDeviceFloatComplex(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvDiagDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_float_complex),value :: alpha, beta + end function spmvDiagDeviceFloatComplex + function spmvDiagDeviceDoubleComplex(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvDiagDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_double_complex),value :: alpha, beta + end function spmvDiagDeviceDoubleComplex + end interface spmvDiagDevice + +end module diagdev_mod diff --git a/cuda/dnsdev.c b/cuda/dnsdev.c new file mode 100644 index 00000000..0a991012 --- /dev/null +++ b/cuda/dnsdev.c @@ -0,0 +1,321 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#include +#include "dnsdev.h" + +#define PASS_RS 0 + +#define IMIN(a,b) ((a)<(b) ? (a) : (b)) + +DnsDeviceParams getDnsDeviceParams(unsigned int rows, unsigned int columns, + unsigned int elementType, unsigned int firstIndex) +{ + DnsDeviceParams params; + + if (elementType == SPGPU_TYPE_DOUBLE) + { + params.pitch = ((rows + ELL_PITCH_ALIGN_D - 1)/ELL_PITCH_ALIGN_D)*ELL_PITCH_ALIGN_D; + } + else + { + params.pitch = ((rows + ELL_PITCH_ALIGN_S - 1)/ELL_PITCH_ALIGN_S)*ELL_PITCH_ALIGN_S; + } + //For complex? + params.elementType = elementType; + params.rows = rows; + params.columns = columns; + params.firstIndex = firstIndex; + + return params; + +} +//new +int allocDnsDevice(void ** remoteMatrix, DnsDeviceParams* params) +{ + struct DnsDevice *tmp = (struct DnsDevice *)malloc(sizeof(struct DnsDevice)); + *remoteMatrix = (void *)tmp; + tmp->rows = params->rows; + tmp->columns = params->columns; + tmp->cMPitch = params->pitch; + tmp->pitch= tmp->cMPitch; + tmp->allocsize = (int)tmp->columns * tmp->pitch; + tmp->baseIndex = params->firstIndex; + //fprintf(stderr,"allocDnsDevice: %d %d %d \n",tmp->pitch, params->maxRowSize, params->avgRowSize); + if (params->elementType == SPGPU_TYPE_FLOAT) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(float)); + else if (params->elementType == SPGPU_TYPE_DOUBLE) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(double)); + else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuFloatComplex)); + else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuDoubleComplex)); + else + return SPGPU_UNSUPPORTED; // Unsupported params + //fprintf(stderr,"From allocDnsDevice: %d %d %d %p %p %p\n",tmp->maxRowSize, + // tmp->avgRowSize,tmp->allocsize,tmp->rS,tmp->rP,tmp->cM); + + return SPGPU_SUCCESS; +} + +void freeDnsDevice(void* remoteMatrix) +{ + struct DnsDevice *devMat = (struct DnsDevice *) remoteMatrix; + //fprintf(stderr,"freeDnsDevice\n"); + if (devMat != NULL) { + freeRemoteBuffer(devMat->cM); + free(remoteMatrix); + } +} + +//new +int FallocDnsDevice(void** deviceMat, unsigned int rows, + unsigned int columns, unsigned int elementType, + unsigned int firstIndex) +{ int i; + DnsDeviceParams p; + + p = getDnsDeviceParams(rows, columns, elementType, firstIndex); + i = allocDnsDevice(deviceMat, &p); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocDnsDevice",i); + } + return(i); +} + + +int spmvDnsDeviceFloat(char transa, int m, int n, int k, float *alpha, + void *deviceMat, void* deviceX, float *beta, void* deviceY) +{ + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + int status; + + cublasHandle_t handle=psb_cudaGetCublasHandle(); + cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); + /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ + if (n == 1) { + status = cublasSgemv(handle, trans, m,k, + alpha, devMat->cM,devMat->pitch, x->v_,1, + beta, y->v_,1); + } else { + status = cublasSgemm(handle, trans, CUBLAS_OP_N, m,n,k, + alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_, + beta, y->v_,y->pitch_); + } + + if (status == CUBLAS_STATUS_SUCCESS) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +} + +int spmvDnsDeviceDouble(char transa, int m, int n, int k, double *alpha, + void *deviceMat, void* deviceX, double *beta, void* deviceY) +{ + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + int status; + + cublasHandle_t handle=psb_cudaGetCublasHandle(); + cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); + /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ + if (n == 1) { + status = cublasDgemv(handle, trans, m,k, + alpha, devMat->cM,devMat->pitch, x->v_,1, + beta, y->v_,1); + } else { + status = cublasDgemm(handle, trans, CUBLAS_OP_N, m,n,k, + alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_, + beta, y->v_,y->pitch_); + } + + if (status == CUBLAS_STATUS_SUCCESS) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +} + +int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *alpha, + void *deviceMat, void* deviceX, float complex *beta, void* deviceY) +{ + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + int status; + + cublasHandle_t handle=psb_cudaGetCublasHandle(); + cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); + /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ + if (n == 1) { + status = cublasCgemv(handle, trans, m,k, + alpha, devMat->cM,devMat->pitch, x->v_,1, + beta, y->v_,1); + } else { + status = cublasCgemm(handle, trans, CUBLAS_OP_N, m,n,k, + alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_, + beta, y->v_,y->pitch_); + } + + if (status == CUBLAS_STATUS_SUCCESS) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +} + +int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex *alpha, + void *deviceMat, void* deviceX, double complex *beta, void* deviceY) +{ + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + int status; + + cublasHandle_t handle=psb_cudaGetCublasHandle(); + cublasOperation_t trans=((transa == 'N')? CUBLAS_OP_N:((transa=='T')? CUBLAS_OP_T:CUBLAS_OP_C)); + /* Note: the M,N,K choices according to TRANS have already been handled in the caller */ + if (n == 1) { + status = cublasZgemv(handle, trans, m,k, + alpha, devMat->cM,devMat->pitch, x->v_,1, + beta, y->v_,1); + } else { + status = cublasZgemm(handle, trans, CUBLAS_OP_N, m,n,k, + alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_, + beta, y->v_,y->pitch_); + } + + if (status == CUBLAS_STATUS_SUCCESS) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +} + + +int writeDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc) +{ int i; + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasSetMatrix(lda,nc,sizeof(float), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceFloat",i); + } + return SPGPU_SUCCESS; +} + +int writeDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc) +{ int i; + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasSetMatrix(lda,nc,sizeof(double), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceDouble",i); + } + return SPGPU_SUCCESS; +} + + +int writeDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc) +{ int i; + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasSetMatrix(lda,nc,sizeof(cuFloatComplex), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceFloatComplex",i); + } + return SPGPU_SUCCESS; +} + +int writeDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc) +{ int i; + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasSetMatrix(lda,nc,sizeof(cuDoubleComplex), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeDnsDeviceDoubleComplex",i); + } + return SPGPU_SUCCESS; +} + + +int readDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc) +{ int i; + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasGetMatrix(lda,nc,sizeof(float), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceFloat",i); + } + return SPGPU_SUCCESS; +} + +int readDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc) +{ int i; + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasGetMatrix(lda,nc,sizeof(double), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceDouble",i); + } + return SPGPU_SUCCESS; +} + + +int readDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc) +{ int i; + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasGetMatrix(lda,nc,sizeof(cuFloatComplex), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceFloatComplex",i); + } + return SPGPU_SUCCESS; +} + +int readDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc) +{ int i; + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + int pitch=devMat->pitch; + i = cublasGetMatrix(lda,nc,sizeof(cuDoubleComplex), (void*) val,lda, (void *)devMat->cM, pitch); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readDnsDeviceDoubleComplex",i); + } + return SPGPU_SUCCESS; +} + + +int getDnsDevicePitch(void* deviceMat) +{ int i; + struct DnsDevice *devMat = (struct DnsDevice *) deviceMat; + i = devMat->pitch; + return(i); +} + diff --git a/cuda/dnsdev.h b/cuda/dnsdev.h new file mode 100644 index 00000000..aa536105 --- /dev/null +++ b/cuda/dnsdev.h @@ -0,0 +1,118 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#ifndef _DNSDEV_H_ +#define _DNSDEV_H_ + +#include "cintrf.h" +#include "vectordev.h" +#include "cuComplex.h" +#include "cublas_v2.h" + + +struct DnsDevice +{ + // Compressed matrix + void *cM; //it can be float or double + + + //matrix size (uncompressed) + int rows; + int columns; + + int pitch; //old + + int cMPitch; + + //allocation size (in elements) + int allocsize; + + /*(i.e. 0 for C, 1 for Fortran)*/ + int baseIndex; +}; + +typedef struct DnsDeviceParams +{ + // The resulting allocation for cM and rP will be pitch*maxRowSize*(size of the elementType) + unsigned int elementType; + + // Pitch (in number of elements) + unsigned int pitch; + + // Number of rows. + // Used to allocate rS array + unsigned int rows; + + // Number of columns. + // Used for error-checking + unsigned int columns; + + // First index (e.g 0 or 1) + unsigned int firstIndex; +} DnsDeviceParams; + +int FallocDnsDevice(void** deviceMat, unsigned int rows, + unsigned int columns, unsigned int elementType, + unsigned int firstIndex); +int allocDnsDevice(void ** remoteMatrix, DnsDeviceParams* params); +void freeDnsDevice(void* remoteMatrix); + +int writeDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc); +int writeDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc); +int writeDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc); +int writeDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc); + +int readDnsDeviceFloat(void* deviceMat, float* val, int lda, int nc); +int readDnsDeviceDouble(void* deviceMat, double* val, int lda, int nc); +int readDnsDeviceFloatComplex(void* deviceMat, float complex* val, int lda, int nc); +int readDnsDeviceDoubleComplex(void* deviceMat, double complex* val, int lda, int nc); + +int spmvDnsDeviceFloat(char transa, int m, int n, int k, + float *alpha, void *deviceMat, void* deviceX, + float *beta, void* deviceY); +int spmvDnsDeviceDouble(char transa, int m, int n, int k, + double *alpha, void *deviceMat, void* deviceX, + double *beta, void* deviceY); +int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, + float complex *alpha, void *deviceMat, void* deviceX, + float complex *beta, void* deviceY); +int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, + double complex *alpha, void *deviceMat, void* deviceX, + double complex *beta, void* deviceY); + +int getDnsDevicePitch(void* deviceMat); + +// sparse Dns matrix-vector product +//int spmvDnsDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY); +//int spmvDnsDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY); + +#endif diff --git a/cuda/dnsdev_mod.F90 b/cuda/dnsdev_mod.F90 new file mode 100644 index 00000000..fd257e0e --- /dev/null +++ b/cuda/dnsdev_mod.F90 @@ -0,0 +1,270 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module dnsdev_mod + use iso_c_binding + use core_mod + + type, bind(c) :: dnsdev_parms + integer(c_int) :: element_type + integer(c_int) :: pitch + integer(c_int) :: rows + integer(c_int) :: columns + integer(c_int) :: maxRowSize + integer(c_int) :: avgRowSize + integer(c_int) :: firstIndex + end type dnsdev_parms + + interface + function FgetDnsDeviceParams(rows, columns, elementType, firstIndex) & + & result(res) bind(c,name='getDnsDeviceParams') + use iso_c_binding + import :: dnsdev_parms + type(dnsdev_parms) :: res + integer(c_int), value :: rows,columns,elementType,firstIndex + end function FgetDnsDeviceParams + end interface + + + interface + function FallocDnsDevice(deviceMat,rows,columns,& + & elementType,firstIndex) & + & result(res) bind(c,name='FallocDnsDevice') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: rows,columns,elementType,firstIndex + type(c_ptr) :: deviceMat + end function FallocDnsDevice + end interface + + + interface writeDnsDevice + + function writeDnsDeviceFloat(deviceMat,val,lda,nc) & + & result(res) bind(c,name='writeDnsDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + real(c_float) :: val(lda,*) + end function writeDnsDeviceFloat + + + function writeDnsDeviceDouble(deviceMat,val,lda,nc) & + & result(res) bind(c,name='writeDnsDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + real(c_double) :: val(lda,*) + end function writeDnsDeviceDouble + + + function writeDnsDeviceFloatComplex(deviceMat,val,lda,nc) & + & result(res) bind(c,name='writeDnsDeviceFloatComple') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + complex(c_float_complex) :: val(lda,*) + end function writeDnsDeviceFloatComplex + + + function writeDnsDeviceDoubleComplex(deviceMat,val,lda,nc) & + & result(res) bind(c,name='writeDnsDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + complex(c_double_complex) :: val(lda,*) + end function writeDnsDeviceDoubleComplex + + end interface + + interface readDnsDevice + + function readDnsDeviceFloat(deviceMat,val,lda,nc) & + & result(res) bind(c,name='readDnsDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + real(c_float) :: val(lda,*) + end function readDnsDeviceFloat + + + function readDnsDeviceDouble(deviceMat,val,lda,nc) & + & result(res) bind(c,name='readDnsDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + real(c_double) :: val(lda,*) + end function readDnsDeviceDouble + + + function readDnsDeviceFloatComplex(deviceMat,val,lda,nc) & + & result(res) bind(c,name='readDnsDeviceFloatComple') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + complex(c_float_complex) :: val(lda,*) + end function readDnsDeviceFloatComplex + + + function readDnsDeviceDoubleComplex(deviceMat,val,lda,nc) & + & result(res) bind(c,name='readDnsDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: lda,nc + complex(c_double_complex) :: val(lda,*) + end function readDnsDeviceDoubleComplex + + end interface + + interface + subroutine freeDnsDevice(deviceMat) & + & bind(c,name='freeDnsDevice') + use iso_c_binding + type(c_ptr), value :: deviceMat + end subroutine freeDnsDevice + end interface + + interface + subroutine resetDnsTimer() bind(c,name='resetDnsTimer') + use iso_c_binding + end subroutine resetDnsTimer + end interface + interface + function getDnsTimer() & + & bind(c,name='getDnsTimer') result(res) + use iso_c_binding + real(c_double) :: res + end function getDnsTimer + end interface + + + interface + function getDnsDevicePitch(deviceMat) & + & bind(c,name='getDnsDevicePitch') result(res) + use iso_c_binding + type(c_ptr), value :: deviceMat + integer(c_int) :: res + end function getDnsDevicePitch + end interface + +!!$ interface csputDnsDeviceFloat +!!$ function dev_csputDnsDeviceFloat(deviceMat, nnz, ia, ja, val) & +!!$ & result(res) bind(c,name='dev_csputDnsDeviceFloat') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat , ia, ja, val +!!$ integer(c_int), value :: nnz +!!$ end function dev_csputDnsDeviceFloat +!!$ end interface +!!$ +!!$ interface csputDnsDeviceDouble +!!$ function dev_csputDnsDeviceDouble(deviceMat, nnz, ia, ja, val) & +!!$ & result(res) bind(c,name='dev_csputDnsDeviceDouble') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat , ia, ja, val +!!$ integer(c_int), value :: nnz +!!$ end function dev_csputDnsDeviceDouble +!!$ end interface +!!$ +!!$ interface csputDnsDeviceFloatComplex +!!$ function dev_csputDnsDeviceFloatComplex(deviceMat, nnz, ia, ja, val) & +!!$ & result(res) bind(c,name='dev_csputDnsDeviceFloatComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat , ia, ja, val +!!$ integer(c_int), value :: nnz +!!$ end function dev_csputDnsDeviceFloatComplex +!!$ end interface +!!$ +!!$ interface csputDnsDeviceDoubleComplex +!!$ function dev_csputDnsDeviceDoubleComplex(deviceMat, nnz, ia, ja, val) & +!!$ & result(res) bind(c,name='dev_csputDnsDeviceDoubleComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat , ia, ja, val +!!$ integer(c_int), value :: nnz +!!$ end function dev_csputDnsDeviceDoubleComplex +!!$ end interface + + interface spmvDnsDevice + function spmvDnsDeviceFloat(transa,m,n,k,alpha,deviceMat,x,beta,y) & + & result(res) bind(c,name='spmvDnsDeviceFloat') + use iso_c_binding + character(c_char), value :: transa + integer(c_int), value :: m, n, k + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_float) :: alpha, beta + end function spmvDnsDeviceFloat + + function spmvDnsDeviceDouble(transa,m,n,k,alpha,deviceMat,x,beta,y) & + & result(res) bind(c,name='spmvDnsDeviceDouble') + use iso_c_binding + character(c_char), value :: transa + integer(c_int), value :: m, n, k + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_double) :: alpha, beta + end function spmvDnsDeviceDouble + + function spmvDnsDeviceFloatComplex(transa,m,n,k,alpha,deviceMat,x,beta,y) & + & result(res) bind(c,name='spmvDnsDeviceFloatComplex') + use iso_c_binding + character(c_char), value :: transa + integer(c_int), value :: m, n, k + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_float_complex) :: alpha, beta + end function spmvDnsDeviceFloatComplex + + function spmvDnsDeviceDoubleComplex(transa,m,n,k,alpha,deviceMat,x,beta,y) & + & result(res) bind(c,name='spmvDnsDeviceDoubleComplex') + use iso_c_binding + character(c_char), value :: transa + integer(c_int), value :: m, n, k + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_double_complex) :: alpha, beta + end function spmvDnsDeviceDoubleComplex + + end interface + +end module dnsdev_mod diff --git a/cuda/dvectordev.c b/cuda/dvectordev.c new file mode 100644 index 00000000..723f48d8 --- /dev/null +++ b/cuda/dvectordev.c @@ -0,0 +1,337 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include +//#include "utils.h" +//#include "common.h" +#include "dvectordev.h" + + +int registerMappedDouble(void *buff, void **d_p, int n, double dummy) +{ + return registerMappedMemory(buff,d_p,n*sizeof(double)); +} + +int writeMultiVecDeviceDouble(void* deviceVec, double* hostVec) +{ int i; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + // Ex updateFromHost vector function + i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, devVec->pitch_*devVec->count_*sizeof(double)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i); + } + return(i); +} + +int writeMultiVecDeviceDoubleR2(void* deviceVec, double* hostVec, int ld) +{ int i; + i = writeMultiVecDeviceDouble(deviceVec, (void *) hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceDoubleR2",i); + } + return(i); +} + +int readMultiVecDeviceDouble(void* deviceVec, double* hostVec) +{ int i,j; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(double)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDouble",i); + } + return(i); +} + +int readMultiVecDeviceDoubleR2(void* deviceVec, double* hostVec, int ld) +{ int i; + i = readMultiVecDeviceDouble(deviceVec, hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDoubleR2",i); + } + return(i); +} + +int setscalMultiVecDeviceDouble(double val, int first, int last, + int indexBase, void* devMultiVecX) +{ int i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + spgpuHandle_t handle=psb_cudaGetHandle(); + + spgpuDsetscal(handle, first, last, indexBase, val, (double *) devVecX->v_); + + return(i); +} + + +int geinsMultiVecDeviceDouble(int n, void* devMultiVecIrl, void* devMultiVecVal, + int dupl, int indexBase, void* devMultiVecX) +{ int j=0, i=0,nmin=0,nmax=0; + int pitch = 0; + double beta; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; + struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecIrl->pitch_; + if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) + return SPGPU_UNSUPPORTED; + + //fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_); + + if (dupl == INS_OVERWRITE) + beta = 0.0; + else if (dupl == INS_ADD) + beta = 1.0; + else + beta = 0.0; + + spgpuDscat(handle, (double *) devVecX->v_, n, (double*)devVecVal->v_, + (int*)devVecIrl->v_, indexBase, beta); + + return(i); +} + + +int igathMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + + i= igathMultiVecDeviceDouble(deviceVec, vectorId, n, + first, (void*) devIdx->v_, hfirst, host_values, indexBase); + return(i); +} + +int igathMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, int indexBase) +{ + int i, *idx =(int *) indexes;; + double *hv = (double *) host_values;; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_cudaGetHandle(); + + i=0; + hv = &(hv[hfirst-indexBase]); + idx = &(idx[first-indexBase]); + spgpuDgath(handle,hv, n, idx,indexBase, (double *) devVec->v_+vectorId*devVec->pitch_); + return(i); +} + +int iscatMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n, int first, void *deviceIdx, + int hfirst, void* host_values, int indexBase, double beta) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + i= iscatMultiVecDeviceDouble(deviceVec, vectorId, n, first, + (void*) devIdx->v_, hfirst,host_values, indexBase, beta); + return(i); +} + +int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, double beta) +{ int i=0; + double *hv = (double *) host_values; + int *idx=(int *) indexes; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_cudaGetHandle(); + + idx = &(idx[first-indexBase]); + hv = &(hv[hfirst-indexBase]); + spgpuDscat(handle, (double *) devVec->v_, n, hv, idx, indexBase, beta); + return SPGPU_SUCCESS; + +} + +int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + // Note: inner kernel can handle aliased input/output + spgpuDscal(handle, (double *)devVecA->v_, devVecA->pitch_, + alpha, (double *)devVecA->v_); + return(i); +} + +int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuDmnrm2(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); + return(i); +} + +int amaxMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuDmamax(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); + return(i); +} + +int asumMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuDmasum(handle, y_res, n,(double *)devVecA->v_, devVecA->count_, devVecA->pitch_); + + return(i); +} + +int dotMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA, void* devMultiVecB) +{int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; + spgpuHandle_t handle=psb_cudaGetHandle(); + + spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,devVecA->count_,devVecB->pitch_); + return(i); +} + +int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX, + double beta, void* devMultiVecY) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + for(j=0;jcount_;j++) + spgpuDaxpby(handle,(double*)devVecY->v_+pitch*j, n, beta, + (double*)devVecY->v_+pitch*j, alpha,(double*) devVecX->v_+pitch*j); + return(i); +} + +int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuDabgdxyz(handle,n, alpha,beta,gamma,delta, + (double*)devVecX->v_,(double*) devVecY->v_,(double*) devVecZ->v_); + return(i); +} + +int xyzwMultiVecDeviceDouble(int n,double a, double b, double c, double d, double e, double f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + struct MultiVectDevice *devVecW = (struct MultiVectDevice *) devMultiVecW; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuDxyzw(handle,n, a,b,c,d,e,f, + (double*)devVecX->v_,(double*) devVecY->v_,(double*) devVecZ->v_,(double*) devVecW->v_); + return(i); +} + +int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + spgpuHandle_t handle=psb_cudaGetHandle(); + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuDmaxy(handle, (double*)devVecB->v_, n, alpha, (double*)devVecA->v_, + (double*)devVecB->v_, devVecA->count_, devVecA->pitch_); + + return(i); +} + +int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, + void *deviceVecB, double beta, void *deviceVecZ) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; + spgpuHandle_t handle=psb_cudaGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) + return SPGPU_UNSUPPORTED; + spgpuDmaxypbz(handle, (double*)devVecZ->v_, n, beta, (double*)devVecZ->v_, + alpha, (double*) devVecA->v_, (double*) devVecB->v_, + devVecB->count_, devVecB->pitch_); + return(i); +} + +int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA, + void *deviceVecB) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + + spgpuHandle_t handle=psb_cudaGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuDabs(handle, (double*)devVecB->v_, n, alpha, (double*)devVecA->v_); + + return(i); +} + +int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + spgpuHandle_t handle=psb_cudaGetHandle(); + if (n > devVecA->size_) + return SPGPU_UNSUPPORTED; + + spgpuDabs(handle, (double*)devVecA->v_, n, alpha, (double*)devVecA->v_); + + return(i); +} + diff --git a/cuda/dvectordev.h b/cuda/dvectordev.h new file mode 100644 index 00000000..c2bfa1b5 --- /dev/null +++ b/cuda/dvectordev.h @@ -0,0 +1,80 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#pragma once +//#include "utils.h" +#include "vectordev.h" +#include "cuda_runtime.h" +#include "core.h" + +int registerMappedDouble(void *, void **, int, double); +int writeMultiVecDeviceDouble(void* deviceMultiVec, double* hostMultiVec); +int writeMultiVecDeviceDoubleR2(void* deviceMultiVec, double* hostMultiVec, int ld); +int readMultiVecDeviceDouble(void* deviceMultiVec, double* hostMultiVec); +int readMultiVecDeviceDoubleR2(void* deviceMultiVec, double* hostMultiVec, int ld); + +int setscalMultiVecDeviceDouble(double val, int first, int last, + int indexBase, void* devVecX); + +int geinsMultiVecDeviceDouble(int n, void* devVecIrl, void* devVecVal, + int dupl, int indexBase, void* devVecX); + +int igathMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase); +int igathMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, + int indexBase); +int iscatMultiVecDeviceDoubleVecIdx(void* deviceVec, int vectorId, int n, int first, + void *deviceIdx, int hfirst, void* host_values, + int indexBase, double beta); +int iscatMultiVecDeviceDouble(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, double beta); + +int scalMultiVecDeviceDouble(double alpha, void* devMultiVecA); +int nrm2MultiVecDeviceDouble(double* y_res, int n, void* devVecA); +int amaxMultiVecDeviceDouble(double* y_res, int n, void* devVecA); +int asumMultiVecDeviceDouble(double* y_res, int n, void* devVecA); +int dotMultiVecDeviceDouble(double* y_res, int n, void* devVecA, void* devVecB); + +int axpbyMultiVecDeviceDouble(int n, double alpha, void* devVecX, double beta, void* devVecY); +int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); +int xyzwMultiVecDeviceDouble(int n,double a, double b, double c, double d, double e, double f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW); +int axyMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, void *deviceVecB); +int axybzMultiVecDeviceDouble(int n, double alpha, void *deviceVecA, + void *deviceVecB, double beta, void *deviceVecZ); +int absMultiVecDeviceDouble(int n, double alpha, void *deviceVecA); +int absMultiVecDeviceDouble2(int n, double alpha, void *deviceVecA, void *deviceVecB); + diff --git a/cuda/elldev.c b/cuda/elldev.c new file mode 100644 index 00000000..a5d893c1 --- /dev/null +++ b/cuda/elldev.c @@ -0,0 +1,686 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#include +#include "elldev.h" + +#define PASS_RS 0 + +EllDeviceParams getEllDeviceParams(unsigned int rows, unsigned int maxRowSize, + unsigned int nnzeros, + unsigned int columns, unsigned int elementType, + unsigned int firstIndex) +{ + EllDeviceParams params; + + if (elementType == SPGPU_TYPE_DOUBLE) + { + params.pitch = ((rows + ELL_PITCH_ALIGN_D - 1)/ELL_PITCH_ALIGN_D)*ELL_PITCH_ALIGN_D; + } + else + { + params.pitch = ((rows + ELL_PITCH_ALIGN_S - 1)/ELL_PITCH_ALIGN_S)*ELL_PITCH_ALIGN_S; + } + //For complex? + params.elementType = elementType; + + params.rows = rows; + params.maxRowSize = maxRowSize; + params.avgRowSize = (nnzeros+rows-1)/rows; + params.columns = columns; + params.firstIndex = firstIndex; + + //params.pitch = computeEllAllocPitch(rows); + + return params; + +} +//new +int allocEllDevice(void ** remoteMatrix, EllDeviceParams* params) +{ + struct EllDevice *tmp = (struct EllDevice *)malloc(sizeof(struct EllDevice)); + *remoteMatrix = (void *)tmp; + tmp->rows = params->rows; + tmp->cMPitch = computeEllAllocPitch(tmp->rows); + tmp->rPPitch = tmp->cMPitch; + tmp->pitch= tmp->cMPitch; + tmp->maxRowSize = params->maxRowSize; + tmp->avgRowSize = params->avgRowSize; + tmp->allocsize = (int)tmp->maxRowSize * tmp->pitch; + //tmp->allocsize = (int)params->maxRowSize * tmp->cMPitch; + allocRemoteBuffer((void **)&(tmp->rS), tmp->rows*sizeof(int)); + allocRemoteBuffer((void **)&(tmp->diag), tmp->rows*sizeof(int)); + allocRemoteBuffer((void **)&(tmp->rP), tmp->allocsize*sizeof(int)); + tmp->columns = params->columns; + tmp->baseIndex = params->firstIndex; + tmp->dataType = params->elementType; + //fprintf(stderr,"allocEllDevice: %d %d %d \n",tmp->pitch, params->maxRowSize, params->avgRowSize); + if (params->elementType == SPGPU_TYPE_FLOAT) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(float)); + else if (params->elementType == SPGPU_TYPE_DOUBLE) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(double)); + else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuFloatComplex)); + else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuDoubleComplex)); + else + return SPGPU_UNSUPPORTED; // Unsupported params + //fprintf(stderr,"From allocEllDevice: %d %d %d %p %p %p\n",tmp->maxRowSize, + // tmp->avgRowSize,tmp->allocsize,tmp->rS,tmp->rP,tmp->cM); + + return SPGPU_SUCCESS; +} + +//new +void zeroEllDevice(void *remoteMatrix) +{ + struct EllDevice *tmp = (struct EllDevice *) remoteMatrix; + + if (tmp->dataType == SPGPU_TYPE_FLOAT) + cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(float)); + else if (tmp->dataType == SPGPU_TYPE_DOUBLE) + cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(double)); + else if (tmp->dataType == SPGPU_TYPE_COMPLEX_FLOAT) + cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(cuFloatComplex)); + else if (tmp->dataType == SPGPU_TYPE_COMPLEX_DOUBLE) + cudaMemset((void *)tmp->cM, 0, tmp->allocsize*sizeof(cuDoubleComplex)); + else + return ; // Unsupported params + //fprintf(stderr,"From allocEllDevice: %d %d %d %p %p %p\n",tmp->maxRowSize, + // tmp->avgRowSize,tmp->allocsize,tmp->rS,tmp->rP,tmp->cM); + + return; +} + + +void freeEllDevice(void* remoteMatrix) +{ + struct EllDevice *devMat = (struct EllDevice *) remoteMatrix; + //fprintf(stderr,"freeEllDevice\n"); + if (devMat != NULL) { + freeRemoteBuffer(devMat->rS); + freeRemoteBuffer(devMat->rP); + freeRemoteBuffer(devMat->cM); + free(remoteMatrix); + } +} + +//new +int FallocEllDevice(void** deviceMat,unsigned int rows, unsigned int maxRowSize, + unsigned int nnzeros, + unsigned int columns, unsigned int elementType, + unsigned int firstIndex) +{ int i; + EllDeviceParams p; + + p = getEllDeviceParams(rows, maxRowSize, nnzeros, columns, elementType, firstIndex); + i = allocEllDevice(deviceMat, &p); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i); + } + return(i); +} + +void sspmdmm_gpu(float *z,int s, int vPitch, float *y, float alpha, float* cM, int* rP, int* rS, + int avgRowSize, int maxRowSize, int rows, int pitch, float *x, float beta, int firstIndex) +{ + int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + + for (i=0; icount_ == x->count_, "ERROR: x and y don't share the same number of vectors"); + __assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)"); + __assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)"); +#endif + /*spgpuSellspmv (handle, (float*) y->v_, (float*)y->v_, alpha, + (float*) devMat->cM, devMat->rP, devMat->cMPitch, + devMat->rPPitch, devMat->rS, devMat->rows, + (float*)x->v_, beta, devMat->baseIndex);*/ + sspmdmm_gpu ( (float *)y->v_,y->count_, y->pitch_, (float *)y->v_, alpha, (float *)devMat->cM, devMat->rP, devMat->rS, + devMat->avgRowSize, devMat->maxRowSize, devMat->rows, devMat->pitch, + (float *)x->v_, beta, devMat->baseIndex); + return(i); +} + + +void +dspmdmm_gpu (double *z,int s, int vPitch, double *y, double alpha, double* cM, int* rP, + int* rS, int avgRowSize, int maxRowSize, int rows, int pitch, + double *x, double beta, int firstIndex) +{ + int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + for (i=0; iv_, (double*)y->v_, alpha, (double*) devMat->cM, devMat->rP, devMat->cMPitch, devMat->rPPitch, devMat->rS, devMat->rows, (double*)x->v_, beta, devMat->baseIndex);*/ + /* fprintf(stderr,"From spmvEllDouble: mat %d %d %d %d y %d %d \n", */ + /* devMat->avgRowSize, devMat->maxRowSize, devMat->rows, */ + /* devMat->pitch, y->count_, y->pitch_); */ + dspmdmm_gpu ((double *)y->v_, y->count_, y->pitch_, (double *)y->v_, + alpha, (double *)devMat->cM, + devMat->rP, devMat->rS, devMat->avgRowSize, + devMat->maxRowSize, devMat->rows, devMat->pitch, + (double *)x->v_, beta, devMat->baseIndex); + + return SPGPU_SUCCESS; +} + +void +cspmdmm_gpu (cuFloatComplex *z, int s, int vPitch, cuFloatComplex *y, + cuFloatComplex alpha, cuFloatComplex* cM, + int* rP, int* rS, int avgRowSize, int maxRowSize, int rows, int pitch, + cuFloatComplex *x, cuFloatComplex beta, int firstIndex) +{ + int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + for (i=0; iv_, y->count_, y->pitch_, (cuFloatComplex *)y->v_, a, (cuFloatComplex *)devMat->cM, + devMat->rP, devMat->rS, devMat->avgRowSize, devMat->maxRowSize, devMat->rows, devMat->pitch, + (cuFloatComplex *)x->v_, b, devMat->baseIndex); + + return SPGPU_SUCCESS; +} + +void +zspmdmm_gpu (cuDoubleComplex *z, int s, int vPitch, cuDoubleComplex *y, cuDoubleComplex alpha, cuDoubleComplex* cM, + int* rP, int* rS, int avgRowSize, int maxRowSize, int rows, int pitch, + cuDoubleComplex *x, cuDoubleComplex beta, int firstIndex) +{ + int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + for (i=0; iv_, y->count_, y->pitch_, (cuDoubleComplex *)y->v_, a, (cuDoubleComplex *)devMat->cM, + devMat->rP, devMat->rS, devMat->avgRowSize, devMat->maxRowSize, devMat->rows, + devMat->pitch, (cuDoubleComplex *)x->v_, b, devMat->baseIndex); + + return SPGPU_SUCCESS; +} + +int writeEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + //i = writeEllDevice(deviceMat, (void *) val, ja, irn); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i); + }*/ + return SPGPU_SUCCESS; +} + +int writeEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + } + return SPGPU_SUCCESS; +} + +int writeEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +int writeEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +int readEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + /*i = readEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); + }*/ + return SPGPU_SUCCESS; +} + +int readEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +int readEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +int readEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +int getEllDevicePitch(void* deviceMat) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + i = devMat->pitch; //old + //i = getPitchEllDevice(deviceMat); + return(i); +} + +int getEllDeviceMaxRowSize(void* deviceMat) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + i = devMat->maxRowSize; + return(i); +} + + + + +// New copying interface + +int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, float *val, void *deviceMat) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + float *devVal; + int *devIdisp, *devJa; + spgpuHandle_t handle; + handle = psb_cudaGetHandle(); + + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(float)); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(float)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + + if (i==0) psi_cuda_s_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm, + (int *) devMat->rS,devIdisp,devJa,devVal, + (int *) devMat->diag, (int *) devMat->rP, (float *)devMat->cM); + // Ex updateFromHost function + //i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); + //if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + //if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i); + } + return SPGPU_SUCCESS; +} + + + +int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, double *val, void *deviceMat) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + double *devVal; + int *devIdisp, *devJa; + spgpuHandle_t handle; + handle = psb_cudaGetHandle(); + + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(double)); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(double)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + + if (i==0) psi_cuda_d_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm, + (int *) devMat->rS,devIdisp,devJa,devVal, + (int *) devMat->diag, (int *) devMat->rP, (double *)devMat->cM); + // Ex updateFromHost function + //i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); + //if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + //if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + } + return SPGPU_SUCCESS; +} + + +int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, float complex *val, void *deviceMat) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + float complex *devVal; + int *devIdisp, *devJa; + spgpuHandle_t handle; + handle = psb_cudaGetHandle(); + + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuFloatComplex)); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuFloatComplex)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + + if (i==0) psi_cuda_c_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm, + (int *) devMat->rS,devIdisp,devJa,devVal, + (int *) devMat->diag,(int *) devMat->rP, (float complex *)devMat->cM); + // Ex updateFromHost function + //i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float complex)); + //if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + //if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloatComplex",i); + } + return SPGPU_SUCCESS; +} + + + +int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, double complex *val, void *deviceMat) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + double complex *devVal; + int *devIdisp, *devJa; + spgpuHandle_t handle; + handle = psb_cudaGetHandle(); + + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuDoubleComplex)); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuDoubleComplex)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + + if (i==0) psi_cuda_z_CopyCooToElg(handle,nr,nc,nza,devMat->baseIndex,hacksz,ldv,nzm, + (int *) devMat->rS,devIdisp,devJa,devVal, + (int *) devMat->diag,(int *) devMat->rP, (double complex *)devMat->cM); + // Ex updateFromHost function + //i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double complex)); + //if (i==0) i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + //if (i==0) i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDoubleComplex",i); + } + return SPGPU_SUCCESS; +} + + +int dev_csputEllDeviceFloat(void* deviceMat, int nnz, void *ia, void *ja, void *val) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; + struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; + struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja; + float alpha=1.0; + spgpuHandle_t handle=psb_cudaGetHandle(); + + if (nnz <=0) return SPGPU_SUCCESS; + //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt); + + spgpuSellcsput(handle,alpha,(float *) devMat->cM, + devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, + nnz, devIa->v_, devJa->v_, (float *) devVal->v_, 1); + + return SPGPU_SUCCESS; +} + +int dev_csputEllDeviceDouble(void* deviceMat, int nnz, void *ia, void *ja, void *val) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; + struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; + struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja; + double alpha=1.0; + spgpuHandle_t handle=psb_cudaGetHandle(); + + if (nnz <=0) return SPGPU_SUCCESS; + //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt); + + spgpuDellcsput(handle,alpha,(double *) devMat->cM, + devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, + nnz, devIa->v_, devJa->v_, (double *) devVal->v_, 1); + + return SPGPU_SUCCESS; +} + + +int dev_csputEllDeviceFloatComplex(void* deviceMat, int nnz, + void *ia, void *ja, void *val) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; + struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; + struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja; + cuFloatComplex alpha = make_cuFloatComplex(1.0, 0.0); + spgpuHandle_t handle=psb_cudaGetHandle(); + + if (nnz <=0) return SPGPU_SUCCESS; + //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt); + + spgpuCellcsput(handle,alpha,(cuFloatComplex *) devMat->cM, + devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, + nnz, devIa->v_, devJa->v_, (cuFloatComplex *) devVal->v_, 1); + + return SPGPU_SUCCESS; +} + +int dev_csputEllDeviceDoubleComplex(void* deviceMat, int nnz, + void *ia, void *ja, void *val) +{ int i; + struct EllDevice *devMat = (struct EllDevice *) deviceMat; + struct MultiVectDevice *devVal = (struct MultiVectDevice *) val; + struct MultiVectDevice *devIa = (struct MultiVectDevice *) ia; + struct MultiVectDevice *devJa = (struct MultiVectDevice *) ja; + cuDoubleComplex alpha = make_cuDoubleComplex(1.0, 0.0); + spgpuHandle_t handle=psb_cudaGetHandle(); + + if (nnz <=0) return SPGPU_SUCCESS; + //fprintf(stderr,"Going through csputEllDeviceDouble %d %p %d\n",nnz,devUpdIdx,cnt); + + spgpuZellcsput(handle,alpha,(cuDoubleComplex *) devMat->cM, + devMat->rP,devMat->pitch, devMat->pitch, devMat->rS, + nnz, devIa->v_, devJa->v_, (cuDoubleComplex *) devVal->v_, 1); + + return SPGPU_SUCCESS; +} + + diff --git a/cuda/elldev.h b/cuda/elldev.h new file mode 100644 index 00000000..4e69bb3a --- /dev/null +++ b/cuda/elldev.h @@ -0,0 +1,179 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#ifndef _ELLDEV_H_ +#define _ELLDEV_H_ + +#include "cintrf.h" +#include "vectordev.h" +#include "cuComplex.h" +#include "ell.h" + +struct EllDevice +{ + // Compressed matrix + void *cM; //it can be float or double + + // row pointers (same size of cM) + int *rP; + int *diag; + // row size + int *rS; + + //matrix size (uncompressed) + int rows; + int columns; + + int pitch; //old + + int cMPitch; + + int rPPitch; + + int maxRowSize; + int avgRowSize; + + //allocation size (in elements) + int allocsize; + + /*(i.e. 0 for C, 1 for Fortran)*/ + int baseIndex; + /* real/complex, single/double */ + int dataType; + +}; + +typedef struct EllDeviceParams +{ + // The resulting allocation for cM and rP will be pitch*maxRowSize*(size of the elementType) + unsigned int elementType; + + // Pitch (in number of elements) + unsigned int pitch; + + // Number of rows. + // Used to allocate rS array + unsigned int rows; + + // Number of columns. + // Used for error-checking + unsigned int columns; + + // Largest row size + unsigned int maxRowSize; + unsigned int avgRowSize; + + // First index (e.g 0 or 1) + unsigned int firstIndex; +} EllDeviceParams; + +int computeEllAllocPitch(int rowsCount); +int FallocEllDevice(void** deviceMat, unsigned int rows, unsigned int maxRowSize, + unsigned int nnzeros, + unsigned int columns, unsigned int elementType, + unsigned int firstIndex); +int allocEllDevice(void ** remoteMatrix, EllDeviceParams* params); +void freeEllDevice(void* remoteMatrix); + +int writeEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag); +int writeEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag); +int writeEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag); +int writeEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag); + +int readEllDeviceFloat(void* deviceMat, float* val, int* ja, int ldj, int* irn, int *idiag); +int readEllDeviceDouble(void* deviceMat, double* val, int* ja, int ldj, int* irn, int *idiag); +int readEllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int ldj, int* irn, int *idiag); +int readEllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int ldj, int* irn, int *idiag); + +int spmvEllDeviceFloat(void *deviceMat, float alpha, void* deviceX, + float beta, void* deviceY); +int spmvEllDeviceDouble(void *deviceMat, double alpha, void* deviceX, + double beta, void* deviceY); +int spmvEllDeviceFloatComplex(void *deviceMat, float complex alpha, void* deviceX, + float complex beta, void* deviceY); +int spmvEllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* deviceX, + double complex beta, void* deviceY); + + + +int psiCopyCooToElgFloat(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, float *val, void *deviceMat); + +int psiCopyCooToElgDouble(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, double *val, void *deviceMat); + +int psiCopyCooToElgFloatComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, float complex *val, void *deviceMat); + +int psiCopyCooToElgDoubleComplex(int nr, int nc, int nza, int hacksz, int ldv, int nzm, int *irn, + int *idisp, int *ja, double complex *val, void *deviceMat); + + +void psi_cuda_s_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, + int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, float *devVal, + int *idiag, int *rP, float *cM); + +void psi_cuda_d_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, + int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, double *devVal, + int *idiag, int *rP, double *cM); + +void psi_cuda_c_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, + int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, float complex *devVal, + int *idiag, int *rP, float complex *cM); + +void psi_cuda_z_CopyCooToElg(spgpuHandle_t handle, int nr, int nc, int nza, int baseIdx, + int hacksz, int ldv, int nzm, + int *rS,int *devIdisp, int *devJa, double complex *devVal, + int *idiag, int *rP, double complex *cM); + + +int dev_csputEllDeviceFloat(void* deviceMat, int nnz, + void *ia, void *ja, void *val); +int dev_csputEllDeviceDouble(void* deviceMat, int nnz, + void *ia, void *ja, void *val); +int dev_csputEllDeviceFloatComplex(void* deviceMat, int nnz, + void *ia, void *ja, void *val); +int dev_csputEllDeviceDoubleComplex(void* deviceMat, int nnz, + void *ia, void *ja, void *val); + +void zeroEllDevice(void* deviceMat); + +int getEllDevicePitch(void* deviceMat); + +// sparse Ell matrix-vector product +//int spmvEllDeviceFloat(void *deviceMat, float* alpha, void* deviceX, float* beta, void* deviceY); +//int spmvEllDeviceDouble(void *deviceMat, double* alpha, void* deviceX, double* beta, void* deviceY); + +#endif diff --git a/cuda/elldev_mod.F90 b/cuda/elldev_mod.F90 new file mode 100644 index 00000000..40cf8e49 --- /dev/null +++ b/cuda/elldev_mod.F90 @@ -0,0 +1,321 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module elldev_mod + use iso_c_binding + use core_mod + + type, bind(c) :: elldev_parms + integer(c_int) :: element_type + integer(c_int) :: pitch + integer(c_int) :: rows + integer(c_int) :: columns + integer(c_int) :: maxRowSize + integer(c_int) :: avgRowSize + integer(c_int) :: firstIndex + end type elldev_parms + + interface + function FgetEllDeviceParams(rows, maxRowSize, nnzeros, columns, elementType, firstIndex) & + & result(res) bind(c,name='getEllDeviceParams') + use iso_c_binding + import :: elldev_parms + type(elldev_parms) :: res + integer(c_int), value :: rows,maxRowSize,nnzeros,columns,elementType,firstIndex + end function FgetEllDeviceParams + end interface + + + interface + function FallocEllDevice(deviceMat,rows,maxRowSize,nnzeros,columns,& + & elementType,firstIndex) & + & result(res) bind(c,name='FallocEllDevice') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: rows,maxRowSize,nnzeros,columns,elementType,firstIndex + type(c_ptr) :: deviceMat + end function FallocEllDevice + end interface + + + interface writeEllDevice + + function writeEllDeviceFloat(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='writeEllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + real(c_float) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function writeEllDeviceFloat + + function writeEllDeviceDouble(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='writeEllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + real(c_double) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function writeEllDeviceDouble + + function writeEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='writeEllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + complex(c_float_complex) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function writeEllDeviceFloatComplex + + function writeEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='writeEllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + complex(c_double_complex) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function writeEllDeviceDoubleComplex + + end interface + + interface readEllDevice + + function readEllDeviceFloat(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='readEllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + real(c_float) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function readEllDeviceFloat + + function readEllDeviceDouble(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='readEllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + real(c_double) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function readEllDeviceDouble + + function readEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='readEllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + complex(c_float_complex) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function readEllDeviceFloatComplex + + function readEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn,idiag) & + & result(res) bind(c,name='readEllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int), value :: ldj + complex(c_double_complex) :: val(ldj,*) + integer(c_int) :: ja(ldj,*),irn(*),idiag(*) + end function readEllDeviceDoubleComplex + + end interface + + interface + subroutine freeEllDevice(deviceMat) & + & bind(c,name='freeEllDevice') + use iso_c_binding + type(c_ptr), value :: deviceMat + end subroutine freeEllDevice + end interface + + interface + subroutine zeroEllDevice(deviceMat) & + & bind(c,name='zeroEllDevice') + use iso_c_binding + type(c_ptr), value :: deviceMat + end subroutine zeroEllDevice + end interface + + interface + subroutine resetEllTimer() bind(c,name='resetEllTimer') + use iso_c_binding + end subroutine resetEllTimer + end interface + interface + function getEllTimer() & + & bind(c,name='getEllTimer') result(res) + use iso_c_binding + real(c_double) :: res + end function getEllTimer + end interface + + + interface + function getEllDevicePitch(deviceMat) & + & bind(c,name='getEllDevicePitch') result(res) + use iso_c_binding + type(c_ptr), value :: deviceMat + integer(c_int) :: res + end function getEllDevicePitch + end interface + + interface + function getEllDeviceMaxRowSize(deviceMat) & + & bind(c,name='getEllDeviceMaxRowSize') result(res) + use iso_c_binding + type(c_ptr), value :: deviceMat + integer(c_int) :: res + end function getEllDeviceMaxRowSize + end interface + + + interface psi_CopyCooToElg + function psiCopyCooToElgFloat(nr, nc, nza, hacksz, ldv, nzm, irn, & + & idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToElgFloat') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm + type(c_ptr), value :: deviceMat + real(c_float) :: val(*) + integer(c_int) :: irn(*),idisp(*),ja(*) + end function psiCopyCooToElgFloat + function psiCopyCooToElgDouble(nr, nc, nza, hacksz, ldv, nzm, irn, & + & idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToElgDouble') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm + type(c_ptr), value :: deviceMat + real(c_double) :: val(*) + integer(c_int) :: irn(*),idisp(*),ja(*) + end function psiCopyCooToElgDouble + function psiCopyCooToElgFloatComplex(nr, nc, nza, hacksz, ldv, nzm, irn, & + & idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToElgFloatComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm + type(c_ptr), value :: deviceMat + complex(c_float_complex) :: val(*) + integer(c_int) :: irn(*),idisp(*),ja(*) + end function psiCopyCooToElgFloatComplex + function psiCopyCooToElgDoubleComplex(nr, nc, nza, hacksz, ldv, nzm, irn, & + & idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToElgDoubleComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,ldv,nzm + type(c_ptr), value :: deviceMat + complex(c_double_complex) :: val(*) + integer(c_int) :: irn(*),idisp(*),ja(*) + end function psiCopyCooToElgDoubleComplex + end interface + + interface csputEllDeviceFloat + function dev_csputEllDeviceFloat(deviceMat, nnz, ia, ja, val) & + & result(res) bind(c,name='dev_csputEllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat , ia, ja, val + integer(c_int), value :: nnz + end function dev_csputEllDeviceFloat + end interface + + interface csputEllDeviceDouble + function dev_csputEllDeviceDouble(deviceMat, nnz, ia, ja, val) & + & result(res) bind(c,name='dev_csputEllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat , ia, ja, val + integer(c_int), value :: nnz + end function dev_csputEllDeviceDouble + end interface + + interface csputEllDeviceFloatComplex + function dev_csputEllDeviceFloatComplex(deviceMat, nnz, ia, ja, val) & + & result(res) bind(c,name='dev_csputEllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat , ia, ja, val + integer(c_int), value :: nnz + end function dev_csputEllDeviceFloatComplex + end interface + + interface csputEllDeviceDoubleComplex + function dev_csputEllDeviceDoubleComplex(deviceMat, nnz, ia, ja, val) & + & result(res) bind(c,name='dev_csputEllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat , ia, ja, val + integer(c_int), value :: nnz + end function dev_csputEllDeviceDoubleComplex + end interface + + interface spmvEllDevice + function spmvEllDeviceFloat(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvEllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_float),value :: alpha, beta + end function spmvEllDeviceFloat + function spmvEllDeviceDouble(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvEllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_double),value :: alpha, beta + end function spmvEllDeviceDouble + function spmvEllDeviceFloatComplex(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvEllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_float_complex),value :: alpha, beta + end function spmvEllDeviceFloatComplex + function spmvEllDeviceDoubleComplex(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvEllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_double_complex),value :: alpha, beta + end function spmvEllDeviceDoubleComplex + end interface + +end module elldev_mod diff --git a/cuda/fcusparse.c b/cuda/fcusparse.c new file mode 100644 index 00000000..094348ce --- /dev/null +++ b/cuda/fcusparse.c @@ -0,0 +1,75 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include + +#include +#include "fcusparse.h" + +static cusparseHandle_t *cusparse_handle=NULL; + + +void setHandle(cusparseHandle_t); + +int FcusparseCreate() +{ + int ret=CUSPARSE_STATUS_SUCCESS; + cusparseHandle_t *handle; + if (cusparse_handle == NULL) { + if ((handle = (cusparseHandle_t *)malloc(sizeof(cusparseHandle_t)))==NULL) + return((int) CUSPARSE_STATUS_ALLOC_FAILED); + ret = (int)cusparseCreate(handle); + if (ret == CUSPARSE_STATUS_SUCCESS) + cusparse_handle = handle; + } + //fprintf(stderr,"Created cusparses_handle\n"); + return (ret); +} + +int FcusparseDestroy() +{ + int val; + if (cusparse_handle!=NULL){ + val = (int) cusparseDestroy(*cusparse_handle); + free(cusparse_handle); + } + cusparse_handle=NULL; + return(val); +} +cusparseHandle_t *getHandle() +{ + if (cusparse_handle == NULL) + FcusparseCreate(); + return(cusparse_handle); +} + diff --git a/cuda/fcusparse.h b/cuda/fcusparse.h new file mode 100644 index 00000000..02a45fa3 --- /dev/null +++ b/cuda/fcusparse.h @@ -0,0 +1,72 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#ifndef FCUSPARSE_ +#define FCUSPARSE_ + +#include +#if CUDA_SHORT_VERSION <= 10 +#include +#else +#include +#endif +#include "cintrf.h" +#include "vectordev.h" + +int FcusparseCreate(); +int FcusparseDestroy(); +cusparseHandle_t *getHandle(); + +#define CHECK_CUDA(func) \ +{ \ + cudaError_t status = (func); \ + if (status != cudaSuccess) { \ + printf("CUDA API failed at line %d with error: %s (%d)\n", \ + __LINE__, cudaGetErrorString(status), status); \ + return EXIT_FAILURE; \ + } \ +} + +#define CHECK_CUSPARSE(func) \ +{ \ + cusparseStatus_t status = (func); \ + if (status != CUSPARSE_STATUS_SUCCESS) { \ + printf("CUSPARSE API failed at line %d with error: %s (%d)\n", \ + __LINE__, cusparseGetErrorString(status), status); \ + return EXIT_FAILURE; \ + } \ +} + + +#endif + + diff --git a/cuda/fcusparse_dat.h b/cuda/fcusparse_dat.h new file mode 100644 index 00000000..4b7cd6ce --- /dev/null +++ b/cuda/fcusparse_dat.h @@ -0,0 +1,136 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ +#ifndef FCUSPARSE_DAT_ +#define FCUSPARSE_DAT_ + + +typedef struct T_CSRGDeviceMat +{ +#if CUDA_SHORT_VERSION <= 10 + cusparseMatDescr_t descr; + cusparseSolveAnalysisInfo_t triang; +#elif CUDA_VERSION < 11030 + cusparseMatDescr_t descr; + csrsv2Info_t triang; + size_t mvbsize, svbsize; + void *mvbuffer, *svbuffer; +#else + cusparseSpMatDescr_t *spmvDescr; + cusparseSpSVDescr_t *spsvDescr; + size_t mvbsize, svbsize; + void *mvbuffer, *svbuffer; +#endif + int m, n, nz; + TYPE *val; + int *irp; + int *ja; +} T_CSRGDeviceMat; + +/* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */ +typedef struct T_Cmat +{ + T_CSRGDeviceMat *mat; +} T_Cmat; + +#if CUDA_SHORT_VERSION <= 10 +typedef struct T_HYBGDeviceMat +{ + cusparseMatDescr_t descr; + cusparseSolveAnalysisInfo_t triang; + cusparseHybMat_t hybA; + int m, n, nz; + TYPE *val; + int *irp; + int *ja; +} T_HYBGDeviceMat; + + +/* Interoperability: type coming from Fortran side to distinguish D/S/C/Z. */ +typedef struct T_Hmat +{ + T_HYBGDeviceMat *mat; +} T_Hmat; +#endif + +int T_spmvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_spsvCSRGDevice(T_Cmat *Mat, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_CSRGDeviceAlloc(T_Cmat *Mat,int nr, int nc, int nz); +int T_CSRGDeviceFree(T_Cmat *Mat); + + +int T_CSRGHost2Device(T_Cmat *Mat, int m, int n, int nz, + int *irp, int *ja, TYPE *val); +int T_CSRGDevice2Host(T_Cmat *Mat, int m, int n, int nz, + int *irp, int *ja, TYPE *val); + +int T_CSRGDeviceGetParms(T_Cmat *Mat,int *nr, int *nc, int *nz); + +#if CUDA_SHORT_VERSION <= 10 +int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type); +int T_CSRGDeviceCsrsmAnalysis(T_Cmat *Mat); +#elif CUDA_VERSION < 11030 +int T_CSRGDeviceSetMatType(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatFillMode(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatDiagType(T_Cmat *Mat, int type); +int T_CSRGDeviceSetMatIndexBase(T_Cmat *Mat, int type); +#else + +int T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat); +int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat); +int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat); +int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat); +#endif + + + +#if CUDA_SHORT_VERSION <= 10 + + +int T_HYBGDeviceFree(T_Hmat *Matrix); +int T_spmvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_HYBGDeviceAlloc(T_Hmat *Matrix,int nr, int nc, int nz); +int T_HYBGDeviceSetMatDiagType(T_Hmat *Matrix, int type); +int T_HYBGDeviceSetMatIndexBase(T_Hmat *Matrix, int type); +int T_HYBGDeviceSetMatType(T_Hmat *Matrix, int type); +int T_HYBGDeviceSetMatFillMode(T_Hmat *Matrix, int type); +int T_HYBGDeviceHybsmAnalysis(T_Hmat *Matrix); +int T_spsvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY); +int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz, + int *irp, int *ja, TYPE *val); +#endif + +#endif diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h new file mode 100644 index 00000000..12be21bd --- /dev/null +++ b/cuda/fcusparse_fct.h @@ -0,0 +1,729 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +int T_spmvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY) +{ + T_CSRGDeviceMat *cMat=Matrix->mat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + void *vX, *vY; + int r,n; + cusparseHandle_t *my_handle=getHandle(); + TYPE ealpha=alpha, ebeta=beta; +#if CUDA_SHORT_VERSION <= 10 + /* getAddrMultiVecDevice(deviceX, &vX); */ + /* getAddrMultiVecDevice(deviceY, &vY); */ + vX=x->v_; + vY=y->v_; + + CHECK_CUSPARSE(cusparseTcsrmv(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->n,cMat->nz,(const TYPE *) &alpha,cMat->descr, + cMat->val, cMat->irp, cMat->ja, + (const TYPE *) vX, (const TYPE *) &beta, (TYPE *) vY)); + +#elif CUDA_VERSION < 11030 + size_t bfsz; + vX=x->v_; + vY=y->v_; +#if 1 + CHECK_CUSPARSE(cusparseCsrmvEx_bufferSize(*my_handle,CUSPARSE_ALG_MERGE_PATH, + CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->n,cMat->nz, + (const void *) &ealpha,CUSPARSE_BASE_TYPE, + cMat->descr, + (const void *) cMat->val, + CUSPARSE_BASE_TYPE, + (const int *) cMat->irp, + (const int *) cMat->ja, + (const void *) vX, CUSPARSE_BASE_TYPE, + (const void *) &ebeta, CUSPARSE_BASE_TYPE, + (void *) vY, CUSPARSE_BASE_TYPE, + CUSPARSE_BASE_TYPE, &bfsz)); +#else + bfsz=cMat->nz; +#endif + + if (bfsz > cMat->mvbsize) { + if (cMat->mvbuffer != NULL) { + CHECK_CUDA(cudaFree(cMat->mvbuffer)); + cMat->mvbuffer = NULL; + } + //CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz)); + allocRemoteBuffer((void **) &(cMat->mvbuffer), bfsz); + cMat->mvbsize = bfsz; + } + CHECK_CUSPARSE(cusparseCsrmvEx(*my_handle, + CUSPARSE_ALG_MERGE_PATH, + CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->n,cMat->nz, + (const void *) &ealpha,CUSPARSE_BASE_TYPE, + cMat->descr, + (const void *) cMat->val, CUSPARSE_BASE_TYPE, + (const int *) cMat->irp, (const int *) cMat->ja, + (const void *) vX, CUSPARSE_BASE_TYPE, + (const void *) &ebeta, CUSPARSE_BASE_TYPE, + (void *) vY, CUSPARSE_BASE_TYPE, + CUSPARSE_BASE_TYPE, (void *) cMat->mvbuffer)); + +#else + cusparseDnVecDescr_t vecX, vecY; + size_t bfsz; + + if (T_CSRGIsNullMvDescr(cMat)) { + cMat->spmvDescr = (cusparseSpMatDescr_t *) malloc(sizeof(cusparseSpMatDescr_t *)); + } + T_CSRGCreateSpMVDescr(cMat); + vX=x->v_; + vY=y->v_; + CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) ); + CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) ); + CHECK_CUSPARSE(cusparseSpMV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha,(*(cMat->spmvDescr)),vecX,&beta,vecY, + CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT, + &bfsz)); + if (bfsz > cMat->mvbsize) { + if (cMat->mvbuffer != NULL) { + CHECK_CUDA(cudaFree(cMat->mvbuffer)); + cMat->mvbuffer = NULL; + } + //CHECK_CUDA(cudaMalloc((void **) &(cMat->mvbuffer), bfsz)); + allocRemoteBuffer((void **) &(cMat->mvbuffer), bfsz); + + cMat->mvbsize = bfsz; + } + CHECK_CUSPARSE(cusparseSpMV(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha,(*(cMat->spmvDescr)),vecX,&beta,vecY, + CUSPARSE_BASE_TYPE,CUSPARSE_SPMV_ALG_DEFAULT, + cMat->mvbuffer)); + CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); + CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); + CHECK_CUSPARSE(cusparseDestroySpMat(*(cMat->spmvDescr))); +#endif +} + +int T_spsvCSRGDevice(T_Cmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY) +{ + T_CSRGDeviceMat *cMat=Matrix->mat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + void *vX, *vY; + int r,n; + cusparseHandle_t *my_handle=getHandle(); +#if CUDA_SHORT_VERSION <= 10 + vX=x->v_; + vY=y->v_; + + return cusparseTcsrsv_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,(const TYPE *) &alpha,cMat->descr, + cMat->val, cMat->irp, cMat->ja, cMat->triang, + (const TYPE *) vX, (TYPE *) vY); +#elif CUDA_VERSION < 11030 + vX=x->v_; + vY=y->v_; + CHECK_CUSPARSE(cusparseTcsrsv2_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->nz, + (const TYPE *) &alpha, + cMat->descr, + cMat->val, cMat->irp, cMat->ja, + cMat->triang, + (const TYPE *) vX, (TYPE *) vY, + CUSPARSE_SOLVE_POLICY_USE_LEVEL, + (void *) cMat->svbuffer)); +#else + cusparseDnVecDescr_t vecX, vecY; + size_t bfsz; + vX=x->v_; + vY=y->v_; + CHECK_CUSPARSE( cusparseCreateDnVec(&vecY, cMat->m, vY, CUSPARSE_BASE_TYPE) ); + CHECK_CUSPARSE( cusparseCreateDnVec(&vecX, cMat->n, vX, CUSPARSE_BASE_TYPE) ); + if (T_CSRGIsNullMvDescr(cMat)) { + cMat->spmvDescr = (cusparseSpMatDescr_t *) malloc(sizeof(cusparseSpMatDescr_t *)); + } + T_CSRGCreateSpMVDescr(cMat); + // fprintf(stderr,"Entry to SpSVDevice: %d %p\n", + // T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr); + if (T_CSRGIsNullSvDescr(cMat)) { + cMat->spsvDescr=(cusparseSpSVDescr_t *) malloc(sizeof(cusparseSpSVDescr_t *)); + cMat->svbsize=0; + CHECK_CUSPARSE( cusparseSpSV_createDescr(cMat->spsvDescr) ); + //fprintf(stderr,"Entry to SpSVDevice: %d %p %d\n", + // T_CSRGIsNullSvDescr(cMat),cMat->spsvDescr,cMat->svbsize); + CHECK_CUSPARSE(cusparseSpSV_bufferSize(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha,*(cMat->spmvDescr),vecX,vecY, + CUSPARSE_BASE_TYPE, + CUSPARSE_SPSV_ALG_DEFAULT, + *(cMat->spsvDescr), + &bfsz)); + if (bfsz > cMat->svbsize) { + if (cMat->svbuffer != NULL) { + CHECK_CUDA(cudaFree(cMat->svbuffer)); + cMat->svbuffer = NULL; + } + //CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); + allocRemoteBuffer((void **) &(cMat->svbuffer), bfsz); + + cMat->svbsize=bfsz; + CHECK_CUSPARSE(cusparseSpSV_analysis(*my_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha, + *(cMat->spmvDescr), + vecX, vecY, + CUSPARSE_BASE_TYPE, + CUSPARSE_SPSV_ALG_DEFAULT, + *(cMat->spsvDescr), + cMat->svbuffer)); + } + if (T_CSRGIsNullSvBuffer(cMat)) { + fprintf(stderr,"SpSV_SOLVE NULL spsv-buffer\n"); + } + } + CHECK_CUSPARSE(cusparseSpSV_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + &alpha,*(cMat->spmvDescr),vecX,vecY, + CUSPARSE_BASE_TYPE, + CUSPARSE_SPSV_ALG_DEFAULT, + *(cMat->spsvDescr))); + CHECK_CUSPARSE(cusparseDestroyDnVec(vecX) ); + CHECK_CUSPARSE(cusparseDestroyDnVec(vecY) ); + CHECK_CUSPARSE(cusparseDestroySpMat(*(cMat->spmvDescr))); +#endif +} + +#if CUDA_VERSION >= 11030 +int T_CSRGCreateSpMVDescr(T_CSRGDeviceMat *cMat) +{ + int64_t tr,tc,tz; + tr = cMat->m; + tc = cMat->n; + tz = cMat->nz; + CHECK_CUSPARSE(cusparseCreateCsr(cMat->spmvDescr, + tr,tc,tz, + (void *) cMat->irp, + (void *) cMat->ja, + (void *) cMat->val, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ONE, + CUSPARSE_BASE_TYPE) ); +} +#endif +int T_CSRGDeviceAlloc(T_Cmat *Matrix,int nr, int nc, int nz) +{ + T_CSRGDeviceMat *cMat; + int nr1=nr, nz1=nz, rc; + cusparseHandle_t *my_handle=getHandle(); + int bfsz; + + if ((nr<0)||(nc<0)||(nz<0)) + return((int) CUSPARSE_STATUS_INVALID_VALUE); + if ((cMat=(T_CSRGDeviceMat *) malloc(sizeof(T_CSRGDeviceMat)))==NULL) + return((int) CUSPARSE_STATUS_ALLOC_FAILED); + cMat->m = nr; + cMat->n = nc; + cMat->nz = nz; + if (nr1 == 0) nr1 = 1; + if (nz1 == 0) nz1 = 1; + if ((rc= allocRemoteBuffer(((void **) &(cMat->irp)), ((nr1+1)*sizeof(int)))) != 0) + return(rc); + if ((rc= allocRemoteBuffer(((void **) &(cMat->ja)), ((nz1)*sizeof(int)))) != 0) + return(rc); + if ((rc= allocRemoteBuffer(((void **) &(cMat->val)), ((nz1)*sizeof(TYPE)))) != 0) + return(rc); +#if CUDA_SHORT_VERSION <= 10 + if ((rc= cusparseCreateMatDescr(&(cMat->descr))) !=0) + return(rc); + if ((rc= cusparseCreateSolveAnalysisInfo(&(cMat->triang))) !=0) + return(rc); +#elif CUDA_VERSION < 11030 + if ((rc= cusparseCreateMatDescr(&(cMat->descr))) !=0) + return(rc); + CHECK_CUSPARSE(cusparseSetMatType(cMat->descr,CUSPARSE_MATRIX_TYPE_GENERAL)); + CHECK_CUSPARSE(cusparseSetMatDiagType(cMat->descr,CUSPARSE_DIAG_TYPE_NON_UNIT)); + CHECK_CUSPARSE(cusparseSetMatIndexBase(cMat->descr,CUSPARSE_INDEX_BASE_ONE)); + CHECK_CUSPARSE(cusparseCreateCsrsv2Info(&(cMat->triang))); + if (cMat->nz > 0) { + CHECK_CUSPARSE(cusparseTcsrsv2_bufferSize(*my_handle, + CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->nz, cMat->descr, + cMat->val, cMat->irp, cMat->ja, + cMat->triang, &bfsz)); + } else { + bfsz = 0; + } + + /* if (cMat->svbuffer != NULL) { */ + /* fprintf(stderr,"Calling cudaFree\n"); */ + /* CHECK_CUDA(cudaFree(cMat->svbuffer)); */ + /* cMat->svbuffer = NULL; */ + /* } */ + if (bfsz > 0) { + //CHECK_CUDA(cudaMalloc((void **) &(cMat->svbuffer), bfsz)); + allocRemoteBuffer((void **) &(cMat->svbuffer), bfsz); + + } else { + cMat->svbuffer=NULL; + } + cMat->svbsize=bfsz; + + cMat->mvbuffer=NULL; + cMat->mvbsize = 0; + + +#else + + cMat->spmvDescr=NULL; + cMat->spsvDescr=NULL; + cMat->mvbuffer=NULL; + cMat->svbuffer=NULL; + cMat->mvbsize=0; + cMat->svbsize=0; +#endif + Matrix->mat = cMat; + return(CUSPARSE_STATUS_SUCCESS); +} + +int T_CSRGDeviceFree(T_Cmat *Matrix) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + + if (cMat!=NULL) { + freeRemoteBuffer(cMat->irp); + freeRemoteBuffer(cMat->ja); + freeRemoteBuffer(cMat->val); +#if CUDA_SHORT_VERSION <= 10 + cusparseDestroyMatDescr(cMat->descr); + cusparseDestroySolveAnalysisInfo(cMat->triang); +#elif CUDA_VERSION < 11030 + cusparseDestroyMatDescr(cMat->descr); + cusparseDestroyCsrsv2Info(cMat->triang); +#else + if (!T_CSRGIsNullMvDescr(cMat)) { + // already destroyed spmvDescr, just free the pointer + free(cMat->spmvDescr); + cMat->spmvDescr=NULL; + } + if (cMat->mvbuffer!=NULL) + CHECK_CUDA( cudaFree(cMat->mvbuffer)); + cMat->mvbuffer=NULL; + cMat->mvbsize=0; + if (!T_CSRGIsNullSvDescr(cMat)) { + CHECK_CUSPARSE(cusparseSpSV_destroyDescr(*(cMat->spsvDescr))); + free(cMat->spsvDescr); + cMat->spsvDescr=NULL; + } + if (cMat->svbuffer!=NULL) + CHECK_CUDA( cudaFree(cMat->svbuffer)); + cMat->svbuffer=NULL; + cMat->svbsize=0; +#endif + free(cMat); + Matrix->mat = NULL; + } + return(CUSPARSE_STATUS_SUCCESS); +} + +int T_CSRGDeviceGetParms(T_Cmat *Matrix,int *nr, int *nc, int *nz) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + + if (cMat!=NULL) { + *nr = cMat->m ; + *nc = cMat->n ; + *nz = cMat->nz ; + return(CUSPARSE_STATUS_SUCCESS); + } else { + return((int) CUSPARSE_STATUS_ALLOC_FAILED); + } +} + +#if CUDA_SHORT_VERSION <= 10 + +int T_CSRGDeviceSetMatType(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatType(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatFillMode(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatDiagType(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatIndexBase(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatIndexBase(cMat->descr,type)); +} + +int T_CSRGDeviceCsrsmAnalysis(T_Cmat *Matrix) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + int rc, buffersize; + cusparseHandle_t *my_handle=getHandle(); + cusparseSolveAnalysisInfo_t info; + + rc= (int) cusparseTcsrsv_analysis(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->nz,cMat->descr, + cMat->val, cMat->irp, cMat->ja, + cMat->triang); + if (rc !=0) { + fprintf(stderr,"From csrsv_analysis: %d\n",rc); + } + return(rc); +} + +#elif CUDA_VERSION < 11030 +int T_CSRGDeviceSetMatType(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatType(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatFillMode(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatDiagType(cMat->descr,type)); +} + +int T_CSRGDeviceSetMatIndexBase(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + return ((int) cusparseSetMatIndexBase(cMat->descr,type)); +} + +#else + +int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + cusparseFillMode_t mode=type; + + CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr, + CUSPARSE_SPMAT_FILL_MODE, + (const void*) &mode, + sizeof(cusparseFillMode_t))); + return(0); +} + +int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type) +{ + T_CSRGDeviceMat *cMat= Matrix->mat; + cusparseDiagType_t cutype=type; + CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr, + CUSPARSE_SPMAT_DIAG_TYPE, + (const void*) &cutype, + sizeof(cusparseDiagType_t))); + return(0); +} + +int T_CSRGIsNullMvDescr(T_CSRGDeviceMat *cMat) +{ + return(cMat->spmvDescr == NULL); +} + +int T_CSRGIsNullSvBuffer(T_CSRGDeviceMat *cMat) +{ + return(cMat->svbuffer == NULL); +} +int T_CSRGIsNullSvDescr(T_CSRGDeviceMat *cMat) +{ + return(cMat->spsvDescr == NULL); +} + +#endif + +int T_CSRGHost2Device(T_Cmat *Matrix, int m, int n, int nz, + int *irp, int *ja, TYPE *val) +{ + int rc; + T_CSRGDeviceMat *cMat= Matrix->mat; + cusparseHandle_t *my_handle=getHandle(); + + if ((rc=writeRemoteBuffer((void *) irp, (void *) cMat->irp, + (m+1)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + + if ((rc=writeRemoteBuffer((void *) ja,(void *) cMat->ja, + (nz)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + if ((rc=writeRemoteBuffer((void *) val, (void *) cMat->val, + (nz)*sizeof(TYPE))) + != SPGPU_SUCCESS) + return(rc); +#if (CUDA_SHORT_VERSION > 10 ) && (CUDA_VERSION < 11030) + if (cusparseGetMatType(cMat->descr)== CUSPARSE_MATRIX_TYPE_TRIANGULAR) { + // Why do we need to set TYPE_GENERAL??? cuSPARSE can be misterious sometimes. + cusparseSetMatType(cMat->descr,CUSPARSE_MATRIX_TYPE_GENERAL); + CHECK_CUSPARSE(cusparseTcsrsv2_analysis(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + cMat->m,cMat->nz, cMat->descr, + cMat->val, cMat->irp, cMat->ja, + cMat->triang, CUSPARSE_SOLVE_POLICY_USE_LEVEL, + cMat->svbuffer)); + } +#else + //cusparseSetMatType(*(cMat->spmvDescr),CUSPARSE_MATRIX_TYPE_GENERAL); +#endif + return(CUSPARSE_STATUS_SUCCESS); +} + +int T_CSRGDevice2Host(T_Cmat *Matrix, int m, int n, int nz, + int *irp, int *ja, TYPE *val) +{ + int rc; + T_CSRGDeviceMat *cMat = Matrix->mat; + + if ((rc=readRemoteBuffer((void *) irp, (void *) cMat->irp, (m+1)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + + if ((rc=readRemoteBuffer((void *) ja, (void *) cMat->ja, (nz)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + if ((rc=readRemoteBuffer((void *) val, (void *) cMat->val, (nz)*sizeof(TYPE))) + != SPGPU_SUCCESS) + return(rc); + + return(CUSPARSE_STATUS_SUCCESS); +} + +#if CUDA_SHORT_VERSION <= 10 +int T_HYBGDeviceFree(T_Hmat *Matrix) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + if (hMat != NULL) { + cusparseDestroyMatDescr(hMat->descr); + cusparseDestroySolveAnalysisInfo(hMat->triang); + cusparseDestroyHybMat(hMat->hybA); + free(hMat); + } + Matrix->mat = NULL; + return(CUSPARSE_STATUS_SUCCESS); +} + +int T_spmvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY) +{ + T_HYBGDeviceMat *hMat=Matrix->mat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + void *vX, *vY; + int r,n,rc; + cusparseMatrixType_t type; + cusparseHandle_t *my_handle=getHandle(); + + /*getAddrMultiVecDevice(deviceX, &vX); + getAddrMultiVecDevice(deviceY, &vY); */ + vX=x->v_; + vY=y->v_; + + /* rc = (int) cusparseGetMatType(hMat->descr); */ + /* fprintf(stderr,"Spmv MatType: %d\n",rc); */ + /* rc = (int) cusparseGetMatDiagType(hMat->descr); */ + /* fprintf(stderr,"Spmv DiagType: %d\n",rc); */ + /* rc = (int) cusparseGetMatFillMode(hMat->descr); */ + /* fprintf(stderr,"Spmv FillMode: %d\n",rc); */ + /* Dirty trick: apparently hybmv does not accept a triangular + matrix even though it should not make a difference. So + we claim it's general anyway */ + type = cusparseGetMatType(hMat->descr); + rc = cusparseSetMatType(hMat->descr,CUSPARSE_MATRIX_TYPE_GENERAL); + if (rc == 0) + rc = (int) cusparseThybmv(*my_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, + (const TYPE *) &alpha, hMat->descr, hMat->hybA, + (const TYPE *) vX, (const TYPE *) &beta, + (TYPE *) vY); + if (rc == 0) + rc = cusparseSetMatType(hMat->descr,type); + return(rc); +} + +int T_HYBGDeviceAlloc(T_Hmat *Matrix,int nr, int nc, int nz) +{ + T_HYBGDeviceMat *hMat; + int nr1=nr, nz1=nz, rc; + if ((nr<0)||(nc<0)||(nz<0)) + return((int) CUSPARSE_STATUS_INVALID_VALUE); + if ((hMat=(T_HYBGDeviceMat *) malloc(sizeof(T_HYBGDeviceMat)))==NULL) + return((int) CUSPARSE_STATUS_ALLOC_FAILED); + hMat->m = nr; + hMat->n = nc; + hMat->nz = nz; + + if ((rc= cusparseCreateMatDescr(&(hMat->descr))) !=0) + return(rc); + if ((rc= cusparseCreateSolveAnalysisInfo(&(hMat->triang))) !=0) + return(rc); + if((rc = cusparseCreateHybMat(&(hMat->hybA))) != 0) + return(rc); + Matrix->mat = hMat; + return(CUSPARSE_STATUS_SUCCESS); +} + +int T_HYBGDeviceSetMatDiagType(T_Hmat *Matrix, int type) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + return ((int) cusparseSetMatDiagType(hMat->descr,type)); +} + +int T_HYBGDeviceSetMatIndexBase(T_Hmat *Matrix, int type) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + return ((int) cusparseSetMatIndexBase(hMat->descr,type)); +} + +int T_HYBGDeviceSetMatType(T_Hmat *Matrix, int type) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + return ((int) cusparseSetMatType(hMat->descr,type)); +} + +int T_HYBGDeviceSetMatFillMode(T_Hmat *Matrix, int type) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + return ((int) cusparseSetMatFillMode(hMat->descr,type)); +} + +int T_spsvHYBGDevice(T_Hmat *Matrix, TYPE alpha, void *deviceX, + TYPE beta, void *deviceY) +{ + //beta?? + T_HYBGDeviceMat *hMat=Matrix->mat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + void *vX, *vY; + int r,n; + cusparseHandle_t *my_handle=getHandle(); + /*getAddrMultiVecDevice(deviceX, &vX); + getAddrMultiVecDevice(deviceY, &vY); */ + vX=x->v_; + vY=y->v_; + + return cusparseThybsv_solve(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + (const TYPE *) &alpha, hMat->descr, + hMat->hybA, hMat->triang, + (const TYPE *) vX, (TYPE *) vY); +} + +int T_HYBGDeviceHybsmAnalysis(T_Hmat *Matrix) +{ + T_HYBGDeviceMat *hMat= Matrix->mat; + cusparseSolveAnalysisInfo_t info; + int rc; + cusparseHandle_t *my_handle=getHandle(); + + /* rc = (int) cusparseGetMatType(hMat->descr); */ + /* fprintf(stderr,"Analysis MatType: %d\n",rc); */ + /* rc = (int) cusparseGetMatDiagType(hMat->descr); */ + /* fprintf(stderr,"Analysis DiagType: %d\n",rc); */ + /* rc = (int) cusparseGetMatFillMode(hMat->descr); */ + /* fprintf(stderr,"Analysis FillMode: %d\n",rc); */ + rc = (int) cusparseThybsv_analysis(*my_handle,CUSPARSE_OPERATION_NON_TRANSPOSE, + hMat->descr, hMat->hybA, hMat->triang); + + if (rc !=0) { + fprintf(stderr,"From csrsv_analysis: %d\n",rc); + } + return(rc); +} + +int T_HYBGHost2Device(T_Hmat *Matrix, int m, int n, int nz, + int *irp, int *ja, TYPE *val) +{ + int rc; double t1,t2; + int nr1=m, nz1=nz; + T_HYBGDeviceMat *hMat= Matrix->mat; + cusparseHandle_t *my_handle=getHandle(); + + if (nr1 == 0) nr1 = 1; + if (nz1 == 0) nz1 = 1; + if ((rc= allocRemoteBuffer(((void **) &(hMat->irp)), ((nr1+1)*sizeof(int)))) != 0) + return(rc); + if ((rc= allocRemoteBuffer(((void **) &(hMat->ja)), ((nz1)*sizeof(int)))) != 0) + return(rc); + if ((rc= allocRemoteBuffer(((void **) &(hMat->val)), ((nz1)*sizeof(TYPE)))) != 0) + return(rc); + + if ((rc=writeRemoteBuffer((void *) irp, (void *) hMat->irp, + (m+1)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + + if ((rc=writeRemoteBuffer((void *) ja,(void *) hMat->ja, + (nz)*sizeof(int))) + != SPGPU_SUCCESS) + return(rc); + if ((rc=writeRemoteBuffer((void *) val, (void *) hMat->val, + (nz)*sizeof(TYPE))) + != SPGPU_SUCCESS) + return(rc); + /* rc = (int) cusparseGetMatType(hMat->descr); */ + /* fprintf(stderr,"Conversion MatType: %d\n",rc); */ + /* rc = (int) cusparseGetMatDiagType(hMat->descr); */ + /* fprintf(stderr,"Conversion DiagType: %d\n",rc); */ + /* rc = (int) cusparseGetMatFillMode(hMat->descr); */ + /* fprintf(stderr,"Conversion FillMode: %d\n",rc); */ + //t1=etime(); + rc = (int) cusparseTcsr2hyb(*my_handle, m, n, + hMat->descr, + (const TYPE *)hMat->val, + (const int *)hMat->irp, (const int *)hMat->ja, + hMat->hybA,0, + CUSPARSE_HYB_PARTITION_AUTO); + + freeRemoteBuffer(hMat->irp); hMat->irp = NULL; + freeRemoteBuffer(hMat->ja); hMat->ja = NULL; + freeRemoteBuffer(hMat->val); hMat->val = NULL; + + //cudaSync(); + //t2 = etime(); + //fprintf(stderr,"Inner call to cusparseTcsr2hyb: %lf\n",(t2-t1)); + if (rc != 0) { + fprintf(stderr,"From csr2hyb: %d\n",rc); + } + return(rc); +} +#endif + diff --git a/cuda/hdiagdev.c b/cuda/hdiagdev.c new file mode 100644 index 00000000..6302eed1 --- /dev/null +++ b/cuda/hdiagdev.c @@ -0,0 +1,386 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + +#include "hdiagdev.h" +#include +#include +#include +#include +#define DEBUG 0 +void freeHdiagDevice(void* remoteMatrix) +{ + struct HdiagDevice *devMat = (struct HdiagDevice *) remoteMatrix; + //fprintf(stderr,"freeHllDevice\n"); + if (devMat != NULL) { + freeRemoteBuffer(devMat->hackOffsets); + freeRemoteBuffer(devMat->cM); + free(remoteMatrix); + } +} + + +HdiagDeviceParams getHdiagDeviceParams(unsigned int rows, unsigned int columns, + unsigned int allocationHeight, unsigned int hackSize, + unsigned int hackCount, unsigned int elementType) +{ + HdiagDeviceParams params; + + params.elementType = elementType; + //numero di elementi di val + params.rows = rows; + params.columns = columns; + params.allocationHeight = allocationHeight; + params.hackSize = hackSize; + params.hackCount = hackCount; + + return params; + +} + +int allocHdiagDevice(void **remoteMatrix, HdiagDeviceParams* params) +{ + struct HdiagDevice *tmp = (struct HdiagDevice *)malloc(sizeof(struct HdiagDevice)); + int ret=SPGPU_SUCCESS; + int *tmpOff = NULL; + + *remoteMatrix = (void *) tmp; +#if DEBUG + fprintf(stderr,"From alloc: %p\n",*remoteMatrix); +#endif + + tmp->rows = params->rows; + + tmp->hackSize = params->hackSize; + + tmp->cols = params->columns; + + tmp->allocationHeight = params->allocationHeight; + + tmp->hackCount = params->hackCount; + + + +#if DEBUG + fprintf(stderr,"hackcount %d allocationHeight %d\n",tmp->hackCount,tmp->allocationHeight); +#endif + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->hackOffsets), (tmp->hackCount+1)*sizeof(int)); + + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->hdiaOffsets), tmp->allocationHeight*sizeof(int)); + + /* tmp->baseIndex = params->firstIndex; */ + + if (params->elementType == SPGPU_TYPE_INT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(int)); + } + else if (params->elementType == SPGPU_TYPE_FLOAT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(float)); + } + else if (params->elementType == SPGPU_TYPE_DOUBLE) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(double)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(cuFloatComplex)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->hackSize*tmp->allocationHeight*sizeof(cuDoubleComplex)); + } + else + return SPGPU_UNSUPPORTED; // Unsupported params + return ret; +} + +int FallocHdiagDevice(void** deviceMat, unsigned int rows, unsigned int cols, + unsigned int allocationHeight, unsigned int hackSize, + unsigned int hackCount, unsigned int elementType) +{ int i=0; + HdiagDeviceParams p; + + p = getHdiagDeviceParams(rows, cols, allocationHeight, hackSize, hackCount,elementType); + + i = allocHdiagDevice(deviceMat, &p); +#if DEBUG + fprintf(stderr," Falloc %p \n",*deviceMat); +#endif + + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i); + } + return(i); + +} + +int writeHdiagDeviceDouble(void* deviceMat, double* val, int* hdiaOffsets, int *hackOffsets) +{ int i=0,fo,fa,j,k,p; + char buf_a[255], buf_o[255],tmp[255]; + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + + i=SPGPU_SUCCESS; + + +#if DEBUG + fprintf(stderr," Write %p \n",devMat); + + fprintf(stderr,"HDIAG writing to device memory: allocationHeight %d hackCount %d\n", + devMat->allocationHeight,devMat->hackCount); + fprintf(stderr,"HackOffsets: "); + for (j=0; jhackCount+1; j++) + fprintf(stderr," %d",hackOffsets[j]); + fprintf(stderr,"\n"); + fprintf(stderr,"diaOffsets: "); + for (j=0; jallocationHeight; j++) + fprintf(stderr," %d",hdiaOffsets[j]); + fprintf(stderr,"\n"); +#if 1 + fprintf(stderr,"values: \n"); + p=0; + for (j=0; jhackCount; j++){ + fprintf(stderr,"Hack no: %d\n",j+1); + for (k=0; khackSize*(devMat->allocationHeight/devMat->hackCount); k++){ + fprintf(stderr," %d %lf\n",p+1,val[p]); p++; + } + } + fprintf(stderr,"\n"); +#endif +#endif + + + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void *) hackOffsets,(void *) devMat->hackOffsets, + (devMat->hackCount+1)*sizeof(int)); + + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void*) hdiaOffsets, (void *)devMat->hdiaOffsets, + devMat->allocationHeight*sizeof(int)); + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + devMat->allocationHeight*devMat->hackSize*sizeof(double)); + if (i!=0) + fprintf(stderr,"Error in writeHdiagDeviceDouble %d\n",i); + +#if DEBUG + fprintf(stderr," EndWrite %p \n",devMat); +#endif + + if(i==0) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +} + + + +long long int sizeofHdiagDeviceDouble(void* deviceMat) +{ int i=0,fo,fa; + int *hoff=NULL,*hackoff=NULL; + long long int memsize=0; + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + + + memsize += (devMat->hackCount+1)*sizeof(int); + memsize += devMat->allocationHeight*sizeof(int); + memsize += devMat->allocationHeight*devMat->hackSize*sizeof(double); + return(memsize); +} + + + +int readHdiagDeviceDouble(void* deviceMat, double* a, int* off) +{ int i; + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + /* i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(double)); */ + /* i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); */ + + + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, + double beta, void* deviceY) +{ + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_cudaGetHandle(); + +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif +#if DEBUG + fprintf(stderr," First %p \n",devMat); + fprintf(stderr,"%d %d %d %p %p %p\n",devMat->rows,devMat->cols, devMat->hackSize, + devMat->hackOffsets, devMat->hdiaOffsets, devMat->cM); +#endif + spgpuDhdiaspmv (handle, (double*)y->v_, (double *)y->v_, alpha, + (double *)devMat->cM,devMat->hdiaOffsets, + devMat->hackSize, devMat->hackOffsets, devMat->rows,devMat->cols, + x->v_, beta); + + //cudaSync(); + + return SPGPU_SUCCESS; +} + +int writeHdiagDeviceFloat(void* deviceMat, float* val, int* hdiaOffsets, int *hackOffsets) +{ int i=0,fo,fa,j,k,p; + char buf_a[255], buf_o[255],tmp[255]; + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + + i=SPGPU_SUCCESS; + + +#if DEBUG + fprintf(stderr," Write %p \n",devMat); + + fprintf(stderr,"HDIAG writing to device memory: allocationHeight %d hackCount %d\n", + devMat->allocationHeight,devMat->hackCount); + fprintf(stderr,"HackOffsets: "); + for (j=0; jhackCount+1; j++) + fprintf(stderr," %d",hackOffsets[j]); + fprintf(stderr,"\n"); + fprintf(stderr,"diaOffsets: "); + for (j=0; jallocationHeight; j++) + fprintf(stderr," %d",hdiaOffsets[j]); + fprintf(stderr,"\n"); +#if 1 + fprintf(stderr,"values: \n"); + p=0; + for (j=0; jhackCount; j++){ + fprintf(stderr,"Hack no: %d\n",j+1); + for (k=0; khackSize*(devMat->allocationHeight/devMat->hackCount); k++){ + fprintf(stderr," %d %lf\n",p+1,val[p]); p++; + } + } + fprintf(stderr,"\n"); +#endif +#endif + + + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void *) hackOffsets,(void *) devMat->hackOffsets, + (devMat->hackCount+1)*sizeof(int)); + + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void*) hdiaOffsets, (void *)devMat->hdiaOffsets, + devMat->allocationHeight*sizeof(int)); + if(i== SPGPU_SUCCESS) + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, + devMat->allocationHeight*devMat->hackSize*sizeof(float)); + if (i!=0) + fprintf(stderr,"Error in writeHdiagDeviceFloat %d\n",i); + +#if DEBUG + fprintf(stderr," EndWrite %p \n",devMat); +#endif + + if(i==0) + return SPGPU_SUCCESS; + else + return SPGPU_UNSUPPORTED; +} + + + +long long int sizeofHdiagDeviceFloat(void* deviceMat) +{ int i=0,fo,fa; + int *hoff=NULL,*hackoff=NULL; + long long int memsize=0; + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + + + memsize += (devMat->hackCount+1)*sizeof(int); + memsize += devMat->allocationHeight*sizeof(int); + memsize += devMat->allocationHeight*devMat->hackSize*sizeof(float); + + return(memsize); +} + + + +int readHdiagDeviceFloat(void* deviceMat, float* a, int* off) +{ int i; + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + /* i = readRemoteBuffer((void *) a, (void *)devMat->cM,devMat->rows*devMat->diags*sizeof(float)); */ + /* i = readRemoteBuffer((void *) off, (void *)devMat->off, devMat->diags*sizeof(int)); */ + + + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); + }*/ + return SPGPU_SUCCESS; +} + +int spmvHdiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, + float beta, void* deviceY) +{ + struct HdiagDevice *devMat = (struct HdiagDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_cudaGetHandle(); + +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif +#if DEBUG + fprintf(stderr," First %p \n",devMat); + fprintf(stderr,"%d %d %d %p %p %p\n",devMat->rows,devMat->cols, devMat->hackSize, + devMat->hackOffsets, devMat->hdiaOffsets, devMat->cM); +#endif + spgpuShdiaspmv (handle, (float*)y->v_, (float *)y->v_, alpha, + (float *)devMat->cM,devMat->hdiaOffsets, + devMat->hackSize, devMat->hackOffsets, devMat->rows,devMat->cols, + x->v_, beta); + + //cudaSync(); + + return SPGPU_SUCCESS; +} + diff --git a/cuda/hdiagdev.h b/cuda/hdiagdev.h new file mode 100644 index 00000000..c02fcc69 --- /dev/null +++ b/cuda/hdiagdev.h @@ -0,0 +1,107 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + +#ifndef _HDIAGDEV_H_ +#define _HDIAGDEV_H_ + +#include "cintrf.h" +#include "vectordev.h" +#include "hdia.h" + +struct HdiagDevice +{ + // Compressed matrix + void *cM; //it can be float or double + + // offset (same size of cM) + int *hdiaOffsets; + + int *hackOffsets; + + int hackCount; + + int rows; + + int cols; + + + int hackSize; + + int allocationHeight; + +}; + +typedef struct HdiagDeviceParams +{ + + unsigned int elementType; + + // Number of rows. + // Used to allocate rS array + unsigned int rows; + //unsigned int hackOffsLength; + + // Number of columns. + // Used for error-checking + unsigned int columns; + + unsigned int hackSize; + unsigned int hackCount; + unsigned int allocationHeight; + + +} HdiagDeviceParams; + + + +HdiagDeviceParams getHdiagDeviceParams(unsigned int rows, unsigned int columns, + unsigned int allocationHeight, unsigned int hackSize, + unsigned int hackCount, unsigned int elementType); + +int FallocHdiagDevice(void** deviceMat, unsigned int rows, unsigned int cols, + unsigned int allocationHeight, unsigned int hackSize, + unsigned int hackCount, unsigned int elementType); + +int allocHdiagDevice(void ** remoteMatrix, HdiagDeviceParams* params); + + +void freeHdiagDevice(void* remoteMatrix); + +int writeHdiagDeviceFloat(void* deviceMat, float* val, int* hdiaOffsets, int *hackOffsets); +int spmvHdiagDeviceFloat(void *deviceMat, float alpha, void* deviceX, + float beta, void* deviceY); + +int writeHdiagDeviceDouble(void* deviceMat, double* val, int* hdiaOffsets, int *hackOffsets); +int spmvHdiagDeviceDouble(void *deviceMat, double alpha, void* deviceX, + double beta, void* deviceY); + + +#endif diff --git a/cuda/hdiagdev_mod.F90 b/cuda/hdiagdev_mod.F90 new file mode 100644 index 00000000..9a3530e7 --- /dev/null +++ b/cuda/hdiagdev_mod.F90 @@ -0,0 +1,199 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module hdiagdev_mod + use iso_c_binding + use core_mod + + type, bind(c) :: hdiagdev_parms + integer(c_int) :: element_type + integer(c_int) :: rows + integer(c_int) :: columns + integer(c_int) :: hackSize + integer(c_int) :: hackCount + integer(c_int) :: allocationHeight + end type hdiagdev_parms + + ! interface computeHdiaHacksCount + ! function computeHdiaHacksCountDouble(allocationHeight,hackOffsets,hackSize, & + ! & diaValues,diaValuesPitch,diags,rows)& + ! & result(res) bind(c,name='computeHdiaHackOffsetsDouble') + ! use iso_c_binding + ! integer(c_int) :: res + ! integer(c_int), value :: rows,diags,diaValuesPitch,hackSize,elementType + ! real(c_double) :: diaValues(rows,:) + ! integer(c_int) :: hackOffsets,allocationHeight + ! end function computeHdiaHacksCountDouble + ! end interface computeHdiaHacksCount + + interface + function FgetHdiagDeviceParams(rows, columns, allocationHeight,hackSize, & + & hackCount, elementType) & + & result(res) bind(c,name='getHdiagDeviceParams') + use iso_c_binding + import :: hdiagdev_parms + type(hdiagdev_parms) :: res + integer(c_int), value :: rows,columns,allocationHeight,& + & elementType,hackSize,hackCount + end function FgetHdiagDeviceParams + end interface + + + interface + function FallocHdiagDevice(deviceMat,rows,columns,allocationHeight,& + & hackSize,hackCount,elementType) & + & result(res) bind(c,name='FallocHdiagDevice') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: rows,columns,allocationHeight,hackSize,& + & hackCount,elementType + type(c_ptr) :: deviceMat + end function FallocHdiagDevice + end interface + + + interface + function sizeofHdiagDeviceDouble(deviceMat) & + & result(res) bind(c,name='sizeofHdiagDeviceDouble') + use iso_c_binding + integer(c_long_long) :: res + type(c_ptr), value :: deviceMat + end function sizeofHdiagDeviceDouble + end interface + + interface writeHdiagDevice + + function writeHdiagDeviceFloat(deviceMat,val,hdiaOffsets, hackOffsets) & + & result(res) bind(c,name='writeHdiagDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + real(c_float) :: val(*) + integer(c_int) :: hdiaOffsets(*), hackOffsets(*) + end function writeHdiagDeviceFloat + + function writeHdiagDeviceDouble(deviceMat,val,hdiaOffsets, hackOffsets) & + & result(res) bind(c,name='writeHdiagDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + real(c_double) :: val(*) + integer(c_int) :: hdiaOffsets(*), hackOffsets(*) + end function writeHdiagDeviceDouble + + end interface writeHdiagDevice + +!!$ interface readHdiagDevice +!!$ +!!$ function readHdiagDeviceFloat(deviceMat,val,ja,ldj,irn) & +!!$ & result(res) bind(c,name='readHdiagDeviceFloat') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat +!!$ integer(c_int), value :: ldj +!!$ real(c_float) :: val(ldj,*) +!!$ integer(c_int) :: ja(ldj,*),irn(*) +!!$ end function readHdiagDeviceFloat +!!$ +!!$ function readHdiagDeviceDouble(deviceMat,a,off,n) & +!!$ & result(res) bind(c,name='readHdiagDeviceDouble') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat +!!$ integer(c_int),value :: n +!!$ real(c_double) :: a(n,*) +!!$ integer(c_int) :: off(*) +!!$ end function readHdiagDeviceDouble +!!$ +!!$ function readHdiagDeviceFloatComplex(deviceMat,val,ja,ldj,irn) & +!!$ & result(res) bind(c,name='readHdiagDeviceFloatComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat +!!$ integer(c_int), value :: ldj +!!$ complex(c_float_complex) :: val(ldj,*) +!!$ integer(c_int) :: ja(ldj,*),irn(*) +!!$ end function readHdiagDeviceFloatComplex +!!$ +!!$ function readHdiagDeviceDoubleComplex(deviceMat,val,ja,ldj,irn) & +!!$ & result(res) bind(c,name='readHdiagDeviceDoubleComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat +!!$ integer(c_int), value :: ldj +!!$ complex(c_double_complex) :: val(ldj,*) +!!$ integer(c_int) :: ja(ldj,*),irn(*) +!!$ end function readHdiagDeviceDoubleComplex +!!$ +!!$ end interface readHdiagDevice +!!$ + interface + subroutine freeHdiagDevice(deviceMat) & + & bind(c,name='freeHdiagDevice') + use iso_c_binding + type(c_ptr), value :: deviceMat + end subroutine freeHdiagDevice + end interface + + + interface spmvHdiagDevice + function spmvHdiagDeviceFloat(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvHdiagDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_float),value :: alpha, beta + end function spmvHdiagDeviceFloat + function spmvHdiagDeviceDouble(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvHdiagDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_double),value :: alpha, beta + end function spmvHdiagDeviceDouble +!!$ function spmvHdiagDeviceFloatComplex(deviceMat,alpha,x,beta,y) & +!!$ & result(res) bind(c,name='spmvHdiagDeviceFloatComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat, x, y +!!$ complex(c_float_complex),value :: alpha, beta +!!$ end function spmvHdiagDeviceFloatComplex +!!$ function spmvHdiagDeviceDoubleComplex(deviceMat,alpha,x,beta,y) & +!!$ & result(res) bind(c,name='spmvHdiagDeviceDoubleComplex') +!!$ use iso_c_binding +!!$ integer(c_int) :: res +!!$ type(c_ptr), value :: deviceMat, x, y +!!$ complex(c_double_complex),value :: alpha, beta +!!$ end function spmvHdiagDeviceDoubleComplex + end interface spmvHdiagDevice + +end module hdiagdev_mod diff --git a/cuda/hlldev.c b/cuda/hlldev.c new file mode 100644 index 00000000..9da6a48c --- /dev/null +++ b/cuda/hlldev.c @@ -0,0 +1,540 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + +#include "hlldev.h" +//new +HllDeviceParams bldHllDeviceParams(unsigned int hksize, unsigned int rows, unsigned int nzeros, + unsigned int allocsize, unsigned int elementType, unsigned int firstIndex) +{ + HllDeviceParams params; + + params.elementType = elementType; + params.hackSize = hksize; + //numero di elementi di val + params.allocsize = allocsize; + params.rows = rows; + params.nzt = nzeros; + params.avgNzr = (nzeros+rows-1)/rows; + params.firstIndex = firstIndex; + return params; + +} + +int getHllDeviceParams(HllDevice* mat, int *hksize, int *rows, int *nzeros, + int *allocsize, int *hackOffsLength, int *firstIndex, int *avgnzr) +{ + + + if (mat!=NULL) { + *hackOffsLength = mat->hackOffsLength ; + *hksize = mat->hackSize ; + *nzeros = mat->nzt ; + *allocsize = mat->allocsize ; + *rows = mat->rows ; + *avgnzr = mat->avgNzr ; + *firstIndex = mat->baseIndex ; + return SPGPU_SUCCESS; + } else { + return SPGPU_UNSUPPORTED; + } +} +//new +int allocHllDevice(void ** remoteMatrix, HllDeviceParams* params) +{ + HllDevice *tmp = (HllDevice *)malloc(sizeof(HllDevice)); + int ret=SPGPU_SUCCESS; + *remoteMatrix = (void *)tmp; + + tmp->hackSize = params->hackSize; + + tmp->allocsize = params->allocsize; + + tmp->rows = params->rows; + tmp->avgNzr = params->avgNzr; + tmp->nzt = params->nzt; + tmp->baseIndex = params->firstIndex; + //fprintf(stderr,"Allocating HLG with %d avgNzr\n",params->avgNzr); + tmp->hackOffsLength = (int)(tmp->rows+tmp->hackSize-1)/tmp->hackSize; + + //printf("hackOffsLength %d\n",tmp->hackOffsLength); + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->rP), tmp->allocsize*sizeof(int)); + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->rS), tmp->rows*sizeof(int)); + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->diag), tmp->rows*sizeof(int)); + + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->hackOffs), ((tmp->hackOffsLength+1)*sizeof(int))); + + if (params->elementType == SPGPU_TYPE_INT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(int)); + } + else if (params->elementType == SPGPU_TYPE_FLOAT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(float)); + } + else if (params->elementType == SPGPU_TYPE_DOUBLE) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(double)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuFloatComplex)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + { + if (ret == SPGPU_SUCCESS) + ret=allocRemoteBuffer((void **)&(tmp->cM), tmp->allocsize*sizeof(cuDoubleComplex)); + } + else + return SPGPU_UNSUPPORTED; // Unsupported params + return ret; +} + +void freeHllDevice(void* remoteMatrix) +{ + HllDevice *devMat = (HllDevice *) remoteMatrix; + //fprintf(stderr,"freeHllDevice\n"); + if (devMat != NULL) { + freeRemoteBuffer(devMat->rS); + freeRemoteBuffer(devMat->diag); + freeRemoteBuffer(devMat->rP); + freeRemoteBuffer(devMat->cM); + free(remoteMatrix); + } +} + +//new +int FallocHllDevice(void** deviceMat,unsigned int hksize, unsigned int rows, unsigned int nzeros, + unsigned int allocsize, + unsigned int elementType, unsigned int firstIndex) +{ int i; + HllDeviceParams p; + + p = bldHllDeviceParams(hksize, rows, nzeros, allocsize, elementType, firstIndex); + i = allocHllDevice(deviceMat, &p); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocEllDevice",i); + } + return(i); +} + + +int spmvHllDeviceFloat(void *deviceMat, float alpha, void* deviceX, + float beta, void* deviceY) +{ + HllDevice *devMat = (HllDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_cudaGetHandle(); + +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif + /*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM, + devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta, + devMat->baseIndex);*/ + + spgpuShellspmv (handle, (float *)y->v_, (float *)y->v_, alpha, (float *)devMat->cM, + devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL, + devMat->avgNzr, devMat->rows, (float *)x->v_, beta, devMat->baseIndex); + + return SPGPU_SUCCESS; +} + +//new +int spmvHllDeviceDouble(void *deviceMat, double alpha, void* deviceX, + double beta, void* deviceY) +{ + HllDevice *devMat = (HllDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_cudaGetHandle(); + +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif + /*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM, + devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta, + devMat->baseIndex);*/ + + spgpuDhellspmv (handle, (double *)y->v_, (double *)y->v_, alpha, (double*)devMat->cM, + devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL, + devMat->avgNzr, devMat->rows, (double *)x->v_, beta, devMat->baseIndex); + //cudaSync(); + return SPGPU_SUCCESS; +} + +int spmvHllDeviceFloatComplex(void *deviceMat, float complex alpha, void* deviceX, + float complex beta, void* deviceY) +{ + HllDevice *devMat = (HllDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_cudaGetHandle(); + + cuFloatComplex a = make_cuFloatComplex(crealf(alpha),cimagf(alpha)); + cuFloatComplex b = make_cuFloatComplex(crealf(beta),cimagf(beta)); +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif + /*dspmdmm_gpu ((double *)z->v_, y->count_, y->pitch_, (double *)y->v_, alpha, (double *)devMat->cM, + devMat->rP, devMat->rS, devMat->rows, devMat->pitch, (double *)x->v_, beta, + devMat->baseIndex);*/ + + spgpuChellspmv (handle, (cuFloatComplex *)y->v_, (cuFloatComplex *)y->v_, a, (cuFloatComplex *)devMat->cM, + devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL, + devMat->avgNzr, devMat->rows, (cuFloatComplex *)x->v_, b, devMat->baseIndex); + + return SPGPU_SUCCESS; +} + +int spmvHllDeviceDoubleComplex(void *deviceMat, double complex alpha, void* deviceX, + double complex beta, void* deviceY) +{ + HllDevice *devMat = (HllDevice *) deviceMat; + struct MultiVectDevice *x = (struct MultiVectDevice *) deviceX; + struct MultiVectDevice *y = (struct MultiVectDevice *) deviceY; + spgpuHandle_t handle=psb_cudaGetHandle(); + + cuDoubleComplex a = make_cuDoubleComplex(creal(alpha),cimag(alpha)); + cuDoubleComplex b = make_cuDoubleComplex(creal(beta),cimag(beta)); +#ifdef VERBOSE + /*__assert(x->count_ == x->count_, "ERROR: x and y don't share the same number of vectors");*/ + /*__assert(x->size_ >= devMat->columns, "ERROR: x vector's size is not >= to matrix size (columns)");*/ + /*__assert(y->size_ >= devMat->rows, "ERROR: y vector's size is not >= to matrix size (rows)");*/ +#endif + + spgpuZhellspmv (handle, (cuDoubleComplex *)y->v_, (cuDoubleComplex *)y->v_, a, (cuDoubleComplex *)devMat->cM, + devMat->rP,devMat->hackSize,devMat->hackOffs, devMat->rS, NULL, + devMat->avgNzr,devMat->rows, (cuDoubleComplex *)x->v_, b, devMat->baseIndex); + + return SPGPU_SUCCESS; +} + +int writeHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; + HllDevice *devMat = (HllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + //i = writeEllDevice(deviceMat, (void *) val, ja, irn); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceFloat",i); + }*/ + return SPGPU_SUCCESS; +} + +int writeHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; + HllDevice *devMat = (HllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +int writeHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; + HllDevice *devMat = (HllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +int writeHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; + HllDevice *devMat = (HllDevice *) deviceMat; + // Ex updateFromHost function + i = writeRemoteBuffer((void*) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); + i = writeRemoteBuffer((void*) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = writeRemoteBuffer((void*) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = writeRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +int readHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; + HllDevice *devMat = (HllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(float)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + /*i = readEllDevice(deviceMat, (void *) val, ja, irn); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceFloat",i); + }*/ + return SPGPU_SUCCESS; +} + +int readHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; + HllDevice *devMat = (HllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(double)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void *) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +int readHllDeviceFloatComplex(void* deviceMat, float complex* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; + HllDevice *devMat = (HllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuFloatComplex)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +int readHllDeviceDoubleComplex(void* deviceMat, double complex* val, int* ja, int *hkoffs, int* irn, int *idiag) +{ int i; + HllDevice *devMat = (HllDevice *) deviceMat; + i = readRemoteBuffer((void *) val, (void *)devMat->cM, devMat->allocsize*sizeof(cuDoubleComplex)); + i = readRemoteBuffer((void *) ja, (void *)devMat->rP, devMat->allocsize*sizeof(int)); + i = readRemoteBuffer((void *) irn, (void *)devMat->rS, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void*) idiag, (void *)devMat->diag, devMat->rows*sizeof(int)); + i = readRemoteBuffer((void*) hkoffs, (void *)devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + /*if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readEllDeviceDouble",i); + }*/ + return SPGPU_SUCCESS; +} + +// New copy routines. + +int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + float *val, void *deviceMat) +{ int i,j; + spgpuHandle_t handle; + HllDevice *devMat = (HllDevice *) deviceMat; + float *devVal; + int *devIdisp, *devJa; + int *tja; + //fprintf(stderr,"devMat: %p\n",devMat); + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(float)); + + // fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(float)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + //cudaSync(); + + handle = psb_cudaGetHandle(); + psi_cuda_s_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, + (int *) devMat->rS, (int *) devMat->hackOffs, + devIdisp,devJa,devVal, + (int *) devMat->diag, (int *) devMat->rP, (float *)devMat->cM); + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceFloat",i); + } + return SPGPU_SUCCESS; +} + +int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + double *val, void *deviceMat) +{ int i,j; + spgpuHandle_t handle; + HllDevice *devMat = (HllDevice *) deviceMat; + double *devVal; + int *devIdisp, *devJa; + int *tja; + //fprintf(stderr,"devMat: %p\n",devMat); + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(double)); + + // fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(double)); + //fprintf(stderr,"WriteRemoteBuffer val %d\n",i); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + //fprintf(stderr,"WriteRemoteBuffer ja %d\n",i); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + //fprintf(stderr,"WriteRemoteBuffer irn %d\n",i); + if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + //fprintf(stderr,"WriteRemoteBuffer hoffs %d\n",i); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + //fprintf(stderr,"WriteRemoteBuffer idisp %d\n",i); + //cudaSync(); + //fprintf(stderr," hacksz: %d \n",hacksz); + handle = psb_cudaGetHandle(); + psi_cuda_d_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, + (int *) devMat->rS, (int *) devMat->hackOffs, + devIdisp,devJa,devVal, + (int *) devMat->diag, (int *) devMat->rP, (double *)devMat->cM); + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceDouble",i); + } + return SPGPU_SUCCESS; +} + +int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + float complex *val, void *deviceMat) +{ int i,j; + spgpuHandle_t handle; + HllDevice *devMat = (HllDevice *) deviceMat; + float complex *devVal; + int *devIdisp, *devJa; + int *tja; + //fprintf(stderr,"devMat: %p\n",devMat); + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuFloatComplex)); + + // fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuFloatComplex)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + //cudaSync(); + + handle = psb_cudaGetHandle(); + psi_cuda_c_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, + (int *) devMat->rS, (int *) devMat->hackOffs, + devIdisp,devJa,devVal, + (int *) devMat->diag,(int *) devMat->rP, (float complex *)devMat->cM); + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceFloatComplex",i); + } + return SPGPU_SUCCESS; +} + +int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + double complex *val, void *deviceMat) +{ int i,j; + spgpuHandle_t handle; + HllDevice *devMat = (HllDevice *) deviceMat; + double complex *devVal; + int *devIdisp, *devJa; + int *tja; + //fprintf(stderr,"devMat: %p\n",devMat); + allocRemoteBuffer((void **)&(devIdisp), (nr+1)*sizeof(int)); + allocRemoteBuffer((void **)&(devJa), (nza)*sizeof(int)); + allocRemoteBuffer((void **)&(devVal), (nza)*sizeof(cuDoubleComplex)); + + // fprintf(stderr,"Writing: %d %d %d %d %d %d %d\n",nr,devMat->rows,nza,isz, hoffs[noffs], noffs, devMat->hackOffsLength); + i = writeRemoteBuffer((void*) val, (void *)devVal, nza*sizeof(cuDoubleComplex)); + if (i==0) i = writeRemoteBuffer((void*) ja, (void *) devJa, nza*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) irn, (void *) devMat->rS, devMat->rows*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) hoffs, (void *) devMat->hackOffs, (devMat->hackOffsLength+1)*sizeof(int)); + if (i==0) i = writeRemoteBuffer((void*) idisp, (void *) devIdisp, (devMat->rows+1)*sizeof(int)); + //cudaSync(); + + handle = psb_cudaGetHandle(); + psi_cuda_z_CopyCooToHlg(handle, nr,nc,nza,devMat->baseIndex,hacksz,noffs,isz, + (int *) devMat->rS, (int *) devMat->hackOffs, + devIdisp,devJa,devVal, + (int *) devMat->diag,(int *) devMat->rP, (double complex *)devMat->cM); + + freeRemoteBuffer(devIdisp); + freeRemoteBuffer(devJa); + freeRemoteBuffer(devVal); + + /*i = writeEllDevice(deviceMat, (void *) val, ja, irn);*/ + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeHllDeviceDoubleComplex",i); + } + return SPGPU_SUCCESS; +} diff --git a/cuda/hlldev.h b/cuda/hlldev.h new file mode 100644 index 00000000..3b47f5ea --- /dev/null +++ b/cuda/hlldev.h @@ -0,0 +1,156 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + +#ifndef _HLLDEV_H_ +#define _HLLDEV_H_ + +#include "cintrf.h" +#include "hell.h" +#include "vectordev.h" + +typedef struct hlldevice +{ + // Compressed matrix + void *cM; //it can be float or double + + // row pointers (same size of cM) + int *rP; + + // row size and diagonal position + int *rS; + int *diag; + + int *hackOffs; + + int rows; + int avgNzr; + int hackOffsLength; + int nzt; + int hackSize; //must be multiple of 32 + + //matrix size (uncompressed) + //int rows; + //int columns; + + //allocation size + int allocsize; + + /*(i.e. 0 for C, 1 for Fortran)*/ + int baseIndex; +} HllDevice; + +typedef struct hlldeviceparams +{ + + unsigned int elementType; + + unsigned int hackSize; + + // Number of rows. + // Used to allocate rS array + unsigned int rows; + unsigned int avgNzr; + unsigned int nzt; + //unsigned int hackOffsLength; + + // Number of columns. + // Used for error-checking + // unsigned int columns; + + unsigned int allocsize; + + // First index (e.g 0 or 1) + unsigned int firstIndex; + +} HllDeviceParams; + + +HllDeviceParams bldHllDeviceParams(unsigned int hksize, unsigned int rows, unsigned int nzeros, + unsigned int allocsize, + unsigned int elementType, unsigned int firstIndex); +int getHllDeviceParams(HllDevice* mat, int *hksize, int *rows, int *nzeros, + int *allocsize, int *hackOffsLength, int *firstIndex, int *avgnzr); +int FallocHllDevice(void** deviceMat,unsigned int hksize, unsigned int rows, unsigned int nzeros, + unsigned int allocsize, unsigned int elementType, unsigned int firstIndex); +int allocHllDevice(void ** remoteMatrix, HllDeviceParams* params); +void freeHllDevice(void* remoteMatrix); +int writeHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag); +int writeHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag); +int writeHllDeviceFloatComplex(void* deviceMat, float complex* val, + int* ja, int *hkoffs, int* irn, int *idiag); +int writeHllDeviceDoubleComplex(void* deviceMat, double complex* val, + int* ja, int *hkoffs, int* irn, int *idiag); +int readHllDeviceFloat(void* deviceMat, float* val, int* ja, int *hkoffs, int* irn, int *idiag); +int readHllDeviceDouble(void* deviceMat, double* val, int* ja, int *hkoffs, int* irn, int *idiag); +int readHllDeviceFloatComplex(void* deviceMat, float complex* val, + int* ja, int *hkoffs, int* irn, int *idiag); +int readHllDeviceDoubleComplex(void* deviceMat, double complex* val, + int* ja, int *hkoffs, int* irn, int *idiag); + + +int psiCopyCooToHlgFloat(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + float *val, void *deviceMat); +int psiCopyCooToHlgDouble(int nr, int nc, int nza, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, int *ja, + double *val, void *deviceMat); +int psiCopyCooToHlgFloatComplex(int nr, int nc, int nza, int hacksz, + int noffs, int isz, int *irn, + int *hoffs, int *idisp, int *ja, + float complex *val, void *deviceMat); +int psiCopyCooToHlgDoubleComplex(int nr, int nc, int nza, int hacksz, + int noffs, int isz, int *irn, + int *hoffs, int *idisp, int *ja, + double complex *val, void *deviceMat); + +int psi_cuda_s_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza, + int baseIdx, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, + int *ja, float *val, + int *idiag, int *rP, float *cM); +int psi_cuda_d_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza, + int baseIdx, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, + int *ja, double *val, + int *idiag, int *rP, double *cM); +int psi_cuda_c_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza, + int baseIdx, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, + int *ja, float complex *val, + int *idiag, int *rP, float complex *cM); +int psi_cuda_z_CopyCooToHlg(spgpuHandle_t handle,int nr, int nc, int nza, + int baseIdx, int hacksz, int noffs, int isz, + int *irn, int *hoffs, int *idisp, + int *ja, double complex *val, + int *idiag, int *rP, double complex *cM); + + +#endif diff --git a/cuda/hlldev_mod.F90 b/cuda/hlldev_mod.F90 new file mode 100644 index 00000000..90b8e13c --- /dev/null +++ b/cuda/hlldev_mod.F90 @@ -0,0 +1,268 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module hlldev_mod + use iso_c_binding + use core_mod + + type, bind(c) :: hlldev_parms + integer(c_int) :: element_type + integer(c_int) :: hackSize + integer(c_int) :: rows + integer(c_int) :: avgNzr + integer(c_int) :: allocsize + integer(c_int) :: firstIndex + end type hlldev_parms + + interface + function bldHllDeviceParams(hksize, rows, nzeros, allocsize, elementType, firstIndex) & + & result(res) bind(c,name='bldHllDeviceParams') + use iso_c_binding + import :: hlldev_parms + type(hlldev_parms) :: res + integer(c_int), value :: hksize,rows,nzeros,allocsize,elementType,firstIndex + end function BldHllDeviceParams + end interface + + interface + function getHllDeviceParams(deviceMat,hksize, rows, nzeros, allocsize,& + & hackOffsLength, firstIndex,avgnzr) & + & result(res) bind(c,name='getHllDeviceParams') + use iso_c_binding + import :: hlldev_parms + integer(c_int) :: res + type(c_ptr), value :: deviceMat + integer(c_int) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr + end function GetHllDeviceParams + end interface + + + interface + function FallocHllDevice(deviceMat,hksize,rows, nzeros,allocsize, & + & elementType,firstIndex) & + & result(res) bind(c,name='FallocHllDevice') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: hksize,rows,nzeros,allocsize,elementType,firstIndex + type(c_ptr) :: deviceMat + end function FallocHllDevice + end interface + + + interface writeHllDevice + + function writeHllDeviceFloat(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='writeHllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + real(c_float) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function writeHllDeviceFloat + + function writeHllDeviceDouble(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='writeHllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + real(c_double) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function writeHllDeviceDouble + + function writeHllDeviceFloatComplex(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='writeHllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + complex(c_float_complex) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function writeHllDeviceFloatComplex + + function writeHllDeviceDoubleComplex(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='writeHllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + complex(c_double_complex) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function writeHllDeviceDoubleComplex + + end interface + + interface readHllDevice + + function readHllDeviceFloat(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='readHllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + real(c_float) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function readHllDeviceFloat + + function readHllDeviceDouble(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='readHllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + real(c_double) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function readHllDeviceDouble + + function readHllDeviceFloatComplex(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='readHllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + complex(c_float_complex) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function readHllDeviceFloatComplex + + function readHllDeviceDoubleComplex(deviceMat,val,ja,hkoffs,irn,idiag) & + & result(res) bind(c,name='readHllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat + complex(c_double_complex) :: val(*) + integer(c_int) :: ja(*),irn(*),hkoffs(*),idiag(*) + end function readHllDeviceDoubleComplex + + end interface + + interface + subroutine freeHllDevice(deviceMat) & + & bind(c,name='freeHllDevice') + use iso_c_binding + type(c_ptr), value :: deviceMat + end subroutine freeHllDevice + end interface + + + interface psi_CopyCooToHlg + function psiCopyCooToHlgFloat(nr, nc, nza, hacksz, noffs, isz, irn, & + & hoffs, idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToHlgFloat') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz + type(c_ptr), value :: deviceMat + real(c_float) :: val(*) + integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*) + end function psiCopyCooToHlgFloat + function psiCopyCooToHlgDouble(nr, nc, nza, hacksz, noffs, isz, irn, & + & hoffs, idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToHlgDouble') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz + type(c_ptr), value :: deviceMat + real(c_double) :: val(*) + integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*) + end function psiCopyCooToHlgDouble + function psiCopyCooToHlgFloatComplex(nr, nc, nza, hacksz, noffs, isz, irn, & + & hoffs, idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToHlgFloatComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz + type(c_ptr), value :: deviceMat + complex(c_float_complex) :: val(*) + integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*) + end function psiCopyCooToHlgFloatComplex + function psiCopyCooToHlgDoubleComplex(nr, nc, nza, hacksz, noffs, isz, irn, & + & hoffs, idisp, ja, val, deviceMat) & + & result(res) bind(c,name='psiCopyCooToHlgDoubleComplex') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: nr,nc,nza,hacksz,noffs,isz + type(c_ptr), value :: deviceMat + complex(c_double_complex) :: val(*) + integer(c_int) :: irn(*), idisp(*), ja(*), hoffs(*) + end function psiCopyCooToHlgDoubleComplex + end interface + + + !interface + ! function getHllDevicePitch(deviceMat) & + ! & bind(c,name='getHllDevicePitch') result(res) + ! use iso_c_binding + ! type(c_ptr), value :: deviceMat + ! integer(c_int) :: res + ! end function getHllDevicePitch + !end interface + + !interface + ! function getHllDeviceMaxRowSize(deviceMat) & + ! & bind(c,name='getHllDeviceMaxRowSize') result(res) + ! use iso_c_binding + ! type(c_ptr), value :: deviceMat + ! integer(c_int) :: res + ! end function getHllDeviceMaxRowSize + !end interface + + interface spmvHllDevice + + function spmvHllDeviceFloat(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvHllDeviceFloat') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_float),value :: alpha, beta + end function spmvHllDeviceFloat + + function spmvHllDeviceDouble(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvHllDeviceDouble') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + real(c_double),value :: alpha, beta + end function spmvHllDeviceDouble + + function spmvHllDeviceFloatComplex(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvHllDeviceFloatComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_float_complex),value :: alpha, beta + end function spmvHllDeviceFloatComplex + + function spmvHllDeviceDoubleComplex(deviceMat,alpha,x,beta,y) & + & result(res) bind(c,name='spmvHllDeviceDoubleComplex') + use iso_c_binding + integer(c_int) :: res + type(c_ptr), value :: deviceMat, x, y + complex(c_double_complex),value :: alpha, beta + end function spmvHllDeviceDoubleComplex + + end interface + +end module hlldev_mod diff --git a/cuda/impl/Makefile b/cuda/impl/Makefile new file mode 100755 index 00000000..12bf0747 --- /dev/null +++ b/cuda/impl/Makefile @@ -0,0 +1,297 @@ +include ../../Make.inc +LIBDIR=../../lib +INCDIR=../../include +MODDIR=../../modules +PSBLAS_LIB= -L$(PSBLIBDIR) -lpsb_util -lpsb_base +#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +LDLIBS=$(PSBLDLIBS) +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG).. +CINCLUDES=-I$(GPU_INCDIR) -I$(CUDA_INCDIR) +LIBNAME=libpsb_gpu.a +CXXDEFINES=$(PSBCXXDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) +CDEFINES=$(PSBCDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) +FDEFINES=$(PSBFDEFINES) $(SPGPU_DEFINES) $(CUDA_DEFINES) + +OBJS= \ +psb_d_cuda_cp_csrg_from_coo.o \ +psb_d_cuda_cp_csrg_from_fmt.o \ +psb_d_cuda_cp_elg_from_coo.o \ +psb_d_cuda_cp_elg_from_fmt.o \ +psb_s_cuda_cp_csrg_from_coo.o \ +psb_s_cuda_cp_csrg_from_fmt.o \ +psb_s_cuda_csrg_allocate_mnnz.o \ +psb_s_cuda_csrg_csmm.o \ +psb_s_cuda_csrg_csmv.o \ +psb_s_cuda_csrg_mold.o \ +psb_s_cuda_csrg_reallocate_nz.o \ +psb_s_cuda_csrg_scal.o \ +psb_s_cuda_csrg_scals.o \ +psb_s_cuda_csrg_from_gpu.o \ +psb_s_cuda_csrg_to_gpu.o \ +psb_s_cuda_csrg_vect_mv.o \ +psb_s_cuda_csrg_inner_vect_sv.o \ +psb_d_cuda_csrg_allocate_mnnz.o \ +psb_d_cuda_csrg_csmm.o \ +psb_d_cuda_csrg_csmv.o \ +psb_d_cuda_csrg_mold.o \ +psb_d_cuda_csrg_reallocate_nz.o \ +psb_d_cuda_csrg_scal.o \ +psb_d_cuda_csrg_scals.o \ +psb_d_cuda_csrg_from_gpu.o \ +psb_d_cuda_csrg_to_gpu.o \ +psb_d_cuda_csrg_vect_mv.o \ +psb_d_cuda_csrg_inner_vect_sv.o \ +psb_d_cuda_elg_allocate_mnnz.o \ +psb_d_cuda_elg_asb.o \ +psb_d_cuda_elg_csmm.o \ +psb_d_cuda_elg_csmv.o \ +psb_d_cuda_elg_csput.o \ +psb_d_cuda_elg_from_gpu.o \ +psb_d_cuda_elg_inner_vect_sv.o \ +psb_d_cuda_elg_mold.o \ +psb_d_cuda_elg_reallocate_nz.o \ +psb_d_cuda_elg_scal.o \ +psb_d_cuda_elg_scals.o \ +psb_d_cuda_elg_to_gpu.o \ +psb_d_cuda_elg_vect_mv.o \ +psb_d_cuda_mv_csrg_from_coo.o \ +psb_d_cuda_mv_csrg_from_fmt.o \ +psb_d_cuda_mv_elg_from_coo.o \ +psb_d_cuda_mv_elg_from_fmt.o \ +psb_s_cuda_mv_csrg_from_coo.o \ +psb_s_cuda_mv_csrg_from_fmt.o \ +psb_s_cuda_cp_elg_from_coo.o \ +psb_s_cuda_cp_elg_from_fmt.o \ +psb_s_cuda_elg_allocate_mnnz.o \ +psb_s_cuda_elg_asb.o \ +psb_s_cuda_elg_csmm.o \ +psb_s_cuda_elg_csmv.o \ +psb_s_cuda_elg_csput.o \ +psb_s_cuda_elg_inner_vect_sv.o \ +psb_s_cuda_elg_mold.o \ +psb_s_cuda_elg_reallocate_nz.o \ +psb_s_cuda_elg_scal.o \ +psb_s_cuda_elg_scals.o \ +psb_s_cuda_elg_to_gpu.o \ +psb_s_cuda_elg_from_gpu.o \ +psb_s_cuda_elg_vect_mv.o \ +psb_s_cuda_mv_elg_from_coo.o \ +psb_s_cuda_mv_elg_from_fmt.o \ +psb_s_cuda_cp_hlg_from_fmt.o \ +psb_s_cuda_cp_hlg_from_coo.o \ +psb_d_cuda_cp_hlg_from_fmt.o \ +psb_d_cuda_cp_hlg_from_coo.o \ +psb_d_cuda_hlg_allocate_mnnz.o \ +psb_d_cuda_hlg_csmm.o \ +psb_d_cuda_hlg_csmv.o \ +psb_d_cuda_hlg_inner_vect_sv.o \ +psb_d_cuda_hlg_mold.o \ +psb_d_cuda_hlg_reallocate_nz.o \ +psb_d_cuda_hlg_scal.o \ +psb_d_cuda_hlg_scals.o \ +psb_d_cuda_hlg_from_gpu.o \ +psb_d_cuda_hlg_to_gpu.o \ +psb_d_cuda_hlg_vect_mv.o \ +psb_s_cuda_hlg_allocate_mnnz.o \ +psb_s_cuda_hlg_csmm.o \ +psb_s_cuda_hlg_csmv.o \ +psb_s_cuda_hlg_inner_vect_sv.o \ +psb_s_cuda_hlg_mold.o \ +psb_s_cuda_hlg_reallocate_nz.o \ +psb_s_cuda_hlg_scal.o \ +psb_s_cuda_hlg_scals.o \ +psb_s_cuda_hlg_from_gpu.o \ +psb_s_cuda_hlg_to_gpu.o \ +psb_s_cuda_hlg_vect_mv.o \ +psb_s_cuda_mv_hlg_from_coo.o \ +psb_s_cuda_cp_hlg_from_coo.o \ +psb_s_cuda_mv_hlg_from_fmt.o \ +psb_d_cuda_mv_hlg_from_coo.o \ +psb_d_cuda_cp_hlg_from_coo.o \ +psb_d_cuda_mv_hlg_from_fmt.o \ +psb_s_cuda_hybg_allocate_mnnz.o \ +psb_s_cuda_hybg_csmm.o \ +psb_s_cuda_hybg_csmv.o \ +psb_s_cuda_hybg_reallocate_nz.o \ +psb_s_cuda_hybg_scal.o \ +psb_s_cuda_hybg_scals.o \ +psb_s_cuda_hybg_to_gpu.o \ +psb_s_cuda_hybg_vect_mv.o \ +psb_s_cuda_hybg_inner_vect_sv.o \ +psb_s_cuda_cp_hybg_from_coo.o \ +psb_s_cuda_cp_hybg_from_fmt.o \ +psb_s_cuda_mv_hybg_from_fmt.o \ +psb_s_cuda_mv_hybg_from_coo.o \ +psb_s_cuda_hybg_mold.o \ +psb_d_cuda_hybg_allocate_mnnz.o \ +psb_d_cuda_hybg_csmm.o \ +psb_d_cuda_hybg_csmv.o \ +psb_d_cuda_hybg_reallocate_nz.o \ +psb_d_cuda_hybg_scal.o \ +psb_d_cuda_hybg_scals.o \ +psb_d_cuda_hybg_to_gpu.o \ +psb_d_cuda_hybg_vect_mv.o \ +psb_d_cuda_hybg_inner_vect_sv.o \ +psb_d_cuda_cp_hybg_from_coo.o \ +psb_d_cuda_cp_hybg_from_fmt.o \ +psb_d_cuda_mv_hybg_from_fmt.o \ +psb_d_cuda_mv_hybg_from_coo.o \ +psb_d_cuda_hybg_mold.o \ +psb_z_cuda_cp_csrg_from_coo.o \ +psb_z_cuda_cp_csrg_from_fmt.o \ +psb_z_cuda_cp_elg_from_coo.o \ +psb_z_cuda_cp_elg_from_fmt.o \ +psb_c_cuda_cp_csrg_from_coo.o \ +psb_c_cuda_cp_csrg_from_fmt.o \ +psb_c_cuda_csrg_allocate_mnnz.o \ +psb_c_cuda_csrg_csmm.o \ +psb_c_cuda_csrg_csmv.o \ +psb_c_cuda_csrg_mold.o \ +psb_c_cuda_csrg_reallocate_nz.o \ +psb_c_cuda_csrg_scal.o \ +psb_c_cuda_csrg_scals.o \ +psb_c_cuda_csrg_from_gpu.o \ +psb_c_cuda_csrg_to_gpu.o \ +psb_c_cuda_csrg_vect_mv.o \ +psb_c_cuda_csrg_inner_vect_sv.o \ +psb_z_cuda_csrg_allocate_mnnz.o \ +psb_z_cuda_csrg_csmm.o \ +psb_z_cuda_csrg_csmv.o \ +psb_z_cuda_csrg_mold.o \ +psb_z_cuda_csrg_reallocate_nz.o \ +psb_z_cuda_csrg_scal.o \ +psb_z_cuda_csrg_scals.o \ +psb_z_cuda_csrg_from_gpu.o \ +psb_z_cuda_csrg_to_gpu.o \ +psb_z_cuda_csrg_vect_mv.o \ +psb_z_cuda_csrg_inner_vect_sv.o \ +psb_z_cuda_elg_allocate_mnnz.o \ +psb_z_cuda_elg_asb.o \ +psb_z_cuda_elg_csmm.o \ +psb_z_cuda_elg_csmv.o \ +psb_z_cuda_elg_csput.o \ +psb_z_cuda_elg_inner_vect_sv.o \ +psb_z_cuda_elg_mold.o \ +psb_z_cuda_elg_reallocate_nz.o \ +psb_z_cuda_elg_scal.o \ +psb_z_cuda_elg_scals.o \ +psb_z_cuda_elg_to_gpu.o \ +psb_z_cuda_elg_from_gpu.o \ +psb_z_cuda_elg_vect_mv.o \ +psb_z_cuda_mv_csrg_from_coo.o \ +psb_z_cuda_mv_csrg_from_fmt.o \ +psb_z_cuda_mv_elg_from_coo.o \ +psb_z_cuda_mv_elg_from_fmt.o \ +psb_c_cuda_mv_csrg_from_coo.o \ +psb_c_cuda_mv_csrg_from_fmt.o \ +psb_c_cuda_cp_elg_from_coo.o \ +psb_c_cuda_cp_elg_from_fmt.o \ +psb_c_cuda_elg_allocate_mnnz.o \ +psb_c_cuda_elg_asb.o \ +psb_c_cuda_elg_csmm.o \ +psb_c_cuda_elg_csmv.o \ +psb_c_cuda_elg_csput.o \ +psb_c_cuda_elg_inner_vect_sv.o \ +psb_c_cuda_elg_mold.o \ +psb_c_cuda_elg_reallocate_nz.o \ +psb_c_cuda_elg_scal.o \ +psb_c_cuda_elg_scals.o \ +psb_c_cuda_elg_to_gpu.o \ +psb_c_cuda_elg_from_gpu.o \ +psb_c_cuda_elg_vect_mv.o \ +psb_c_cuda_mv_elg_from_coo.o \ +psb_c_cuda_mv_elg_from_fmt.o \ +psb_c_cuda_cp_hlg_from_fmt.o \ +psb_c_cuda_cp_hlg_from_coo.o \ +psb_z_cuda_cp_hlg_from_fmt.o \ +psb_z_cuda_cp_hlg_from_coo.o \ +psb_z_cuda_hlg_allocate_mnnz.o \ +psb_z_cuda_hlg_csmm.o \ +psb_z_cuda_hlg_csmv.o \ +psb_z_cuda_hlg_inner_vect_sv.o \ +psb_z_cuda_hlg_mold.o \ +psb_z_cuda_hlg_reallocate_nz.o \ +psb_z_cuda_hlg_scal.o \ +psb_z_cuda_hlg_scals.o \ +psb_z_cuda_hlg_from_gpu.o \ +psb_z_cuda_hlg_to_gpu.o \ +psb_z_cuda_hlg_vect_mv.o \ +psb_c_cuda_hlg_allocate_mnnz.o \ +psb_c_cuda_hlg_csmm.o \ +psb_c_cuda_hlg_csmv.o \ +psb_c_cuda_hlg_inner_vect_sv.o \ +psb_c_cuda_hlg_mold.o \ +psb_c_cuda_hlg_reallocate_nz.o \ +psb_c_cuda_hlg_scal.o \ +psb_c_cuda_hlg_scals.o \ +psb_c_cuda_hlg_from_gpu.o \ +psb_c_cuda_hlg_to_gpu.o \ +psb_c_cuda_hlg_vect_mv.o \ +psb_c_cuda_mv_hlg_from_coo.o \ +psb_c_cuda_cp_hlg_from_coo.o \ +psb_c_cuda_mv_hlg_from_fmt.o \ +psb_z_cuda_mv_hlg_from_coo.o \ +psb_z_cuda_cp_hlg_from_coo.o \ +psb_z_cuda_mv_hlg_from_fmt.o \ +psb_c_cuda_hybg_allocate_mnnz.o \ +psb_c_cuda_hybg_csmm.o \ +psb_c_cuda_hybg_csmv.o \ +psb_c_cuda_hybg_reallocate_nz.o \ +psb_c_cuda_hybg_scal.o \ +psb_c_cuda_hybg_scals.o \ +psb_c_cuda_hybg_to_gpu.o \ +psb_c_cuda_hybg_vect_mv.o \ +psb_c_cuda_hybg_inner_vect_sv.o \ +psb_c_cuda_cp_hybg_from_coo.o \ +psb_c_cuda_cp_hybg_from_fmt.o \ +psb_c_cuda_mv_hybg_from_fmt.o \ +psb_c_cuda_mv_hybg_from_coo.o \ +psb_c_cuda_hybg_mold.o \ +psb_z_cuda_hybg_allocate_mnnz.o \ +psb_z_cuda_hybg_csmm.o \ +psb_z_cuda_hybg_csmv.o \ +psb_z_cuda_hybg_reallocate_nz.o \ +psb_z_cuda_hybg_scal.o \ +psb_z_cuda_hybg_scals.o \ +psb_z_cuda_hybg_to_gpu.o \ +psb_z_cuda_hybg_vect_mv.o \ +psb_z_cuda_hybg_inner_vect_sv.o \ +psb_z_cuda_cp_hybg_from_coo.o \ +psb_z_cuda_cp_hybg_from_fmt.o \ +psb_z_cuda_mv_hybg_from_fmt.o \ +psb_z_cuda_mv_hybg_from_coo.o \ +psb_z_cuda_hybg_mold.o \ +psb_d_cuda_cp_diag_from_coo.o \ +psb_d_cuda_mv_diag_from_coo.o \ +psb_d_cuda_diag_to_gpu.o \ +psb_d_cuda_diag_csmv.o \ +psb_d_cuda_diag_mold.o \ +psb_d_cuda_diag_vect_mv.o \ +psb_d_cuda_cp_hdiag_from_coo.o \ +psb_d_cuda_mv_hdiag_from_coo.o \ +psb_d_cuda_hdiag_to_gpu.o \ +psb_d_cuda_hdiag_csmv.o \ +psb_d_cuda_hdiag_mold.o \ +psb_d_cuda_hdiag_vect_mv.o \ +psb_s_cuda_cp_hdiag_from_coo.o \ +psb_s_cuda_mv_hdiag_from_coo.o \ +psb_s_cuda_hdiag_to_gpu.o \ +psb_s_cuda_hdiag_csmv.o \ +psb_s_cuda_hdiag_mold.o \ +psb_s_cuda_hdiag_vect_mv.o \ +psb_s_cuda_dnsg_mat_impl.o \ +psb_d_cuda_dnsg_mat_impl.o \ +psb_c_cuda_dnsg_mat_impl.o \ +psb_z_cuda_dnsg_mat_impl.o + + +objs: $(OBJS) +lib: objs + ar cur ../$(LIBNAME) $(OBJS) + +clean: + /bin/rm -f $(OBJS) diff --git a/cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90 new file mode 100644 index 00000000..aa6a3ba3 --- /dev/null +++ b/cuda/impl/psb_c_cuda_cp_csrg_from_coo.F90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_cp_csrg_from_coo(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_cp_csrg_from_coo + implicit none + + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_c_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cuda_cp_csrg_from_coo diff --git a/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 new file mode 100644 index 00000000..e9d42139 --- /dev/null +++ b/cuda/impl/psb_c_cuda_cp_csrg_from_fmt.F90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_cp_csrg_from_fmt(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_cp_csrg_from_fmt + !use iso_c_binding + implicit none + + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + select type(b) + type is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return + call a%to_gpu(info) + end select + +end subroutine psb_c_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_c_cuda_cp_diag_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_diag_from_coo.F90 new file mode 100644 index 00000000..e70a044e --- /dev/null +++ b/cuda/impl/psb_c_cuda_cp_diag_from_coo.F90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cuda_cp_diag_from_coo(a,b,info) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_cp_diag_from_coo + implicit none + + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + call a%psb_c_dia_sparse_mat%cp_from_coo(b,info) + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cuda_cp_diag_from_coo diff --git a/cuda/impl/psb_c_cuda_cp_elg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_elg_from_coo.F90 new file mode 100644 index 00000000..c6105e88 --- /dev/null +++ b/cuda/impl/psb_c_cuda_cp_elg_from_coo.F90 @@ -0,0 +1,161 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_cuda_cp_elg_from_coo(a,b,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_cp_elg_from_coo + use psi_ext_util_mod + use psb_cuda_env_mod + implicit none + + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, ld, ldv, hacksize + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + type(psb_c_coo_sparse_mat) :: tmp + integer(psb_ipk_), allocatable :: idisp(:) + + info = psb_success_ + hacksize = max(1,psb_cuda_WarpSize()) + if (b%is_dev()) call b%sync() + + if (b%is_by_rows()) then + + call psi_c_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,b%ja,b%val, a%deviceMat) + call a%set_dev() + else + call b%cp_to_coo(tmp,info) + call psi_c_count_ell_from_coo(a,tmp,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,tmp%ja,tmp%val, a%deviceMat) + + call a%set_dev() + end if + + if (info /= psb_success_) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine psi_c_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize) + + use psb_base_mod + use psi_ext_util_mod + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(out) :: info, nzm, ldv + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, & + & ir, ic, hsz_ + real(psb_dpk_) :: t0,t1 + logical, parameter :: timing=.true. + + + info = psb_success_ + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == psb_success_) call psb_realloc(nr+1,idisp,info) + if (info /= psb_success_) return + if (timing) t0=psb_wtime() + + a%irn = 0 + do i=1, nza + ir = b%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + idisp(1) = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + idisp(i+1) = a%nzt + end do + + end subroutine psi_c_count_ell_from_coo + +end subroutine psb_c_cuda_cp_elg_from_coo diff --git a/cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90 new file mode 100644 index 00000000..f7e5351e --- /dev/null +++ b/cuda/impl/psb_c_cuda_cp_elg_from_fmt.F90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cuda_cp_elg_from_fmt(a,b,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_cp_elg_from_fmt + implicit none + + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + type(elldev_parms) :: gpu_parms + + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_c_ell_sparse_mat) + nzm = psb_size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + end if + a%nzt = nza + call a%to_gpu(info) + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_cuda_cp_elg_from_fmt diff --git a/cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90 new file mode 100644 index 00000000..9be741c9 --- /dev/null +++ b/cuda/impl/psb_c_cuda_cp_hdiag_from_coo.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_cuda_cp_hdiag_from_coo(a,b,info) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_cp_hdiag_from_coo + use psb_cuda_env_mod + implicit none + + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + a%hacksize = psb_cuda_WarpSize() + + call a%psb_c_hdia_sparse_mat%cp_from_coo(b,info) + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cuda_cp_hdiag_from_coo diff --git a/cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90 new file mode 100644 index 00000000..8b0d9f2a --- /dev/null +++ b/cuda/impl/psb_c_cuda_cp_hlg_from_coo.F90 @@ -0,0 +1,190 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cuda_cp_hlg_from_coo(a,b,info) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_cuda_env_mod + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_cp_hlg_from_coo + implicit none + + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + integer(psb_ipk_), allocatable :: idisp(:) + character(len=20) :: name='hll_from_coo' + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + integer(psb_ipk_), allocatable :: irn(:), ja(:), hko(:) + real(psb_dpk_), allocatable :: val(:) + logical, parameter :: debug=.false. + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + hksz = max(1,psb_cuda_WarpSize()) + + if (b%is_by_rows()) then + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + if (debug) write(0,*) 'Copying through GPU',nza + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,b%ja, b%val, a%deviceMat) + call a%set_dev() + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + if (debug) write(0,*) 'Copying through GPU' + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,tmp,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,tmp%ja, tmp%val, a%deviceMat) + + call tmp%free() + call a%set_dev() + end if + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + subroutine psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + use psb_base_mod + use psi_ext_util_mod + implicit none + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info, noffs, isz + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, kc + logical, parameter :: debug=.false. + + info = 0 + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + if (debug) write(0,*) 'Start compute hckoff_from_coo',nr,nc,nza + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == 0) call psb_realloc(nr+1,idisp,info) + if (info /= 0) return + a%irn = 0 + if (debug) then + do i=1, nza + if ((1<=b%ia(i)).and.(b%ia(i)<= nr)) then + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + else + write(0,*) 'Out of bouds IA ',i,b%ia(i),nr + end if + end do + else + do i=1, nza + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + end do + end if + a%nzt = nza + + + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (debug) write(0,*) ' noffsets ',noffs,info + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + idisp(1) = 0 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + idisp(i+1) = idisp(i) + a%irn(i) + do k=1,ir-1 + idisp(i+k+1) = idisp(i+k) + a%irn(i+k) + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) +!!$ write(*,*) 'End of psi_comput_hckoff ',info + end subroutine psi_compute_hckoff_from_coo + +end subroutine psb_c_cuda_cp_hlg_from_coo diff --git a/cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90 new file mode 100644 index 00000000..96a5c5e8 --- /dev/null +++ b/cuda/impl/psb_c_cuda_cp_hlg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cuda_cp_hlg_from_fmt(a,b,info) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_cp_hlg_from_fmt + implicit none + + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_c_hll_sparse_mat%cp_from_fmt(b,info) + if (info == 0) call a%to_gpu(info) + end select + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cuda_cp_hlg_from_fmt diff --git a/cuda/impl/psb_c_cuda_cp_hybg_from_coo.F90 b/cuda/impl/psb_c_cuda_cp_hybg_from_coo.F90 new file mode 100644 index 00000000..d29da0b7 --- /dev/null +++ b/cuda/impl/psb_c_cuda_cp_hybg_from_coo.F90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_cuda_cp_hybg_from_coo(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_cp_hybg_from_coo + implicit none + + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_c_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cuda_cp_hybg_from_coo +#endif diff --git a/cuda/impl/psb_c_cuda_cp_hybg_from_fmt.F90 b/cuda/impl/psb_c_cuda_cp_hybg_from_fmt.F90 new file mode 100644 index 00000000..9ed53040 --- /dev/null +++ b/cuda/impl/psb_c_cuda_cp_hybg_from_fmt.F90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_cuda_cp_hybg_from_fmt(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_cp_hybg_from_fmt + implicit none + + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return + call a%to_gpu(info) + end select + +end subroutine psb_c_cuda_cp_hybg_from_fmt +#endif diff --git a/cuda/impl/psb_c_cuda_csrg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_csrg_allocate_mnnz.F90 new file mode 100644 index 00000000..f1e002f3 --- /dev/null +++ b/cuda/impl/psb_c_cuda_csrg_allocate_mnnz.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cuda_csrg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use cusparse_mod + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_c_csr_sparse_mat%allocate(m,n,nz) + + info = initFcusparse() + if (info == 0) call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_csrg_allocate_mnnz diff --git a/cuda/impl/psb_c_cuda_csrg_csmm.F90 b/cuda/impl/psb_c_cuda_csrg_csmm.F90 new file mode 100644 index 00000000..b3012952 --- /dev/null +++ b/cuda/impl/psb_c_cuda_csrg_csmm.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_csmm + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_spk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csrg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_cuda_csrg_csmv + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_csrg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_cuda_csrg_from_gpu + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, n, nz + + info = 0 + + if (.not.(c_associated(a%deviceMat%mat))) then + call a%free() + return + end if + + info = CSRGDeviceGetParms(a%deviceMat,m,n,nz) + if (info /= psb_success_) return + + if (info == 0) call psb_realloc(m+1,a%irp,info) + if (info == 0) call psb_realloc(nz,a%ja,info) + if (info == 0) call psb_realloc(nz,a%val,info) + if (info == 0) info = & + & CSRGDevice2Host(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) +#if (CUDA_SHORT_VERSION <= 10) || (CUDA_VERSION < 11030) + a%irp(:) = a%irp(:)+1 + a%ja(:) = a%ja(:)+1 +#endif + + call a%set_sync() + +end subroutine psb_c_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 new file mode 100644 index 00000000..7e5bb614 --- /dev/null +++ b/cuda/impl/psb_c_cuda_csrg_inner_vect_sv.F90 @@ -0,0 +1,125 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_inner_vect_sv + use psb_c_cuda_vect_mod + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_csrg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra.or.(beta/=dzero)) then + call x%sync() + call y%sync() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_c_vect_cuda) + select type(yy => y) + type is (psb_c_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='csrg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_csrg_inner_vect_sv diff --git a/cuda/impl/psb_c_cuda_csrg_mold.F90 b/cuda/impl/psb_c_cuda_csrg_mold.F90 new file mode 100644 index 00000000..405f2736 --- /dev/null +++ b/cuda/impl/psb_c_cuda_csrg_mold.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cuda_csrg_mold(a,b,info) + + use psb_base_mod + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_mold + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='csrg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_cuda_csrg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_csrg_mold diff --git a/cuda/impl/psb_c_cuda_csrg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_csrg_reallocate_nz.F90 new file mode 100644 index 00000000..a757f477 --- /dev/null +++ b/cuda/impl/psb_c_cuda_csrg_reallocate_nz.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cuda_csrg_reallocate_nz(nz,a) + + use psb_base_mod + use cusparse_mod + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='c_cuda_csrg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_c_csr_sparse_mat%reallocate(nz) + + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_csrg_reallocate_nz diff --git a/cuda/impl/psb_c_cuda_csrg_scal.F90 b/cuda/impl/psb_c_cuda_csrg_scal.F90 new file mode 100644 index 00000000..13716339 --- /dev/null +++ b/cuda/impl/psb_c_cuda_csrg_scal.F90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cuda_csrg_scal(d,a,info,side) + + use psb_base_mod + use cusparse_mod + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_scal + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + call a%psb_c_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_csrg_scal diff --git a/cuda/impl/psb_c_cuda_csrg_scals.F90 b/cuda/impl/psb_c_cuda_csrg_scals.F90 new file mode 100644 index 00000000..5334be3d --- /dev/null +++ b/cuda/impl/psb_c_cuda_csrg_scals.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cuda_csrg_scals(d,a,info) + + use psb_base_mod + use cusparse_mod + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_scals + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + call a%psb_c_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_csrg_scals diff --git a/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 new file mode 100644 index 00000000..cc3fbaaf --- /dev/null +++ b/cuda/impl/psb_c_cuda_csrg_to_gpu.F90 @@ -0,0 +1,378 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cuda_csrg_to_gpu(a,info,nzrm) + + use psb_base_mod + use cusparse_mod + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_to_gpu + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + complex(psb_spk_), allocatable :: valdi(:) + + info = 0 + + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = CSRGDeviceFree(a%deviceMat) + end if +#if (CUDA_SHORT_VERSION <= 10 ) + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = cone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = cone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) +!!$ if (info == 0) then +!!$ if (a%is_unit()) then +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) +!!$ else +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) +!!$ end if +!!$ end if + if ((info == 0) .and. a%is_triangle()) then + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceCsrsmAnalysis(a%deviceMat) + end if + +#elif CUDA_VERSION < 11030 + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) +!!$ write(0,*) 'Done deviceAlloc' + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_zero) +!!$ write(0,*) 'Done SetIndexBase' + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(0:nzdi),valdi(0:nzdi),stat=info) + if (info == 0) then + irpdi(1) = 0 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = cone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = cone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ write(0,*) 'Done deviceAlloc', info + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,& + & cusparse_index_base_zero) +!!$ write(0,*) 'Done setIndexBase', info + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + nzdi=a%irp(m+1)-1 + if (info == 0) allocate(irpdi(m+1),jadi(max(nzdi,1)),stat=info) + if (info == 0) then + irpdi(1:m+1) = a%irp(1:m+1) -1 + jadi(1:nzdi) = a%ja(1:nzdi) -1 + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,irpdi,jadi,a%val) +!!$ write(0,*) 'Done Host2Device', info + endif + + +#elif 0 + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = cone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = cone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + +!!$ if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) +!!$ end if + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + if ((info == 0) .and. a%is_triangle()) then + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = cone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = cone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + +#endif + call a%set_sync() + + if (info /= 0) then + write(0,*) 'Error in CSRG_TO_GPU ',info + end if + +end subroutine psb_c_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_c_cuda_csrg_vect_mv.F90 b/cuda/impl/psb_c_cuda_csrg_vect_mv.F90 new file mode 100644 index 00000000..c58e7ec0 --- /dev/null +++ b/cuda/impl/psb_c_cuda_csrg_vect_mv.F90 @@ -0,0 +1,117 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_csrg_vect_mv + use psb_c_cuda_vect_mod + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_csrg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= czero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_c_vect_cuda) + select type(yy => y) + type is (psb_c_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= czero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_c_cuda_csrg_vect_mv diff --git a/cuda/impl/psb_c_cuda_diag_csmv.F90 b/cuda/impl/psb_c_cuda_diag_csmv.F90 new file mode 100644 index 00000000..c0940903 --- /dev/null +++ b/cuda/impl/psb_c_cuda_diag_csmv.F90 @@ -0,0 +1,127 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_csmv + implicit none + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='c_cuda_diag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_cuda_diag_mold + implicit none + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='diag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_cuda_diag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_diag_mold diff --git a/cuda/impl/psb_c_cuda_diag_to_gpu.F90 b/cuda/impl/psb_c_cuda_diag_to_gpu.F90 new file mode 100644 index 00000000..88bbd8b5 --- /dev/null +++ b/cuda/impl/psb_c_cuda_diag_to_gpu.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cuda_diag_to_gpu(a,info,nzrm) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_to_gpu + use iso_c_binding + implicit none + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d + type(diagdev_parms) :: gpu_parms + + info = 0 + + if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return + + n = size(a%data,1) + d = size(a%data,2) + c = a%get_ncols() + !allocsize = a%get_size() + !write(*,*) 'Create the DIAG matrix' + gpu_parms = FgetDiagDeviceParams(n,c,d,spgpu_type_complex_float) + if (c_associated(a%deviceMat)) then + call freeDiagDevice(a%deviceMat) + endif + info = FallocDiagDevice(a%deviceMat,n,c,d,spgpu_type_complex_float) + if (info == 0) info = & + & writeDiagDevice(a%deviceMat,a%data,a%offset,n) +! if (info /= 0) goto 9999 + +end subroutine psb_c_cuda_diag_to_gpu diff --git a/cuda/impl/psb_c_cuda_diag_vect_mv.F90 b/cuda/impl/psb_c_cuda_diag_vect_mv.F90 new file mode 100644 index 00000000..fba22bc5 --- /dev/null +++ b/cuda/impl/psb_c_cuda_diag_vect_mv.F90 @@ -0,0 +1,116 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_diag_vect_mv + use psb_c_cuda_vect_mod + implicit none + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_diag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_c_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_c_vect_cuda) + select type(yy => y) + type is (psb_c_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_diag_vect_mv diff --git a/cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 b/cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 new file mode 100644 index 00000000..7aaa37f1 --- /dev/null +++ b/cuda/impl/psb_c_cuda_dnsg_mat_impl.F90 @@ -0,0 +1,428 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_c_cuda_vect_mod + use dnsdev_mod + use psb_c_vectordev_mod + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_vect_mv + implicit none + class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + logical :: tra + character :: trans_ + complex(psb_spk_), allocatable :: rx(:), ry(:) + Integer(Psb_ipk_) :: err_act, m, n, k + character(len=20) :: name='c_cuda_dnsg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (trans_ =='N') then + m = a%get_nrows() + n = 1 + k = a%get_ncols() + else + m = a%get_ncols() + n = 1 + k = a%get_nrows() + end if + select type (xx => x) + type is (psb_c_vect_cuda) + select type(yy => y) + type is (psb_c_vect_cuda) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= czero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDnsDevice(trans_,m,n,k,alpha,a%deviceMat,& + & xx%deviceVect,beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDnsDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_dnsg_vect_mv + + +subroutine psb_c_cuda_dnsg_mold(a,b,info) + use psb_base_mod + use psb_c_cuda_vect_mod + use dnsdev_mod + use psb_c_vectordev_mod + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_mold + implicit none + class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dnsg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_cuda_dnsg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_dnsg_mold + + +!!$ +!!$ interface +!!$ subroutine psb_c_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta +!!$ class(psb_c_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_cuda_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_c_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_cuda_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_c_cuda_dnsg_allocate_mnnz +!!$ end interface + +subroutine psb_c_cuda_dnsg_to_gpu(a,info) + use psb_base_mod + use psb_c_cuda_vect_mod + use dnsdev_mod + use psb_c_vectordev_mod + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_dnsg_to_gpu + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act, pitch, lda + logical, parameter :: debug=.false. + character(len=20) :: name='c_cuda_dnsg_to_gpu' + + call psb_erractionsave(err_act) + info = psb_success_ + if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2) + info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),& + & spgpu_type_complex_float,1) + if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2)) + if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_dnsg_to_gpu + + + +subroutine psb_c_cuda_cp_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_c_cuda_vect_mod + use dnsdev_mod + use psb_c_vectordev_mod + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_cp_dnsg_from_coo + implicit none + + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_dnsg_cp_from_coo' + integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: debug=.false. + type(psb_c_coo_sparse_mat) :: tmp + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + call a%psb_c_dns_sparse_mat%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_cp_from_coo: dns_cp',info + if (info == 0) call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_cp_dnsg_from_coo + +subroutine psb_c_cuda_cp_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_c_cuda_vect_mod + use dnsdev_mod + use psb_c_vectordev_mod + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_cp_dnsg_from_fmt + implicit none + + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + type(psb_c_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b,info) + +!!$ class is (psb_c_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_cp_dnsg_from_fmt + + + +subroutine psb_c_cuda_mv_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_c_cuda_vect_mod + use dnsdev_mod + use psb_c_vectordev_mod + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_mv_dnsg_from_coo + implicit none + + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + logical, parameter :: debug=.false. + character(len=20) :: name='c_cuda_dnsg_mv_from_coo' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_mv_from_coo: cp_from_coo:',info + call b%free() + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_mv_dnsg_from_coo + +subroutine psb_c_cuda_mv_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_c_cuda_vect_mod + use dnsdev_mod + use psb_c_vectordev_mod + use psb_c_cuda_dnsg_mat_mod, psb_protect_name => psb_c_cuda_mv_dnsg_from_fmt + implicit none + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + type(psb_c_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b,info) + +!!$ class is (psb_c_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_c_cuda_mv_dnsg_from_fmt diff --git a/cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90 new file mode 100644 index 00000000..7f50d547 --- /dev/null +++ b/cuda/impl/psb_c_cuda_elg_allocate_mnnz.F90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_cuda_elg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + type(elldev_parms) :: gpu_parms + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione,izero,izero,izero,izero/)) + goto 9999 + endif + + gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_complex_float,1) + ld = gpu_parms%pitch + nz_ = gpu_parms%maxRowSize + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_elg_allocate_mnnz diff --git a/cuda/impl/psb_c_cuda_elg_asb.f90 b/cuda/impl/psb_c_cuda_elg_asb.f90 new file mode 100644 index 00000000..16d70736 --- /dev/null +++ b/cuda/impl/psb_c_cuda_elg_asb.f90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_elg_asb(a) + + use psb_base_mod + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_asb + implicit none + + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + + integer(psb_ipk_) :: err_act, info + character(len=20) :: name='elg_asb' + logical :: clear_ + logical, parameter :: debug=.false. + real(psb_dpk_), allocatable :: valt(:,:) + integer(psb_ipk_), allocatable :: jat(:,:) + integer(psb_ipk_) :: nr, nc + + call psb_erractionsave(err_act) + info = psb_success_ + + ! Only call sync() if we are on host + if (a%is_host()) then + call a%sync() + end if + call a%set_asb() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_elg_asb diff --git a/cuda/impl/psb_c_cuda_elg_csmm.F90 b/cuda/impl/psb_c_cuda_elg_csmm.F90 new file mode 100644 index 00000000..f7ae9892 --- /dev/null +++ b/cuda/impl/psb_c_cuda_elg_csmm.F90 @@ -0,0 +1,124 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csmm + implicit none + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_spk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_elg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_cuda_elg_csmv + implicit none + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_elg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_cuda_elg_csput_a + implicit none + + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_elg_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +!!$ write(0,*) 'In ELG_csput_a' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then +!!$ write(*,*) 'elg_csput_a ' + if (a%is_dev()) call a%sync() + call a%psb_c_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_elg_csput_a + + + +subroutine psb_c_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use iso_c_binding + use elldev_mod + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_csput_v + use psb_c_cuda_vect_mod + implicit none + + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_c_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_elg_csput_v' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw + logical :: gpu_invoked + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + integer(psb_ipk_), allocatable :: idxs(:) + logical, parameter :: debug_idxs=.false., debug_vals=.false. + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +! write(0,*) 'In ELG_csput_v' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (ia%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (ja%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (val%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + + t1=psb_wtime() + gpu_invoked = .false. + select type (ia) + class is (psb_i_vect_cuda) + select type (ja) + class is (psb_i_vect_cuda) + select type (val) + class is (psb_c_vect_cuda) + if (a%is_host()) call a%sync() + if (val%is_host()) call val%sync() + if (ia%is_host()) call ia%sync() + if (ja%is_host()) call ja%sync() + info = csputEllDeviceFloatComplex(a%deviceMat,nz,& + & ia%deviceVect,ja%deviceVect,val%deviceVect) + call a%set_dev() + gpu_invoked=.true. + end select + end select + end select + if (.not.gpu_invoked) then +!!$ write(0,*)'Not gpu_invoked ' + if (a%is_dev()) call a%sync() + call a%psb_c_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + call a%set_host() + end if + + if (info /= 0) then + info = psb_err_internal_error_ + end if + + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_c_cuda_elg_csput_v diff --git a/cuda/impl/psb_c_cuda_elg_from_gpu.F90 b/cuda/impl/psb_c_cuda_elg_from_gpu.F90 new file mode 100644 index 00000000..34c6e4a6 --- /dev/null +++ b/cuda/impl/psb_c_cuda_elg_from_gpu.F90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_elg_from_gpu(a,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_from_gpu + implicit none + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize + + info = 0 + + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + if (info == 0) call psb_realloc(pitch,a%irn,info) + end if + if (info == 0) info = & + & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag) + call a%set_sync() + +end subroutine psb_c_cuda_elg_from_gpu diff --git a/cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90 new file mode 100644 index 00000000..148d72d2 --- /dev/null +++ b/cuda/impl/psb_c_cuda_elg_inner_vect_sv.F90 @@ -0,0 +1,84 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_inner_vect_sv + use psb_c_cuda_vect_mod + implicit none + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_elg_inner_vect_sv' + logical, parameter :: debug=.false. + complex(psb_spk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + if (a%is_dev()) call a%sync() + if (.false.) then + rx = x%get_vect() + ry = y%get_vect() + call a%inner_spsm(alpha,rx,beta,ry,info,trans) + call y%bld(ry) + else + call x%sync() + call y%sync() + call a%psb_c_ell_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + end if + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_elg_inner_vect_sv diff --git a/cuda/impl/psb_c_cuda_elg_mold.F90 b/cuda/impl/psb_c_cuda_elg_mold.F90 new file mode 100644 index 00000000..bb94bf07 --- /dev/null +++ b/cuda/impl/psb_c_cuda_elg_mold.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_cuda_elg_mold(a,b,info) + + use psb_base_mod + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_mold + implicit none + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='elg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_cuda_elg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_elg_mold diff --git a/cuda/impl/psb_c_cuda_elg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_elg_reallocate_nz.F90 new file mode 100644 index 00000000..6a1f8763 --- /dev/null +++ b/cuda/impl/psb_c_cuda_elg_reallocate_nz.F90 @@ -0,0 +1,72 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_elg_reallocate_nz(nz,a) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='c_cuda_elg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + if (a%is_dev()) call a%sync() + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call a%to_gpu(info,nzrm=nzrm) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_elg_reallocate_nz diff --git a/cuda/impl/psb_c_cuda_elg_scal.F90 b/cuda/impl/psb_c_cuda_elg_scal.F90 new file mode 100644 index 00000000..65f84768 --- /dev/null +++ b/cuda/impl/psb_c_cuda_elg_scal.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_elg_scal(d,a,info,side) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_scal + implicit none + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_c_ell_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_elg_scal diff --git a/cuda/impl/psb_c_cuda_elg_scals.F90 b/cuda/impl/psb_c_cuda_elg_scals.F90 new file mode 100644 index 00000000..966f2e91 --- /dev/null +++ b/cuda/impl/psb_c_cuda_elg_scals.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_elg_scals(d,a,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_scals + implicit none + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_elg_scals diff --git a/cuda/impl/psb_c_cuda_elg_to_gpu.F90 b/cuda/impl/psb_c_cuda_elg_to_gpu.F90 new file mode 100644 index 00000000..495207c7 --- /dev/null +++ b/cuda/impl/psb_c_cuda_elg_to_gpu.F90 @@ -0,0 +1,84 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_elg_to_gpu(a,info,nzrm) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_to_gpu + implicit none + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize, nzt + type(elldev_parms) :: gpu_parms + + info = 0 + + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + nzt = a%get_nzeros() + if (present(nzrm)) nzm = max(nzm,nzrm) + + gpu_parms = FgetEllDeviceParams(m,nzm,nzt,n,spgpu_type_complex_float,1) + + if (c_associated(a%deviceMat)) then + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + else + pitch = -1 + maxrowsize = -1 + end if + + if ((pitch /= gpu_parms%pitch).or.(maxrowsize /= gpu_parms%maxRowSize)) then + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + info = FallocEllDevice(a%deviceMat,m,nzm,nzt,n,spgpu_type_complex_float,1) + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + end if + if (info == 0) then + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + end if + end if + if (info == 0) info = & + & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag) + call a%set_sync() + +end subroutine psb_c_cuda_elg_to_gpu diff --git a/cuda/impl/psb_c_cuda_elg_trim.f90 b/cuda/impl/psb_c_cuda_elg_trim.f90 new file mode 100644 index 00000000..78dbe193 --- /dev/null +++ b/cuda/impl/psb_c_cuda_elg_trim.f90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_elg_trim(a) + + use psb_base_mod + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_trim + implicit none + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + ld = max(1_psb_ipk_,size(a%ja,1)) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_elg_trim diff --git a/cuda/impl/psb_c_cuda_elg_vect_mv.F90 b/cuda/impl/psb_c_cuda_elg_vect_mv.F90 new file mode 100644 index 00000000..9da6a34a --- /dev/null +++ b/cuda/impl/psb_c_cuda_elg_vect_mv.F90 @@ -0,0 +1,121 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_elg_vect_mv + use psb_c_cuda_vect_mod + implicit none + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_elg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + if (tra) then + if (a%is_dev()) call a%sync() + if (.not.x%is_host()) call x%sync() + if (beta /= czero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_c_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_c_vect_cuda) + select type(yy => y) + type is (psb_c_vect_cuda) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= czero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvEllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvELLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_elg_vect_mv diff --git a/cuda/impl/psb_c_cuda_hdiag_csmv.F90 b/cuda/impl/psb_c_cuda_hdiag_csmv.F90 new file mode 100644 index 00000000..36928062 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hdiag_csmv.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_csmv + implicit none + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='c_cuda_hdiag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_cuda_hdiag_mold + implicit none + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdiag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_cuda_hdiag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_hdiag_mold diff --git a/cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 b/cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 new file mode 100644 index 00000000..8d1b61a1 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hdiag_to_gpu.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_hdiag_to_gpu(a,info) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_to_gpu + use iso_c_binding + implicit none + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight + type(hdiagdev_parms) :: gpu_parms + + info = 0 + + nr = a%get_nrows() + nc = a%get_ncols() + hacksize = a%hackSize + hackCount = a%nhacks + if (.not.allocated(a%hackOffsets)) then + info = -1 + return + end if + allocheight = a%hackOffsets(hackCount+1) +!!$ write(*,*) 'HDIAG TO GPU:',nr,nc,hacksize,hackCount,allocheight,& +!!$ & size(a%hackoffsets),size(a%diaoffsets), size(a%val) + if (.not.allocated(a%diaOffsets)) then + info = -2 + return + end if + if (.not.allocated(a%val)) then + info = -3 + return + end if + + if (c_associated(a%deviceMat)) then + call freeHdiagDevice(a%deviceMat) + endif + + info = FAllocHdiagDevice(a%deviceMat,nr,nc,& + & allocheight,hacksize,hackCount,spgpu_type_double) + if (info == 0) info = & + & writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets) + +end subroutine psb_c_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 b/cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 new file mode 100644 index 00000000..0c7ce856 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hdiag_vect_mv.F90 @@ -0,0 +1,117 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_hdiag_vect_mv + use psb_c_cuda_vect_mod + implicit none + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_hdiag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_c_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_c_vect_cuda) + select type(yy => y) + type is (psb_c_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHdiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_hdiag_vect_mv diff --git a/cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 new file mode 100644 index 00000000..1b41f132 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hlg_allocate_mnnz.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_hlg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + type(hlldev_parms) :: gpu_parms + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_c_hll_sparse_mat%allocate(m,n,nz) + + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_hlg_allocate_mnnz diff --git a/cuda/impl/psb_c_cuda_hlg_csmm.F90 b/cuda/impl/psb_c_cuda_hlg_csmm.F90 new file mode 100644 index 00000000..88aa53a8 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hlg_csmm.F90 @@ -0,0 +1,122 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_csmm + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_spk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_hlg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_cuda_hlg_csmv + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='c_cuda_hlg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_cuda_hlg_from_gpu + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr + + info = 0 + + if (a%is_sync()) return + if (a%is_host()) return + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + + info = getHllDeviceParams(a%deviceMat,hksize, rows, nzeros, allocsize,& + & hackOffsLength, firstIndex,avgnzr) + + if (info == 0) call a%set_nzeros(nzeros) + if (info == 0) call a%set_hksz(hksize) + if (info == 0) call psb_realloc(rows,a%irn,info) + if (info == 0) call psb_realloc(rows,a%idiag,info) + if (info == 0) call psb_realloc(allocsize,a%ja,info) + if (info == 0) call psb_realloc(allocsize,a%val,info) + if (info == 0) call psb_realloc((hackOffsLength+1),a%hkoffs,info) + + if (info == 0) info = & + & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) + call a%set_sync() + +end subroutine psb_c_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 b/cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 new file mode 100644 index 00000000..87d7c662 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hlg_inner_vect_sv.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_inner_vect_sv + use psb_c_cuda_vect_mod + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_base_inner_vect_sv' + logical, parameter :: debug=.false. + complex(psb_spk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + info = psb_success_ + + + call x%sync() + call y%sync() + if (a%is_dev()) call a%sync() + call a%psb_c_hll_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_hlg_inner_vect_sv diff --git a/cuda/impl/psb_c_cuda_hlg_mold.F90 b/cuda/impl/psb_c_cuda_hlg_mold.F90 new file mode 100644 index 00000000..a702e211 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hlg_mold.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_hlg_mold(a,b,info) + + use psb_base_mod + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_mold + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer, intent(out) :: info + Integer :: err_act + character(len=20) :: name='hlg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_cuda_hlg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_c_cuda_hlg_mold diff --git a/cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90 new file mode 100644 index 00000000..2ec5fa2c --- /dev/null +++ b/cuda/impl/psb_c_cuda_hlg_reallocate_nz.F90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_hlg_reallocate_nz(nz,a) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_reallocate_nz + use iso_c_binding + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='c_cuda_hlg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + call a%psb_c_hll_sparse_mat%reallocate(nz) + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_hlg_reallocate_nz diff --git a/cuda/impl/psb_c_cuda_hlg_scal.F90 b/cuda/impl/psb_c_cuda_hlg_scal.F90 new file mode 100644 index 00000000..770d1734 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hlg_scal.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_hlg_scal(d,a,info,side) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_scal + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_c_hll_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_hlg_scal diff --git a/cuda/impl/psb_c_cuda_hlg_scals.F90 b/cuda/impl/psb_c_cuda_hlg_scals.F90 new file mode 100644 index 00000000..ef6bc1e3 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hlg_scals.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_hlg_scals(d,a,info) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_scals + use iso_c_binding + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_c_hll_sparse_mat%scal(d,info) + if (info /= psb_success_) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_c_cuda_hlg_scals diff --git a/cuda/impl/psb_c_cuda_hlg_to_gpu.F90 b/cuda/impl/psb_c_cuda_hlg_to_gpu.F90 new file mode 100644 index 00000000..d7d179e7 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hlg_to_gpu.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_hlg_to_gpu(a,info,nzrm) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_to_gpu + use iso_c_binding + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, nza, n, pitch,maxrowsize, allocsize + + info = 0 + + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + n = a%get_nrows() + allocsize = a%get_size() + nza = a%get_nzeros() + if (c_associated(a%deviceMat)) then + call freehllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,a%hksz,n,nza,allocsize,spgpu_type_complex_float,1) + if (info == 0) info = & + & writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) +! if (info /= 0) goto 9999 + +end subroutine psb_c_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_c_cuda_hlg_vect_mv.F90 b/cuda/impl/psb_c_cuda_hlg_vect_mv.F90 new file mode 100644 index 00000000..3789ef17 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hlg_vect_mv.F90 @@ -0,0 +1,119 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_hlg_vect_mv + use psb_c_cuda_vect_mod + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_hlg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= czero) then + if (.not.y%is_host()) call y%sync() + end if + if (a%is_dev()) call a%sync() + call a%psb_c_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_c_vect_cuda) + select type(yy => y) + type is (psb_c_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvhllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHLLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_hlg_vect_mv diff --git a/cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90 b/cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90 new file mode 100644 index 00000000..bc9f3889 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hybg_allocate_mnnz.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_cuda_hybg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use cusparse_mod + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_c_csr_sparse_mat%allocate(m,n,nz) + + info = initFcusparse() + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_hybg_allocate_mnnz +#endif diff --git a/cuda/impl/psb_c_cuda_hybg_csmm.F90 b/cuda/impl/psb_c_cuda_hybg_csmm.F90 new file mode 100644 index 00000000..227b7d5c --- /dev/null +++ b/cuda/impl/psb_c_cuda_hybg_csmm.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_csmm + implicit none + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_hybg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_cuda_hybg_csmv + implicit none + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_hybg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_cuda_hybg_inner_vect_sv + use psb_c_cuda_vect_mod + implicit none + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_hybg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra.or.(beta/=czero)) then + call x%sync() + call y%sync() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_c_vect_cuda) + select type(yy => y) + type is (psb_c_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= czero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='hybg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_hybg_inner_vect_sv +#endif diff --git a/cuda/impl/psb_c_cuda_hybg_mold.F90 b/cuda/impl/psb_c_cuda_hybg_mold.F90 new file mode 100644 index 00000000..6fe4c378 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hybg_mold.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_cuda_hybg_mold(a,b,info) + + use psb_base_mod + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_mold + implicit none + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hybg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_cuda_hybg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_hybg_mold +#endif diff --git a/cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90 b/cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90 new file mode 100644 index 00000000..15f33077 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hybg_reallocate_nz.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_cuda_hybg_reallocate_nz(nz,a) + + use psb_base_mod + use cusparse_mod + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='c_cuda_hybg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_c_csr_sparse_mat%reallocate(nz) + + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_hybg_reallocate_nz +#endif diff --git a/cuda/impl/psb_c_cuda_hybg_scal.F90 b/cuda/impl/psb_c_cuda_hybg_scal.F90 new file mode 100644 index 00000000..ff1c1515 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hybg_scal.F90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_cuda_hybg_scal(d,a,info,side) + + use psb_base_mod + use cusparse_mod + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_scal + implicit none + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m,n,nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_c_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_hybg_scal +#endif diff --git a/cuda/impl/psb_c_cuda_hybg_scals.F90 b/cuda/impl/psb_c_cuda_hybg_scals.F90 new file mode 100644 index 00000000..1b2ddf32 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hybg_scals.F90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_cuda_hybg_scals(d,a,info) + + use psb_base_mod + use cusparse_mod + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_scals + implicit none + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + + call a%psb_c_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_hybg_scals +#endif diff --git a/cuda/impl/psb_c_cuda_hybg_to_gpu.F90 b/cuda/impl/psb_c_cuda_hybg_to_gpu.F90 new file mode 100644 index 00000000..15a65abc --- /dev/null +++ b/cuda/impl/psb_c_cuda_hybg_to_gpu.F90 @@ -0,0 +1,148 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_cuda_hybg_to_gpu(a,info,nzrm) + + use psb_base_mod + use cusparse_mod + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_to_gpu + implicit none + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + complex(psb_spk_), allocatable :: valdi(:) + + info = 0 + + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = HYBGDeviceFree(a%deviceMat) + end if + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! We are explicitly adding the diagonal + if (info == 0) info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = cone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = cone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + else + + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) then + if (a%is_unit()) then + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + endif + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceHybsmAnalysis(a%deviceMat) + end if + + + if (info /= 0) then + write(0,*) 'Error in HYBG_TO_GPU ',info + end if + +end subroutine psb_c_cuda_hybg_to_gpu +#endif diff --git a/cuda/impl/psb_c_cuda_hybg_vect_mv.F90 b/cuda/impl/psb_c_cuda_hybg_vect_mv.F90 new file mode 100644 index 00000000..58ce0386 --- /dev/null +++ b/cuda/impl/psb_c_cuda_hybg_vect_mv.F90 @@ -0,0 +1,118 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_hybg_vect_mv + use psb_c_cuda_vect_mod + implicit none + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_cuda_hybg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= czero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_c_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_c_vect_cuda) + select type(yy => y) + type is (psb_c_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= czero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_c_cuda_hybg_vect_mv +#endif diff --git a/cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90 new file mode 100644 index 00000000..f0a74c09 --- /dev/null +++ b/cuda/impl/psb_c_cuda_mv_csrg_from_coo.F90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_mv_csrg_from_coo(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_mv_csrg_from_coo + implicit none + + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + + info = psb_success_ + + call a%psb_c_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cuda_mv_csrg_from_coo diff --git a/cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90 new file mode 100644 index 00000000..eb3698b0 --- /dev/null +++ b/cuda/impl/psb_c_cuda_mv_csrg_from_fmt.F90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_mv_csrg_from_fmt(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_c_cuda_csrg_mat_mod, psb_protect_name => psb_c_cuda_mv_csrg_from_fmt + implicit none + + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer, intent(out) :: info + + !locals + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_c_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return + call a%to_gpu(info) + end select + +end subroutine psb_c_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_c_cuda_mv_diag_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_diag_from_coo.F90 new file mode 100644 index 00000000..c1ee2ba9 --- /dev/null +++ b/cuda/impl/psb_c_cuda_mv_diag_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_mv_diag_from_coo(a,b,info) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_c_cuda_diag_mat_mod, psb_protect_name => psb_c_cuda_mv_diag_from_coo + + implicit none + + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) goto 9999 + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cuda_mv_diag_from_coo diff --git a/cuda/impl/psb_c_cuda_mv_elg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_elg_from_coo.F90 new file mode 100644 index 00000000..f9555729 --- /dev/null +++ b/cuda/impl/psb_c_cuda_mv_elg_from_coo.F90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_mv_elg_from_coo(a,b,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_mv_elg_from_coo + implicit none + + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_c_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90 new file mode 100644 index 00000000..59615e25 --- /dev/null +++ b/cuda/impl/psb_c_cuda_mv_elg_from_fmt.F90 @@ -0,0 +1,86 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cuda_mv_elg_from_fmt(a,b,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_c_cuda_elg_mat_mod, psb_protect_name => psb_c_cuda_mv_elg_from_fmt + implicit none + + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m + type(elldev_parms) :: gpu_parms + + info = psb_success_ + + if (b%is_dev()) call b%sync() + select type (b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b,info) + + class is (psb_c_ell_sparse_mat) + nzm = size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + deallocate(b%ja,stat=info) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + deallocate(b%val,stat=info) + end if + a%nzt = nza + call b%free() + call a%to_gpu(info) + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_cuda_mv_elg_from_fmt diff --git a/cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90 new file mode 100644 index 00000000..21ee731d --- /dev/null +++ b/cuda/impl/psb_c_cuda_mv_hdiag_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_cuda_mv_hdiag_from_coo(a,b,info) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_c_cuda_hdiag_mat_mod, psb_protect_name => psb_c_cuda_mv_hdiag_from_coo + use psb_cuda_env_mod + + implicit none + + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + + a%hacksize = psb_cuda_WarpSize() + + call a%psb_c_hdia_sparse_mat%mv_from_coo(b,info) + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cuda_mv_hdiag_from_coo diff --git a/cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90 new file mode 100644 index 00000000..50c20fad --- /dev/null +++ b/cuda/impl/psb_c_cuda_mv_hlg_from_coo.F90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_cuda_mv_hlg_from_coo(a,b,info) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_cuda_env_mod + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_mv_hlg_from_coo + implicit none + + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_c_cuda_mv_hlg_from_coo diff --git a/cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90 new file mode 100644 index 00000000..3fba905a --- /dev/null +++ b/cuda/impl/psb_c_cuda_mv_hlg_from_fmt.F90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_cuda_mv_hlg_from_fmt(a,b,info) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_c_cuda_hlg_mat_mod, psb_protect_name => psb_c_cuda_mv_hlg_from_fmt + implicit none + + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_cuda_mv_hlg_from_fmt diff --git a/cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90 b/cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90 new file mode 100644 index 00000000..f7cdae40 --- /dev/null +++ b/cuda/impl/psb_c_cuda_mv_hybg_from_coo.F90 @@ -0,0 +1,59 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_cuda_mv_hybg_from_coo(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_mv_hybg_from_coo + implicit none + + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%psb_c_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cuda_mv_hybg_from_coo +#endif diff --git a/cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90 b/cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90 new file mode 100644 index 00000000..9be9f0b4 --- /dev/null +++ b/cuda/impl/psb_c_cuda_mv_hybg_from_fmt.F90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_c_cuda_mv_hybg_from_fmt(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_c_cuda_hybg_mat_mod, psb_protect_name => psb_c_cuda_mv_hybg_from_fmt + implicit none + + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_c_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return + call a%to_gpu(info) + end select +end subroutine psb_c_cuda_mv_hybg_from_fmt +#endif diff --git a/cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90 new file mode 100644 index 00000000..ab3a7256 --- /dev/null +++ b/cuda/impl/psb_d_cuda_cp_csrg_from_coo.F90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_cp_csrg_from_coo(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_cp_csrg_from_coo + implicit none + + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_d_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cuda_cp_csrg_from_coo diff --git a/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 new file mode 100644 index 00000000..8f8e8cbe --- /dev/null +++ b/cuda/impl/psb_d_cuda_cp_csrg_from_fmt.F90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_cp_csrg_from_fmt(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_cp_csrg_from_fmt + !use iso_c_binding + implicit none + + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + select type(b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_d_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return + call a%to_gpu(info) + end select + +end subroutine psb_d_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_d_cuda_cp_diag_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_diag_from_coo.F90 new file mode 100644 index 00000000..dc0401d5 --- /dev/null +++ b/cuda/impl/psb_d_cuda_cp_diag_from_coo.F90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cuda_cp_diag_from_coo(a,b,info) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_cp_diag_from_coo + implicit none + + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + call a%psb_d_dia_sparse_mat%cp_from_coo(b,info) + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cuda_cp_diag_from_coo diff --git a/cuda/impl/psb_d_cuda_cp_elg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_elg_from_coo.F90 new file mode 100644 index 00000000..890bdc39 --- /dev/null +++ b/cuda/impl/psb_d_cuda_cp_elg_from_coo.F90 @@ -0,0 +1,161 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_cuda_cp_elg_from_coo(a,b,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_cp_elg_from_coo + use psi_ext_util_mod + use psb_cuda_env_mod + implicit none + + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, ld, ldv, hacksize + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + type(psb_d_coo_sparse_mat) :: tmp + integer(psb_ipk_), allocatable :: idisp(:) + + info = psb_success_ + hacksize = max(1,psb_cuda_WarpSize()) + if (b%is_dev()) call b%sync() + + if (b%is_by_rows()) then + + call psi_d_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,b%ja,b%val, a%deviceMat) + call a%set_dev() + else + call b%cp_to_coo(tmp,info) + call psi_d_count_ell_from_coo(a,tmp,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,tmp%ja,tmp%val, a%deviceMat) + + call a%set_dev() + end if + + if (info /= psb_success_) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine psi_d_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize) + + use psb_base_mod + use psi_ext_util_mod + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(out) :: info, nzm, ldv + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, & + & ir, ic, hsz_ + real(psb_dpk_) :: t0,t1 + logical, parameter :: timing=.true. + + + info = psb_success_ + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == psb_success_) call psb_realloc(nr+1,idisp,info) + if (info /= psb_success_) return + if (timing) t0=psb_wtime() + + a%irn = 0 + do i=1, nza + ir = b%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + idisp(1) = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + idisp(i+1) = a%nzt + end do + + end subroutine psi_d_count_ell_from_coo + +end subroutine psb_d_cuda_cp_elg_from_coo diff --git a/cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90 new file mode 100644 index 00000000..7beea7f1 --- /dev/null +++ b/cuda/impl/psb_d_cuda_cp_elg_from_fmt.F90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cuda_cp_elg_from_fmt(a,b,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_cp_elg_from_fmt + implicit none + + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + type(elldev_parms) :: gpu_parms + + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_d_ell_sparse_mat) + nzm = psb_size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + end if + a%nzt = nza + call a%to_gpu(info) + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_cuda_cp_elg_from_fmt diff --git a/cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90 new file mode 100644 index 00000000..82ef4876 --- /dev/null +++ b/cuda/impl/psb_d_cuda_cp_hdiag_from_coo.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_cuda_cp_hdiag_from_coo(a,b,info) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_cp_hdiag_from_coo + use psb_cuda_env_mod + implicit none + + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + a%hacksize = psb_cuda_WarpSize() + + call a%psb_d_hdia_sparse_mat%cp_from_coo(b,info) + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cuda_cp_hdiag_from_coo diff --git a/cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90 new file mode 100644 index 00000000..34b999a9 --- /dev/null +++ b/cuda/impl/psb_d_cuda_cp_hlg_from_coo.F90 @@ -0,0 +1,190 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cuda_cp_hlg_from_coo(a,b,info) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_cuda_env_mod + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_cp_hlg_from_coo + implicit none + + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + integer(psb_ipk_), allocatable :: idisp(:) + character(len=20) :: name='hll_from_coo' + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + integer(psb_ipk_), allocatable :: irn(:), ja(:), hko(:) + real(psb_dpk_), allocatable :: val(:) + logical, parameter :: debug=.false. + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + hksz = max(1,psb_cuda_WarpSize()) + + if (b%is_by_rows()) then + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + if (debug) write(0,*) 'Copying through GPU',nza + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,b%ja, b%val, a%deviceMat) + call a%set_dev() + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + if (debug) write(0,*) 'Copying through GPU' + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,tmp,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,tmp%ja, tmp%val, a%deviceMat) + + call tmp%free() + call a%set_dev() + end if + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + subroutine psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + use psb_base_mod + use psi_ext_util_mod + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info, noffs, isz + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, kc + logical, parameter :: debug=.false. + + info = 0 + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + if (debug) write(0,*) 'Start compute hckoff_from_coo',nr,nc,nza + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == 0) call psb_realloc(nr+1,idisp,info) + if (info /= 0) return + a%irn = 0 + if (debug) then + do i=1, nza + if ((1<=b%ia(i)).and.(b%ia(i)<= nr)) then + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + else + write(0,*) 'Out of bouds IA ',i,b%ia(i),nr + end if + end do + else + do i=1, nza + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + end do + end if + a%nzt = nza + + + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (debug) write(0,*) ' noffsets ',noffs,info + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + idisp(1) = 0 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + idisp(i+1) = idisp(i) + a%irn(i) + do k=1,ir-1 + idisp(i+k+1) = idisp(i+k) + a%irn(i+k) + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) +!!$ write(*,*) 'End of psi_comput_hckoff ',info + end subroutine psi_compute_hckoff_from_coo + +end subroutine psb_d_cuda_cp_hlg_from_coo diff --git a/cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90 new file mode 100644 index 00000000..ecb15157 --- /dev/null +++ b/cuda/impl/psb_d_cuda_cp_hlg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cuda_cp_hlg_from_fmt(a,b,info) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_cp_hlg_from_fmt + implicit none + + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_d_hll_sparse_mat%cp_from_fmt(b,info) + if (info == 0) call a%to_gpu(info) + end select + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cuda_cp_hlg_from_fmt diff --git a/cuda/impl/psb_d_cuda_cp_hybg_from_coo.F90 b/cuda/impl/psb_d_cuda_cp_hybg_from_coo.F90 new file mode 100644 index 00000000..b236a91d --- /dev/null +++ b/cuda/impl/psb_d_cuda_cp_hybg_from_coo.F90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_cuda_cp_hybg_from_coo(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_cp_hybg_from_coo + implicit none + + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_d_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cuda_cp_hybg_from_coo +#endif diff --git a/cuda/impl/psb_d_cuda_cp_hybg_from_fmt.F90 b/cuda/impl/psb_d_cuda_cp_hybg_from_fmt.F90 new file mode 100644 index 00000000..87d25252 --- /dev/null +++ b/cuda/impl/psb_d_cuda_cp_hybg_from_fmt.F90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_cuda_cp_hybg_from_fmt(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_cp_hybg_from_fmt + implicit none + + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_d_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return + call a%to_gpu(info) + end select + +end subroutine psb_d_cuda_cp_hybg_from_fmt +#endif diff --git a/cuda/impl/psb_d_cuda_csrg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_csrg_allocate_mnnz.F90 new file mode 100644 index 00000000..056f2deb --- /dev/null +++ b/cuda/impl/psb_d_cuda_csrg_allocate_mnnz.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cuda_csrg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use cusparse_mod + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_d_csr_sparse_mat%allocate(m,n,nz) + + info = initFcusparse() + if (info == 0) call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_csrg_allocate_mnnz diff --git a/cuda/impl/psb_d_cuda_csrg_csmm.F90 b/cuda/impl/psb_d_cuda_csrg_csmm.F90 new file mode 100644 index 00000000..ddac1373 --- /dev/null +++ b/cuda/impl/psb_d_cuda_csrg_csmm.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_csmm + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csrg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_cuda_csrg_csmv + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_csrg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_cuda_csrg_from_gpu + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, n, nz + + info = 0 + + if (.not.(c_associated(a%deviceMat%mat))) then + call a%free() + return + end if + + info = CSRGDeviceGetParms(a%deviceMat,m,n,nz) + if (info /= psb_success_) return + + if (info == 0) call psb_realloc(m+1,a%irp,info) + if (info == 0) call psb_realloc(nz,a%ja,info) + if (info == 0) call psb_realloc(nz,a%val,info) + if (info == 0) info = & + & CSRGDevice2Host(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) +#if (CUDA_SHORT_VERSION <= 10) || (CUDA_VERSION < 11030) + a%irp(:) = a%irp(:)+1 + a%ja(:) = a%ja(:)+1 +#endif + + call a%set_sync() + +end subroutine psb_d_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 new file mode 100644 index 00000000..9a45ee17 --- /dev/null +++ b/cuda/impl/psb_d_cuda_csrg_inner_vect_sv.F90 @@ -0,0 +1,125 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_inner_vect_sv + use psb_d_cuda_vect_mod + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_csrg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra.or.(beta/=dzero)) then + call x%sync() + call y%sync() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_d_vect_cuda) + select type(yy => y) + type is (psb_d_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='csrg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_csrg_inner_vect_sv diff --git a/cuda/impl/psb_d_cuda_csrg_mold.F90 b/cuda/impl/psb_d_cuda_csrg_mold.F90 new file mode 100644 index 00000000..eec34975 --- /dev/null +++ b/cuda/impl/psb_d_cuda_csrg_mold.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cuda_csrg_mold(a,b,info) + + use psb_base_mod + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_mold + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='csrg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_cuda_csrg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_csrg_mold diff --git a/cuda/impl/psb_d_cuda_csrg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_csrg_reallocate_nz.F90 new file mode 100644 index 00000000..c27cb943 --- /dev/null +++ b/cuda/impl/psb_d_cuda_csrg_reallocate_nz.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cuda_csrg_reallocate_nz(nz,a) + + use psb_base_mod + use cusparse_mod + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='d_cuda_csrg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_d_csr_sparse_mat%reallocate(nz) + + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_csrg_reallocate_nz diff --git a/cuda/impl/psb_d_cuda_csrg_scal.F90 b/cuda/impl/psb_d_cuda_csrg_scal.F90 new file mode 100644 index 00000000..860e9396 --- /dev/null +++ b/cuda/impl/psb_d_cuda_csrg_scal.F90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cuda_csrg_scal(d,a,info,side) + + use psb_base_mod + use cusparse_mod + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_scal + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + call a%psb_d_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_csrg_scal diff --git a/cuda/impl/psb_d_cuda_csrg_scals.F90 b/cuda/impl/psb_d_cuda_csrg_scals.F90 new file mode 100644 index 00000000..87ef588d --- /dev/null +++ b/cuda/impl/psb_d_cuda_csrg_scals.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cuda_csrg_scals(d,a,info) + + use psb_base_mod + use cusparse_mod + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_scals + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + call a%psb_d_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_csrg_scals diff --git a/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 new file mode 100644 index 00000000..d7a1b1e7 --- /dev/null +++ b/cuda/impl/psb_d_cuda_csrg_to_gpu.F90 @@ -0,0 +1,378 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cuda_csrg_to_gpu(a,info,nzrm) + + use psb_base_mod + use cusparse_mod + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_to_gpu + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + real(psb_dpk_), allocatable :: valdi(:) + + info = 0 + + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = CSRGDeviceFree(a%deviceMat) + end if +#if (CUDA_SHORT_VERSION <= 10 ) + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = done + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = done + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) +!!$ if (info == 0) then +!!$ if (a%is_unit()) then +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) +!!$ else +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) +!!$ end if +!!$ end if + if ((info == 0) .and. a%is_triangle()) then + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceCsrsmAnalysis(a%deviceMat) + end if + +#elif CUDA_VERSION < 11030 + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) +!!$ write(0,*) 'Done deviceAlloc' + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_zero) +!!$ write(0,*) 'Done SetIndexBase' + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(0:nzdi),valdi(0:nzdi),stat=info) + if (info == 0) then + irpdi(1) = 0 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = done + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = done + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ write(0,*) 'Done deviceAlloc', info + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,& + & cusparse_index_base_zero) +!!$ write(0,*) 'Done setIndexBase', info + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + nzdi=a%irp(m+1)-1 + if (info == 0) allocate(irpdi(m+1),jadi(max(nzdi,1)),stat=info) + if (info == 0) then + irpdi(1:m+1) = a%irp(1:m+1) -1 + jadi(1:nzdi) = a%ja(1:nzdi) -1 + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,irpdi,jadi,a%val) +!!$ write(0,*) 'Done Host2Device', info + endif + + +#elif 0 + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = done + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = done + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + +!!$ if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) +!!$ end if + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + if ((info == 0) .and. a%is_triangle()) then + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = done + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = done + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + +#endif + call a%set_sync() + + if (info /= 0) then + write(0,*) 'Error in CSRG_TO_GPU ',info + end if + +end subroutine psb_d_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_d_cuda_csrg_vect_mv.F90 b/cuda/impl/psb_d_cuda_csrg_vect_mv.F90 new file mode 100644 index 00000000..03fefbdd --- /dev/null +++ b/cuda/impl/psb_d_cuda_csrg_vect_mv.F90 @@ -0,0 +1,117 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_csrg_vect_mv + use psb_d_cuda_vect_mod + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_csrg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_d_vect_cuda) + select type(yy => y) + type is (psb_d_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_d_cuda_csrg_vect_mv diff --git a/cuda/impl/psb_d_cuda_diag_csmv.F90 b/cuda/impl/psb_d_cuda_diag_csmv.F90 new file mode 100644 index 00000000..0317a369 --- /dev/null +++ b/cuda/impl/psb_d_cuda_diag_csmv.F90 @@ -0,0 +1,127 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_csmv + implicit none + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='d_cuda_diag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_cuda_diag_mold + implicit none + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='diag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_cuda_diag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_diag_mold diff --git a/cuda/impl/psb_d_cuda_diag_to_gpu.F90 b/cuda/impl/psb_d_cuda_diag_to_gpu.F90 new file mode 100644 index 00000000..9b648962 --- /dev/null +++ b/cuda/impl/psb_d_cuda_diag_to_gpu.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cuda_diag_to_gpu(a,info,nzrm) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_to_gpu + use iso_c_binding + implicit none + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d + type(diagdev_parms) :: gpu_parms + + info = 0 + + if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return + + n = size(a%data,1) + d = size(a%data,2) + c = a%get_ncols() + !allocsize = a%get_size() + !write(*,*) 'Create the DIAG matrix' + gpu_parms = FgetDiagDeviceParams(n,c,d,spgpu_type_double) + if (c_associated(a%deviceMat)) then + call freeDiagDevice(a%deviceMat) + endif + info = FallocDiagDevice(a%deviceMat,n,c,d,spgpu_type_double) + if (info == 0) info = & + & writeDiagDevice(a%deviceMat,a%data,a%offset,n) +! if (info /= 0) goto 9999 + +end subroutine psb_d_cuda_diag_to_gpu diff --git a/cuda/impl/psb_d_cuda_diag_vect_mv.F90 b/cuda/impl/psb_d_cuda_diag_vect_mv.F90 new file mode 100644 index 00000000..3bc2372d --- /dev/null +++ b/cuda/impl/psb_d_cuda_diag_vect_mv.F90 @@ -0,0 +1,116 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_diag_vect_mv + use psb_d_cuda_vect_mod + implicit none + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_diag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_d_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_d_vect_cuda) + select type(yy => y) + type is (psb_d_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_diag_vect_mv diff --git a/cuda/impl/psb_d_cuda_dnsg_mat_impl.F90 b/cuda/impl/psb_d_cuda_dnsg_mat_impl.F90 new file mode 100644 index 00000000..e90e816a --- /dev/null +++ b/cuda/impl/psb_d_cuda_dnsg_mat_impl.F90 @@ -0,0 +1,428 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_d_cuda_vect_mod + use dnsdev_mod + use psb_d_vectordev_mod + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_dnsg_vect_mv + implicit none + class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + logical :: tra + character :: trans_ + real(psb_dpk_), allocatable :: rx(:), ry(:) + Integer(Psb_ipk_) :: err_act, m, n, k + character(len=20) :: name='d_cuda_dnsg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (trans_ =='N') then + m = a%get_nrows() + n = 1 + k = a%get_ncols() + else + m = a%get_ncols() + n = 1 + k = a%get_nrows() + end if + select type (xx => x) + type is (psb_d_vect_cuda) + select type(yy => y) + type is (psb_d_vect_cuda) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDnsDevice(trans_,m,n,k,alpha,a%deviceMat,& + & xx%deviceVect,beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDnsDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_dnsg_vect_mv + + +subroutine psb_d_cuda_dnsg_mold(a,b,info) + use psb_base_mod + use psb_d_cuda_vect_mod + use dnsdev_mod + use psb_d_vectordev_mod + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_dnsg_mold + implicit none + class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dnsg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_cuda_dnsg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_dnsg_mold + + +!!$ +!!$ interface +!!$ subroutine psb_d_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta +!!$ class(psb_d_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_cuda_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_d_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_cuda_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_d_cuda_dnsg_allocate_mnnz +!!$ end interface + +subroutine psb_d_cuda_dnsg_to_gpu(a,info) + use psb_base_mod + use psb_d_cuda_vect_mod + use dnsdev_mod + use psb_d_vectordev_mod + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_dnsg_to_gpu + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act, pitch, lda + logical, parameter :: debug=.false. + character(len=20) :: name='d_cuda_dnsg_to_gpu' + + call psb_erractionsave(err_act) + info = psb_success_ + if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2) + info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),& + & spgpu_type_double,1) + if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2)) + if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_dnsg_to_gpu + + + +subroutine psb_d_cuda_cp_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_d_cuda_vect_mod + use dnsdev_mod + use psb_d_vectordev_mod + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_cp_dnsg_from_coo + implicit none + + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_dnsg_cp_from_coo' + integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: debug=.false. + type(psb_d_coo_sparse_mat) :: tmp + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + call a%psb_d_dns_sparse_mat%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_cp_from_coo: dns_cp',info + if (info == 0) call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_cp_dnsg_from_coo + +subroutine psb_d_cuda_cp_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_d_cuda_vect_mod + use dnsdev_mod + use psb_d_vectordev_mod + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_cp_dnsg_from_fmt + implicit none + + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + type(psb_d_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b,info) + +!!$ class is (psb_d_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_cp_dnsg_from_fmt + + + +subroutine psb_d_cuda_mv_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_d_cuda_vect_mod + use dnsdev_mod + use psb_d_vectordev_mod + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_mv_dnsg_from_coo + implicit none + + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + logical, parameter :: debug=.false. + character(len=20) :: name='d_cuda_dnsg_mv_from_coo' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_mv_from_coo: cp_from_coo:',info + call b%free() + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_mv_dnsg_from_coo + +subroutine psb_d_cuda_mv_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_d_cuda_vect_mod + use dnsdev_mod + use psb_d_vectordev_mod + use psb_d_cuda_dnsg_mat_mod, psb_protect_name => psb_d_cuda_mv_dnsg_from_fmt + implicit none + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + type(psb_d_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b,info) + +!!$ class is (psb_d_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_d_cuda_mv_dnsg_from_fmt diff --git a/cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 new file mode 100644 index 00000000..6db20c96 --- /dev/null +++ b/cuda/impl/psb_d_cuda_elg_allocate_mnnz.F90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_cuda_elg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + type(elldev_parms) :: gpu_parms + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione,izero,izero,izero,izero/)) + goto 9999 + endif + + gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_double,1) + ld = gpu_parms%pitch + nz_ = gpu_parms%maxRowSize + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_elg_allocate_mnnz diff --git a/cuda/impl/psb_d_cuda_elg_asb.f90 b/cuda/impl/psb_d_cuda_elg_asb.f90 new file mode 100644 index 00000000..7d510ee2 --- /dev/null +++ b/cuda/impl/psb_d_cuda_elg_asb.f90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_elg_asb(a) + + use psb_base_mod + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_asb + implicit none + + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + + integer(psb_ipk_) :: err_act, info + character(len=20) :: name='elg_asb' + logical :: clear_ + logical, parameter :: debug=.false. + real(psb_dpk_), allocatable :: valt(:,:) + integer(psb_ipk_), allocatable :: jat(:,:) + integer(psb_ipk_) :: nr, nc + + call psb_erractionsave(err_act) + info = psb_success_ + + ! Only call sync() if we are on host + if (a%is_host()) then + call a%sync() + end if + call a%set_asb() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_elg_asb diff --git a/cuda/impl/psb_d_cuda_elg_csmm.F90 b/cuda/impl/psb_d_cuda_elg_csmm.F90 new file mode 100644 index 00000000..f77d72d8 --- /dev/null +++ b/cuda/impl/psb_d_cuda_elg_csmm.F90 @@ -0,0 +1,124 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csmm + implicit none + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_elg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_cuda_elg_csmv + implicit none + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_elg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_cuda_elg_csput_a + implicit none + + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_elg_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +!!$ write(0,*) 'In ELG_csput_a' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then +!!$ write(*,*) 'elg_csput_a ' + if (a%is_dev()) call a%sync() + call a%psb_d_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_elg_csput_a + + + +subroutine psb_d_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use iso_c_binding + use elldev_mod + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_csput_v + use psb_d_cuda_vect_mod + implicit none + + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_elg_csput_v' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw + logical :: gpu_invoked + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + integer(psb_ipk_), allocatable :: idxs(:) + logical, parameter :: debug_idxs=.false., debug_vals=.false. + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +! write(0,*) 'In ELG_csput_v' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (ia%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (ja%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (val%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + + t1=psb_wtime() + gpu_invoked = .false. + select type (ia) + class is (psb_i_vect_cuda) + select type (ja) + class is (psb_i_vect_cuda) + select type (val) + class is (psb_d_vect_cuda) + if (a%is_host()) call a%sync() + if (val%is_host()) call val%sync() + if (ia%is_host()) call ia%sync() + if (ja%is_host()) call ja%sync() + info = csputEllDeviceDouble(a%deviceMat,nz,& + & ia%deviceVect,ja%deviceVect,val%deviceVect) + call a%set_dev() + gpu_invoked=.true. + end select + end select + end select + if (.not.gpu_invoked) then +!!$ write(0,*)'Not gpu_invoked ' + if (a%is_dev()) call a%sync() + call a%psb_d_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + call a%set_host() + end if + + if (info /= 0) then + info = psb_err_internal_error_ + end if + + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_d_cuda_elg_csput_v diff --git a/cuda/impl/psb_d_cuda_elg_from_gpu.F90 b/cuda/impl/psb_d_cuda_elg_from_gpu.F90 new file mode 100644 index 00000000..720a6d73 --- /dev/null +++ b/cuda/impl/psb_d_cuda_elg_from_gpu.F90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_elg_from_gpu(a,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_from_gpu + implicit none + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize + + info = 0 + + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + if (info == 0) call psb_realloc(pitch,a%irn,info) + end if + if (info == 0) info = & + & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag) + call a%set_sync() + +end subroutine psb_d_cuda_elg_from_gpu diff --git a/cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90 new file mode 100644 index 00000000..5e5d72ef --- /dev/null +++ b/cuda/impl/psb_d_cuda_elg_inner_vect_sv.F90 @@ -0,0 +1,84 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_inner_vect_sv + use psb_d_cuda_vect_mod + implicit none + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_elg_inner_vect_sv' + logical, parameter :: debug=.false. + real(psb_dpk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + if (a%is_dev()) call a%sync() + if (.false.) then + rx = x%get_vect() + ry = y%get_vect() + call a%inner_spsm(alpha,rx,beta,ry,info,trans) + call y%bld(ry) + else + call x%sync() + call y%sync() + call a%psb_d_ell_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + end if + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_elg_inner_vect_sv diff --git a/cuda/impl/psb_d_cuda_elg_mold.F90 b/cuda/impl/psb_d_cuda_elg_mold.F90 new file mode 100644 index 00000000..107f19af --- /dev/null +++ b/cuda/impl/psb_d_cuda_elg_mold.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_cuda_elg_mold(a,b,info) + + use psb_base_mod + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_mold + implicit none + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='elg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_cuda_elg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_elg_mold diff --git a/cuda/impl/psb_d_cuda_elg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_elg_reallocate_nz.F90 new file mode 100644 index 00000000..47464760 --- /dev/null +++ b/cuda/impl/psb_d_cuda_elg_reallocate_nz.F90 @@ -0,0 +1,72 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_elg_reallocate_nz(nz,a) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='d_cuda_elg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + if (a%is_dev()) call a%sync() + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call a%to_gpu(info,nzrm=nzrm) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_elg_reallocate_nz diff --git a/cuda/impl/psb_d_cuda_elg_scal.F90 b/cuda/impl/psb_d_cuda_elg_scal.F90 new file mode 100644 index 00000000..420c710e --- /dev/null +++ b/cuda/impl/psb_d_cuda_elg_scal.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_elg_scal(d,a,info,side) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_scal + implicit none + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_d_ell_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_elg_scal diff --git a/cuda/impl/psb_d_cuda_elg_scals.F90 b/cuda/impl/psb_d_cuda_elg_scals.F90 new file mode 100644 index 00000000..ff22002e --- /dev/null +++ b/cuda/impl/psb_d_cuda_elg_scals.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_elg_scals(d,a,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_scals + implicit none + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_elg_scals diff --git a/cuda/impl/psb_d_cuda_elg_to_gpu.F90 b/cuda/impl/psb_d_cuda_elg_to_gpu.F90 new file mode 100644 index 00000000..9b88af69 --- /dev/null +++ b/cuda/impl/psb_d_cuda_elg_to_gpu.F90 @@ -0,0 +1,84 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_elg_to_gpu(a,info,nzrm) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_to_gpu + implicit none + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize, nzt + type(elldev_parms) :: gpu_parms + + info = 0 + + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + nzt = a%get_nzeros() + if (present(nzrm)) nzm = max(nzm,nzrm) + + gpu_parms = FgetEllDeviceParams(m,nzm,nzt,n,spgpu_type_double,1) + + if (c_associated(a%deviceMat)) then + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + else + pitch = -1 + maxrowsize = -1 + end if + + if ((pitch /= gpu_parms%pitch).or.(maxrowsize /= gpu_parms%maxRowSize)) then + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + info = FallocEllDevice(a%deviceMat,m,nzm,nzt,n,spgpu_type_double,1) + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + end if + if (info == 0) then + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + end if + end if + if (info == 0) info = & + & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag) + call a%set_sync() + +end subroutine psb_d_cuda_elg_to_gpu diff --git a/cuda/impl/psb_d_cuda_elg_trim.f90 b/cuda/impl/psb_d_cuda_elg_trim.f90 new file mode 100644 index 00000000..a371c673 --- /dev/null +++ b/cuda/impl/psb_d_cuda_elg_trim.f90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_elg_trim(a) + + use psb_base_mod + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_trim + implicit none + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + ld = max(1_psb_ipk_,size(a%ja,1)) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_elg_trim diff --git a/cuda/impl/psb_d_cuda_elg_vect_mv.F90 b/cuda/impl/psb_d_cuda_elg_vect_mv.F90 new file mode 100644 index 00000000..f0b83c2b --- /dev/null +++ b/cuda/impl/psb_d_cuda_elg_vect_mv.F90 @@ -0,0 +1,121 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_elg_vect_mv + use psb_d_cuda_vect_mod + implicit none + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_elg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + if (tra) then + if (a%is_dev()) call a%sync() + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_d_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_d_vect_cuda) + select type(yy => y) + type is (psb_d_vect_cuda) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvEllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvELLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_elg_vect_mv diff --git a/cuda/impl/psb_d_cuda_hdiag_csmv.F90 b/cuda/impl/psb_d_cuda_hdiag_csmv.F90 new file mode 100644 index 00000000..bf4dacc1 --- /dev/null +++ b/cuda/impl/psb_d_cuda_hdiag_csmv.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_csmv + implicit none + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='d_cuda_hdiag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_cuda_hdiag_mold + implicit none + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdiag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_cuda_hdiag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_hdiag_mold diff --git a/cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 b/cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 new file mode 100644 index 00000000..73c4a47d --- /dev/null +++ b/cuda/impl/psb_d_cuda_hdiag_to_gpu.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_hdiag_to_gpu(a,info) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_to_gpu + use iso_c_binding + implicit none + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight + type(hdiagdev_parms) :: gpu_parms + + info = 0 + + nr = a%get_nrows() + nc = a%get_ncols() + hacksize = a%hackSize + hackCount = a%nhacks + if (.not.allocated(a%hackOffsets)) then + info = -1 + return + end if + allocheight = a%hackOffsets(hackCount+1) +!!$ write(*,*) 'HDIAG TO GPU:',nr,nc,hacksize,hackCount,allocheight,& +!!$ & size(a%hackoffsets),size(a%diaoffsets), size(a%val) + if (.not.allocated(a%diaOffsets)) then + info = -2 + return + end if + if (.not.allocated(a%val)) then + info = -3 + return + end if + + if (c_associated(a%deviceMat)) then + call freeHdiagDevice(a%deviceMat) + endif + + info = FAllocHdiagDevice(a%deviceMat,nr,nc,& + & allocheight,hacksize,hackCount,spgpu_type_double) + if (info == 0) info = & + & writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets) + +end subroutine psb_d_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_d_cuda_hdiag_vect_mv.F90 b/cuda/impl/psb_d_cuda_hdiag_vect_mv.F90 new file mode 100644 index 00000000..c18c80ac --- /dev/null +++ b/cuda/impl/psb_d_cuda_hdiag_vect_mv.F90 @@ -0,0 +1,117 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_hdiag_vect_mv + use psb_d_cuda_vect_mod + implicit none + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_hdiag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_d_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_d_vect_cuda) + select type(yy => y) + type is (psb_d_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHdiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_hdiag_vect_mv diff --git a/cuda/impl/psb_d_cuda_hlg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_hlg_allocate_mnnz.F90 new file mode 100644 index 00000000..68d9ab50 --- /dev/null +++ b/cuda/impl/psb_d_cuda_hlg_allocate_mnnz.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_hlg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + type(hlldev_parms) :: gpu_parms + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_d_hll_sparse_mat%allocate(m,n,nz) + + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_hlg_allocate_mnnz diff --git a/cuda/impl/psb_d_cuda_hlg_csmm.F90 b/cuda/impl/psb_d_cuda_hlg_csmm.F90 new file mode 100644 index 00000000..ee8424e6 --- /dev/null +++ b/cuda/impl/psb_d_cuda_hlg_csmm.F90 @@ -0,0 +1,122 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_csmm + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_hlg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_cuda_hlg_csmv + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='d_cuda_hlg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_cuda_hlg_from_gpu + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr + + info = 0 + + if (a%is_sync()) return + if (a%is_host()) return + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + + info = getHllDeviceParams(a%deviceMat,hksize, rows, nzeros, allocsize,& + & hackOffsLength, firstIndex,avgnzr) + + if (info == 0) call a%set_nzeros(nzeros) + if (info == 0) call a%set_hksz(hksize) + if (info == 0) call psb_realloc(rows,a%irn,info) + if (info == 0) call psb_realloc(rows,a%idiag,info) + if (info == 0) call psb_realloc(allocsize,a%ja,info) + if (info == 0) call psb_realloc(allocsize,a%val,info) + if (info == 0) call psb_realloc((hackOffsLength+1),a%hkoffs,info) + + if (info == 0) info = & + & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) + call a%set_sync() + +end subroutine psb_d_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 b/cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 new file mode 100644 index 00000000..f4a0424d --- /dev/null +++ b/cuda/impl/psb_d_cuda_hlg_inner_vect_sv.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_inner_vect_sv + use psb_d_cuda_vect_mod + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_base_inner_vect_sv' + logical, parameter :: debug=.false. + real(psb_dpk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + info = psb_success_ + + + call x%sync() + call y%sync() + if (a%is_dev()) call a%sync() + call a%psb_d_hll_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_hlg_inner_vect_sv diff --git a/cuda/impl/psb_d_cuda_hlg_mold.F90 b/cuda/impl/psb_d_cuda_hlg_mold.F90 new file mode 100644 index 00000000..e41d56f2 --- /dev/null +++ b/cuda/impl/psb_d_cuda_hlg_mold.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_hlg_mold(a,b,info) + + use psb_base_mod + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_mold + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer, intent(out) :: info + Integer :: err_act + character(len=20) :: name='hlg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_cuda_hlg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_d_cuda_hlg_mold diff --git a/cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90 new file mode 100644 index 00000000..e696d304 --- /dev/null +++ b/cuda/impl/psb_d_cuda_hlg_reallocate_nz.F90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_hlg_reallocate_nz(nz,a) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_reallocate_nz + use iso_c_binding + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='d_cuda_hlg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + call a%psb_d_hll_sparse_mat%reallocate(nz) + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_hlg_reallocate_nz diff --git a/cuda/impl/psb_d_cuda_hlg_scal.F90 b/cuda/impl/psb_d_cuda_hlg_scal.F90 new file mode 100644 index 00000000..042e5805 --- /dev/null +++ b/cuda/impl/psb_d_cuda_hlg_scal.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_hlg_scal(d,a,info,side) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_scal + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_d_hll_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_hlg_scal diff --git a/cuda/impl/psb_d_cuda_hlg_scals.F90 b/cuda/impl/psb_d_cuda_hlg_scals.F90 new file mode 100644 index 00000000..4c81faa9 --- /dev/null +++ b/cuda/impl/psb_d_cuda_hlg_scals.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_hlg_scals(d,a,info) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_scals + use iso_c_binding + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_d_hll_sparse_mat%scal(d,info) + if (info /= psb_success_) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_d_cuda_hlg_scals diff --git a/cuda/impl/psb_d_cuda_hlg_to_gpu.F90 b/cuda/impl/psb_d_cuda_hlg_to_gpu.F90 new file mode 100644 index 00000000..566c94bd --- /dev/null +++ b/cuda/impl/psb_d_cuda_hlg_to_gpu.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_hlg_to_gpu(a,info,nzrm) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_to_gpu + use iso_c_binding + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, nza, n, pitch,maxrowsize, allocsize + + info = 0 + + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + n = a%get_nrows() + allocsize = a%get_size() + nza = a%get_nzeros() + if (c_associated(a%deviceMat)) then + call freehllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,a%hksz,n,nza,allocsize,spgpu_type_double,1) + if (info == 0) info = & + & writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) +! if (info /= 0) goto 9999 + +end subroutine psb_d_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_d_cuda_hlg_vect_mv.F90 b/cuda/impl/psb_d_cuda_hlg_vect_mv.F90 new file mode 100644 index 00000000..cccba74b --- /dev/null +++ b/cuda/impl/psb_d_cuda_hlg_vect_mv.F90 @@ -0,0 +1,119 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_hlg_vect_mv + use psb_d_cuda_vect_mod + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_hlg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + if (a%is_dev()) call a%sync() + call a%psb_d_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_d_vect_cuda) + select type(yy => y) + type is (psb_d_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvhllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHLLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_hlg_vect_mv diff --git a/cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90 b/cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90 new file mode 100644 index 00000000..50bef667 --- /dev/null +++ b/cuda/impl/psb_d_cuda_hybg_allocate_mnnz.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_cuda_hybg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use cusparse_mod + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_d_csr_sparse_mat%allocate(m,n,nz) + + info = initFcusparse() + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_hybg_allocate_mnnz +#endif diff --git a/cuda/impl/psb_d_cuda_hybg_csmm.F90 b/cuda/impl/psb_d_cuda_hybg_csmm.F90 new file mode 100644 index 00000000..d5b49829 --- /dev/null +++ b/cuda/impl/psb_d_cuda_hybg_csmm.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_csmm + implicit none + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_hybg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_cuda_hybg_csmv + implicit none + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_hybg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_cuda_hybg_inner_vect_sv + use psb_d_cuda_vect_mod + implicit none + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_hybg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra.or.(beta/=dzero)) then + call x%sync() + call y%sync() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_d_vect_cuda) + select type(yy => y) + type is (psb_d_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='hybg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_hybg_inner_vect_sv +#endif diff --git a/cuda/impl/psb_d_cuda_hybg_mold.F90 b/cuda/impl/psb_d_cuda_hybg_mold.F90 new file mode 100644 index 00000000..fa239696 --- /dev/null +++ b/cuda/impl/psb_d_cuda_hybg_mold.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_cuda_hybg_mold(a,b,info) + + use psb_base_mod + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_mold + implicit none + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hybg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_cuda_hybg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_hybg_mold +#endif diff --git a/cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90 b/cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90 new file mode 100644 index 00000000..f44c3f10 --- /dev/null +++ b/cuda/impl/psb_d_cuda_hybg_reallocate_nz.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_cuda_hybg_reallocate_nz(nz,a) + + use psb_base_mod + use cusparse_mod + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='d_cuda_hybg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_d_csr_sparse_mat%reallocate(nz) + + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_hybg_reallocate_nz +#endif diff --git a/cuda/impl/psb_d_cuda_hybg_scal.F90 b/cuda/impl/psb_d_cuda_hybg_scal.F90 new file mode 100644 index 00000000..5be8d801 --- /dev/null +++ b/cuda/impl/psb_d_cuda_hybg_scal.F90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_cuda_hybg_scal(d,a,info,side) + + use psb_base_mod + use cusparse_mod + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_scal + implicit none + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m,n,nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_d_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_hybg_scal +#endif diff --git a/cuda/impl/psb_d_cuda_hybg_scals.F90 b/cuda/impl/psb_d_cuda_hybg_scals.F90 new file mode 100644 index 00000000..59e21dd2 --- /dev/null +++ b/cuda/impl/psb_d_cuda_hybg_scals.F90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_cuda_hybg_scals(d,a,info) + + use psb_base_mod + use cusparse_mod + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_scals + implicit none + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + + call a%psb_d_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_hybg_scals +#endif diff --git a/cuda/impl/psb_d_cuda_hybg_to_gpu.F90 b/cuda/impl/psb_d_cuda_hybg_to_gpu.F90 new file mode 100644 index 00000000..7b8e2e5f --- /dev/null +++ b/cuda/impl/psb_d_cuda_hybg_to_gpu.F90 @@ -0,0 +1,148 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_cuda_hybg_to_gpu(a,info,nzrm) + + use psb_base_mod + use cusparse_mod + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_to_gpu + implicit none + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + real(psb_dpk_), allocatable :: valdi(:) + + info = 0 + + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = HYBGDeviceFree(a%deviceMat) + end if + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! We are explicitly adding the diagonal + if (info == 0) info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = done + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = done + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + else + + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) then + if (a%is_unit()) then + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + endif + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceHybsmAnalysis(a%deviceMat) + end if + + + if (info /= 0) then + write(0,*) 'Error in HYBG_TO_GPU ',info + end if + +end subroutine psb_d_cuda_hybg_to_gpu +#endif diff --git a/cuda/impl/psb_d_cuda_hybg_vect_mv.F90 b/cuda/impl/psb_d_cuda_hybg_vect_mv.F90 new file mode 100644 index 00000000..f1119439 --- /dev/null +++ b/cuda/impl/psb_d_cuda_hybg_vect_mv.F90 @@ -0,0 +1,118 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_hybg_vect_mv + use psb_d_cuda_vect_mod + implicit none + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_cuda_hybg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_d_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_d_vect_cuda) + select type(yy => y) + type is (psb_d_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_d_cuda_hybg_vect_mv +#endif diff --git a/cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90 new file mode 100644 index 00000000..559bfb2c --- /dev/null +++ b/cuda/impl/psb_d_cuda_mv_csrg_from_coo.F90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_mv_csrg_from_coo(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_mv_csrg_from_coo + implicit none + + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + + info = psb_success_ + + call a%psb_d_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cuda_mv_csrg_from_coo diff --git a/cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90 new file mode 100644 index 00000000..c2411e90 --- /dev/null +++ b/cuda/impl/psb_d_cuda_mv_csrg_from_fmt.F90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_mv_csrg_from_fmt(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_d_cuda_csrg_mat_mod, psb_protect_name => psb_d_cuda_mv_csrg_from_fmt + implicit none + + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer, intent(out) :: info + + !locals + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_d_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return + call a%to_gpu(info) + end select + +end subroutine psb_d_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_d_cuda_mv_diag_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_diag_from_coo.F90 new file mode 100644 index 00000000..a6a39a1c --- /dev/null +++ b/cuda/impl/psb_d_cuda_mv_diag_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_mv_diag_from_coo(a,b,info) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_d_cuda_diag_mat_mod, psb_protect_name => psb_d_cuda_mv_diag_from_coo + + implicit none + + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) goto 9999 + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cuda_mv_diag_from_coo diff --git a/cuda/impl/psb_d_cuda_mv_elg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_elg_from_coo.F90 new file mode 100644 index 00000000..9886e90c --- /dev/null +++ b/cuda/impl/psb_d_cuda_mv_elg_from_coo.F90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_mv_elg_from_coo(a,b,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_mv_elg_from_coo + implicit none + + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_d_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90 new file mode 100644 index 00000000..da2d47a1 --- /dev/null +++ b/cuda/impl/psb_d_cuda_mv_elg_from_fmt.F90 @@ -0,0 +1,86 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cuda_mv_elg_from_fmt(a,b,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_cuda_elg_mat_mod, psb_protect_name => psb_d_cuda_mv_elg_from_fmt + implicit none + + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m + type(elldev_parms) :: gpu_parms + + info = psb_success_ + + if (b%is_dev()) call b%sync() + select type (b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b,info) + + class is (psb_d_ell_sparse_mat) + nzm = size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + deallocate(b%ja,stat=info) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + deallocate(b%val,stat=info) + end if + a%nzt = nza + call b%free() + call a%to_gpu(info) + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_cuda_mv_elg_from_fmt diff --git a/cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90 new file mode 100644 index 00000000..b3c4f650 --- /dev/null +++ b/cuda/impl/psb_d_cuda_mv_hdiag_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_cuda_mv_hdiag_from_coo(a,b,info) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_d_cuda_hdiag_mat_mod, psb_protect_name => psb_d_cuda_mv_hdiag_from_coo + use psb_cuda_env_mod + + implicit none + + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + + a%hacksize = psb_cuda_WarpSize() + + call a%psb_d_hdia_sparse_mat%mv_from_coo(b,info) + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cuda_mv_hdiag_from_coo diff --git a/cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90 new file mode 100644 index 00000000..95e86293 --- /dev/null +++ b/cuda/impl/psb_d_cuda_mv_hlg_from_coo.F90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_cuda_mv_hlg_from_coo(a,b,info) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_cuda_env_mod + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_mv_hlg_from_coo + implicit none + + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_d_cuda_mv_hlg_from_coo diff --git a/cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90 new file mode 100644 index 00000000..02578b19 --- /dev/null +++ b/cuda/impl/psb_d_cuda_mv_hlg_from_fmt.F90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_cuda_mv_hlg_from_fmt(a,b,info) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_d_cuda_hlg_mat_mod, psb_protect_name => psb_d_cuda_mv_hlg_from_fmt + implicit none + + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_cuda_mv_hlg_from_fmt diff --git a/cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90 b/cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90 new file mode 100644 index 00000000..3113fb13 --- /dev/null +++ b/cuda/impl/psb_d_cuda_mv_hybg_from_coo.F90 @@ -0,0 +1,59 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_cuda_mv_hybg_from_coo(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_mv_hybg_from_coo + implicit none + + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%psb_d_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cuda_mv_hybg_from_coo +#endif diff --git a/cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90 b/cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90 new file mode 100644 index 00000000..520792e9 --- /dev/null +++ b/cuda/impl/psb_d_cuda_mv_hybg_from_fmt.F90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_d_cuda_mv_hybg_from_fmt(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_d_cuda_hybg_mat_mod, psb_protect_name => psb_d_cuda_mv_hybg_from_fmt + implicit none + + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_d_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return + call a%to_gpu(info) + end select +end subroutine psb_d_cuda_mv_hybg_from_fmt +#endif diff --git a/cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90 new file mode 100644 index 00000000..0e3f9113 --- /dev/null +++ b/cuda/impl/psb_s_cuda_cp_csrg_from_coo.F90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_cp_csrg_from_coo(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_cp_csrg_from_coo + implicit none + + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_s_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cuda_cp_csrg_from_coo diff --git a/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 new file mode 100644 index 00000000..76871b59 --- /dev/null +++ b/cuda/impl/psb_s_cuda_cp_csrg_from_fmt.F90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_cp_csrg_from_fmt(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_cp_csrg_from_fmt + !use iso_c_binding + implicit none + + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + select type(b) + type is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_s_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return + call a%to_gpu(info) + end select + +end subroutine psb_s_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_s_cuda_cp_diag_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_diag_from_coo.F90 new file mode 100644 index 00000000..07025d77 --- /dev/null +++ b/cuda/impl/psb_s_cuda_cp_diag_from_coo.F90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cuda_cp_diag_from_coo(a,b,info) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_cp_diag_from_coo + implicit none + + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + call a%psb_s_dia_sparse_mat%cp_from_coo(b,info) + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cuda_cp_diag_from_coo diff --git a/cuda/impl/psb_s_cuda_cp_elg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_elg_from_coo.F90 new file mode 100644 index 00000000..66abf76a --- /dev/null +++ b/cuda/impl/psb_s_cuda_cp_elg_from_coo.F90 @@ -0,0 +1,161 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_cuda_cp_elg_from_coo(a,b,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_cp_elg_from_coo + use psi_ext_util_mod + use psb_cuda_env_mod + implicit none + + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, ld, ldv, hacksize + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + type(psb_s_coo_sparse_mat) :: tmp + integer(psb_ipk_), allocatable :: idisp(:) + + info = psb_success_ + hacksize = max(1,psb_cuda_WarpSize()) + if (b%is_dev()) call b%sync() + + if (b%is_by_rows()) then + + call psi_s_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,b%ja,b%val, a%deviceMat) + call a%set_dev() + else + call b%cp_to_coo(tmp,info) + call psi_s_count_ell_from_coo(a,tmp,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,tmp%ja,tmp%val, a%deviceMat) + + call a%set_dev() + end if + + if (info /= psb_success_) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine psi_s_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize) + + use psb_base_mod + use psi_ext_util_mod + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(out) :: info, nzm, ldv + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, & + & ir, ic, hsz_ + real(psb_dpk_) :: t0,t1 + logical, parameter :: timing=.true. + + + info = psb_success_ + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == psb_success_) call psb_realloc(nr+1,idisp,info) + if (info /= psb_success_) return + if (timing) t0=psb_wtime() + + a%irn = 0 + do i=1, nza + ir = b%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + idisp(1) = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + idisp(i+1) = a%nzt + end do + + end subroutine psi_s_count_ell_from_coo + +end subroutine psb_s_cuda_cp_elg_from_coo diff --git a/cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90 new file mode 100644 index 00000000..77df12b3 --- /dev/null +++ b/cuda/impl/psb_s_cuda_cp_elg_from_fmt.F90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cuda_cp_elg_from_fmt(a,b,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_cp_elg_from_fmt + implicit none + + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + type(elldev_parms) :: gpu_parms + + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_s_ell_sparse_mat) + nzm = psb_size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + end if + a%nzt = nza + call a%to_gpu(info) + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_cuda_cp_elg_from_fmt diff --git a/cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90 new file mode 100644 index 00000000..75210478 --- /dev/null +++ b/cuda/impl/psb_s_cuda_cp_hdiag_from_coo.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_cuda_cp_hdiag_from_coo(a,b,info) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_cp_hdiag_from_coo + use psb_cuda_env_mod + implicit none + + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + a%hacksize = psb_cuda_WarpSize() + + call a%psb_s_hdia_sparse_mat%cp_from_coo(b,info) + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cuda_cp_hdiag_from_coo diff --git a/cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90 new file mode 100644 index 00000000..c254b15a --- /dev/null +++ b/cuda/impl/psb_s_cuda_cp_hlg_from_coo.F90 @@ -0,0 +1,190 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cuda_cp_hlg_from_coo(a,b,info) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_cuda_env_mod + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_cp_hlg_from_coo + implicit none + + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + integer(psb_ipk_), allocatable :: idisp(:) + character(len=20) :: name='hll_from_coo' + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + integer(psb_ipk_), allocatable :: irn(:), ja(:), hko(:) + real(psb_dpk_), allocatable :: val(:) + logical, parameter :: debug=.false. + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + hksz = max(1,psb_cuda_WarpSize()) + + if (b%is_by_rows()) then + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + if (debug) write(0,*) 'Copying through GPU',nza + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,b%ja, b%val, a%deviceMat) + call a%set_dev() + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + if (debug) write(0,*) 'Copying through GPU' + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,tmp,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,tmp%ja, tmp%val, a%deviceMat) + + call tmp%free() + call a%set_dev() + end if + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + subroutine psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + use psb_base_mod + use psi_ext_util_mod + implicit none + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info, noffs, isz + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, kc + logical, parameter :: debug=.false. + + info = 0 + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + if (debug) write(0,*) 'Start compute hckoff_from_coo',nr,nc,nza + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == 0) call psb_realloc(nr+1,idisp,info) + if (info /= 0) return + a%irn = 0 + if (debug) then + do i=1, nza + if ((1<=b%ia(i)).and.(b%ia(i)<= nr)) then + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + else + write(0,*) 'Out of bouds IA ',i,b%ia(i),nr + end if + end do + else + do i=1, nza + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + end do + end if + a%nzt = nza + + + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (debug) write(0,*) ' noffsets ',noffs,info + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + idisp(1) = 0 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + idisp(i+1) = idisp(i) + a%irn(i) + do k=1,ir-1 + idisp(i+k+1) = idisp(i+k) + a%irn(i+k) + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) +!!$ write(*,*) 'End of psi_comput_hckoff ',info + end subroutine psi_compute_hckoff_from_coo + +end subroutine psb_s_cuda_cp_hlg_from_coo diff --git a/cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90 new file mode 100644 index 00000000..f04b65e5 --- /dev/null +++ b/cuda/impl/psb_s_cuda_cp_hlg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cuda_cp_hlg_from_fmt(a,b,info) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_cp_hlg_from_fmt + implicit none + + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_s_hll_sparse_mat%cp_from_fmt(b,info) + if (info == 0) call a%to_gpu(info) + end select + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cuda_cp_hlg_from_fmt diff --git a/cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90 b/cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90 new file mode 100644 index 00000000..69aa615b --- /dev/null +++ b/cuda/impl/psb_s_cuda_cp_hybg_from_coo.F90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_cuda_cp_hybg_from_coo(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_cp_hybg_from_coo + implicit none + + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_s_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cuda_cp_hybg_from_coo +#endif diff --git a/cuda/impl/psb_s_cuda_cp_hybg_from_fmt.F90 b/cuda/impl/psb_s_cuda_cp_hybg_from_fmt.F90 new file mode 100644 index 00000000..95e00df1 --- /dev/null +++ b/cuda/impl/psb_s_cuda_cp_hybg_from_fmt.F90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_cuda_cp_hybg_from_fmt(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_cp_hybg_from_fmt + implicit none + + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_s_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return + call a%to_gpu(info) + end select + +end subroutine psb_s_cuda_cp_hybg_from_fmt +#endif diff --git a/cuda/impl/psb_s_cuda_csrg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_csrg_allocate_mnnz.F90 new file mode 100644 index 00000000..7e6f0c86 --- /dev/null +++ b/cuda/impl/psb_s_cuda_csrg_allocate_mnnz.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cuda_csrg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use cusparse_mod + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_s_csr_sparse_mat%allocate(m,n,nz) + + info = initFcusparse() + if (info == 0) call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_csrg_allocate_mnnz diff --git a/cuda/impl/psb_s_cuda_csrg_csmm.F90 b/cuda/impl/psb_s_cuda_csrg_csmm.F90 new file mode 100644 index 00000000..453f5260 --- /dev/null +++ b/cuda/impl/psb_s_cuda_csrg_csmm.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_csmm + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_spk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csrg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_cuda_csrg_csmv + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_csrg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_cuda_csrg_from_gpu + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, n, nz + + info = 0 + + if (.not.(c_associated(a%deviceMat%mat))) then + call a%free() + return + end if + + info = CSRGDeviceGetParms(a%deviceMat,m,n,nz) + if (info /= psb_success_) return + + if (info == 0) call psb_realloc(m+1,a%irp,info) + if (info == 0) call psb_realloc(nz,a%ja,info) + if (info == 0) call psb_realloc(nz,a%val,info) + if (info == 0) info = & + & CSRGDevice2Host(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) +#if (CUDA_SHORT_VERSION <= 10) || (CUDA_VERSION < 11030) + a%irp(:) = a%irp(:)+1 + a%ja(:) = a%ja(:)+1 +#endif + + call a%set_sync() + +end subroutine psb_s_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 new file mode 100644 index 00000000..df11952c --- /dev/null +++ b/cuda/impl/psb_s_cuda_csrg_inner_vect_sv.F90 @@ -0,0 +1,125 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_inner_vect_sv + use psb_s_cuda_vect_mod + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_csrg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra.or.(beta/=dzero)) then + call x%sync() + call y%sync() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_s_vect_cuda) + select type(yy => y) + type is (psb_s_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='csrg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_csrg_inner_vect_sv diff --git a/cuda/impl/psb_s_cuda_csrg_mold.F90 b/cuda/impl/psb_s_cuda_csrg_mold.F90 new file mode 100644 index 00000000..5e33850b --- /dev/null +++ b/cuda/impl/psb_s_cuda_csrg_mold.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cuda_csrg_mold(a,b,info) + + use psb_base_mod + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_mold + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='csrg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_cuda_csrg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_csrg_mold diff --git a/cuda/impl/psb_s_cuda_csrg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_csrg_reallocate_nz.F90 new file mode 100644 index 00000000..dfd115e7 --- /dev/null +++ b/cuda/impl/psb_s_cuda_csrg_reallocate_nz.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cuda_csrg_reallocate_nz(nz,a) + + use psb_base_mod + use cusparse_mod + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='s_cuda_csrg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_s_csr_sparse_mat%reallocate(nz) + + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_csrg_reallocate_nz diff --git a/cuda/impl/psb_s_cuda_csrg_scal.F90 b/cuda/impl/psb_s_cuda_csrg_scal.F90 new file mode 100644 index 00000000..ea3406a1 --- /dev/null +++ b/cuda/impl/psb_s_cuda_csrg_scal.F90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cuda_csrg_scal(d,a,info,side) + + use psb_base_mod + use cusparse_mod + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_scal + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + call a%psb_s_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_csrg_scal diff --git a/cuda/impl/psb_s_cuda_csrg_scals.F90 b/cuda/impl/psb_s_cuda_csrg_scals.F90 new file mode 100644 index 00000000..307d5849 --- /dev/null +++ b/cuda/impl/psb_s_cuda_csrg_scals.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cuda_csrg_scals(d,a,info) + + use psb_base_mod + use cusparse_mod + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_scals + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + call a%psb_s_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_csrg_scals diff --git a/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 new file mode 100644 index 00000000..cc5b9c8d --- /dev/null +++ b/cuda/impl/psb_s_cuda_csrg_to_gpu.F90 @@ -0,0 +1,378 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cuda_csrg_to_gpu(a,info,nzrm) + + use psb_base_mod + use cusparse_mod + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_to_gpu + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + real(psb_spk_), allocatable :: valdi(:) + + info = 0 + + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = CSRGDeviceFree(a%deviceMat) + end if +#if (CUDA_SHORT_VERSION <= 10 ) + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = sone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = sone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) +!!$ if (info == 0) then +!!$ if (a%is_unit()) then +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) +!!$ else +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) +!!$ end if +!!$ end if + if ((info == 0) .and. a%is_triangle()) then + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceCsrsmAnalysis(a%deviceMat) + end if + +#elif CUDA_VERSION < 11030 + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) +!!$ write(0,*) 'Done deviceAlloc' + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_zero) +!!$ write(0,*) 'Done SetIndexBase' + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(0:nzdi),valdi(0:nzdi),stat=info) + if (info == 0) then + irpdi(1) = 0 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = sone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = sone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ write(0,*) 'Done deviceAlloc', info + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,& + & cusparse_index_base_zero) +!!$ write(0,*) 'Done setIndexBase', info + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + nzdi=a%irp(m+1)-1 + if (info == 0) allocate(irpdi(m+1),jadi(max(nzdi,1)),stat=info) + if (info == 0) then + irpdi(1:m+1) = a%irp(1:m+1) -1 + jadi(1:nzdi) = a%ja(1:nzdi) -1 + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,irpdi,jadi,a%val) +!!$ write(0,*) 'Done Host2Device', info + endif + + +#elif 0 + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = sone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = sone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + +!!$ if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) +!!$ end if + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + if ((info == 0) .and. a%is_triangle()) then + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = sone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = sone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + +#endif + call a%set_sync() + + if (info /= 0) then + write(0,*) 'Error in CSRG_TO_GPU ',info + end if + +end subroutine psb_s_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_s_cuda_csrg_vect_mv.F90 b/cuda/impl/psb_s_cuda_csrg_vect_mv.F90 new file mode 100644 index 00000000..52820436 --- /dev/null +++ b/cuda/impl/psb_s_cuda_csrg_vect_mv.F90 @@ -0,0 +1,117 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_csrg_vect_mv + use psb_s_cuda_vect_mod + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_csrg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_s_vect_cuda) + select type(yy => y) + type is (psb_s_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= szero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_s_cuda_csrg_vect_mv diff --git a/cuda/impl/psb_s_cuda_diag_csmv.F90 b/cuda/impl/psb_s_cuda_diag_csmv.F90 new file mode 100644 index 00000000..016b82bc --- /dev/null +++ b/cuda/impl/psb_s_cuda_diag_csmv.F90 @@ -0,0 +1,127 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_csmv + implicit none + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='s_cuda_diag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_cuda_diag_mold + implicit none + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='diag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_cuda_diag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_diag_mold diff --git a/cuda/impl/psb_s_cuda_diag_to_gpu.F90 b/cuda/impl/psb_s_cuda_diag_to_gpu.F90 new file mode 100644 index 00000000..c8578e75 --- /dev/null +++ b/cuda/impl/psb_s_cuda_diag_to_gpu.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cuda_diag_to_gpu(a,info,nzrm) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_to_gpu + use iso_c_binding + implicit none + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d + type(diagdev_parms) :: gpu_parms + + info = 0 + + if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return + + n = size(a%data,1) + d = size(a%data,2) + c = a%get_ncols() + !allocsize = a%get_size() + !write(*,*) 'Create the DIAG matrix' + gpu_parms = FgetDiagDeviceParams(n,c,d,spgpu_type_float) + if (c_associated(a%deviceMat)) then + call freeDiagDevice(a%deviceMat) + endif + info = FallocDiagDevice(a%deviceMat,n,c,d,spgpu_type_float) + if (info == 0) info = & + & writeDiagDevice(a%deviceMat,a%data,a%offset,n) +! if (info /= 0) goto 9999 + +end subroutine psb_s_cuda_diag_to_gpu diff --git a/cuda/impl/psb_s_cuda_diag_vect_mv.F90 b/cuda/impl/psb_s_cuda_diag_vect_mv.F90 new file mode 100644 index 00000000..d68e5193 --- /dev/null +++ b/cuda/impl/psb_s_cuda_diag_vect_mv.F90 @@ -0,0 +1,116 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_diag_vect_mv + use psb_s_cuda_vect_mod + implicit none + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_diag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_s_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_s_vect_cuda) + select type(yy => y) + type is (psb_s_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_diag_vect_mv diff --git a/cuda/impl/psb_s_cuda_dnsg_mat_impl.F90 b/cuda/impl/psb_s_cuda_dnsg_mat_impl.F90 new file mode 100644 index 00000000..86fec6a7 --- /dev/null +++ b/cuda/impl/psb_s_cuda_dnsg_mat_impl.F90 @@ -0,0 +1,428 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_s_cuda_vect_mod + use dnsdev_mod + use psb_s_vectordev_mod + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_dnsg_vect_mv + implicit none + class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + logical :: tra + character :: trans_ + real(psb_spk_), allocatable :: rx(:), ry(:) + Integer(Psb_ipk_) :: err_act, m, n, k + character(len=20) :: name='s_cuda_dnsg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (trans_ =='N') then + m = a%get_nrows() + n = 1 + k = a%get_ncols() + else + m = a%get_ncols() + n = 1 + k = a%get_nrows() + end if + select type (xx => x) + type is (psb_s_vect_cuda) + select type(yy => y) + type is (psb_s_vect_cuda) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= szero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDnsDevice(trans_,m,n,k,alpha,a%deviceMat,& + & xx%deviceVect,beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDnsDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_dnsg_vect_mv + + +subroutine psb_s_cuda_dnsg_mold(a,b,info) + use psb_base_mod + use psb_s_cuda_vect_mod + use dnsdev_mod + use psb_s_vectordev_mod + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_dnsg_mold + implicit none + class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dnsg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_cuda_dnsg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_dnsg_mold + + +!!$ +!!$ interface +!!$ subroutine psb_s_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta +!!$ class(psb_s_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_cuda_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_s_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_cuda_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_s_cuda_dnsg_allocate_mnnz +!!$ end interface + +subroutine psb_s_cuda_dnsg_to_gpu(a,info) + use psb_base_mod + use psb_s_cuda_vect_mod + use dnsdev_mod + use psb_s_vectordev_mod + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_dnsg_to_gpu + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act, pitch, lda + logical, parameter :: debug=.false. + character(len=20) :: name='s_cuda_dnsg_to_gpu' + + call psb_erractionsave(err_act) + info = psb_success_ + if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2) + info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),& + & spgpu_type_float,1) + if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2)) + if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_dnsg_to_gpu + + + +subroutine psb_s_cuda_cp_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_s_cuda_vect_mod + use dnsdev_mod + use psb_s_vectordev_mod + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_cp_dnsg_from_coo + implicit none + + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_dnsg_cp_from_coo' + integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: debug=.false. + type(psb_s_coo_sparse_mat) :: tmp + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + call a%psb_s_dns_sparse_mat%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_cp_from_coo: dns_cp',info + if (info == 0) call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_cp_dnsg_from_coo + +subroutine psb_s_cuda_cp_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_s_cuda_vect_mod + use dnsdev_mod + use psb_s_vectordev_mod + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_cp_dnsg_from_fmt + implicit none + + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + type(psb_s_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b,info) + +!!$ class is (psb_s_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_cp_dnsg_from_fmt + + + +subroutine psb_s_cuda_mv_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_s_cuda_vect_mod + use dnsdev_mod + use psb_s_vectordev_mod + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_mv_dnsg_from_coo + implicit none + + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + logical, parameter :: debug=.false. + character(len=20) :: name='s_cuda_dnsg_mv_from_coo' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_mv_from_coo: cp_from_coo:',info + call b%free() + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_mv_dnsg_from_coo + +subroutine psb_s_cuda_mv_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_s_cuda_vect_mod + use dnsdev_mod + use psb_s_vectordev_mod + use psb_s_cuda_dnsg_mat_mod, psb_protect_name => psb_s_cuda_mv_dnsg_from_fmt + implicit none + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + type(psb_s_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b,info) + +!!$ class is (psb_s_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_s_cuda_mv_dnsg_from_fmt diff --git a/cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 new file mode 100644 index 00000000..b771ca1b --- /dev/null +++ b/cuda/impl/psb_s_cuda_elg_allocate_mnnz.F90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_cuda_elg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + type(elldev_parms) :: gpu_parms + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione,izero,izero,izero,izero/)) + goto 9999 + endif + + gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_float,1) + ld = gpu_parms%pitch + nz_ = gpu_parms%maxRowSize + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_elg_allocate_mnnz diff --git a/cuda/impl/psb_s_cuda_elg_asb.f90 b/cuda/impl/psb_s_cuda_elg_asb.f90 new file mode 100644 index 00000000..53a17a32 --- /dev/null +++ b/cuda/impl/psb_s_cuda_elg_asb.f90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_elg_asb(a) + + use psb_base_mod + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_asb + implicit none + + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + + integer(psb_ipk_) :: err_act, info + character(len=20) :: name='elg_asb' + logical :: clear_ + logical, parameter :: debug=.false. + real(psb_dpk_), allocatable :: valt(:,:) + integer(psb_ipk_), allocatable :: jat(:,:) + integer(psb_ipk_) :: nr, nc + + call psb_erractionsave(err_act) + info = psb_success_ + + ! Only call sync() if we are on host + if (a%is_host()) then + call a%sync() + end if + call a%set_asb() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_elg_asb diff --git a/cuda/impl/psb_s_cuda_elg_csmm.F90 b/cuda/impl/psb_s_cuda_elg_csmm.F90 new file mode 100644 index 00000000..ff7b7848 --- /dev/null +++ b/cuda/impl/psb_s_cuda_elg_csmm.F90 @@ -0,0 +1,124 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csmm + implicit none + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_spk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_elg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_cuda_elg_csmv + implicit none + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_elg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_cuda_elg_csput_a + implicit none + + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_elg_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +!!$ write(0,*) 'In ELG_csput_a' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then +!!$ write(*,*) 'elg_csput_a ' + if (a%is_dev()) call a%sync() + call a%psb_s_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_elg_csput_a + + + +subroutine psb_s_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use iso_c_binding + use elldev_mod + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_csput_v + use psb_s_cuda_vect_mod + implicit none + + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_s_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_elg_csput_v' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw + logical :: gpu_invoked + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + integer(psb_ipk_), allocatable :: idxs(:) + logical, parameter :: debug_idxs=.false., debug_vals=.false. + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +! write(0,*) 'In ELG_csput_v' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (ia%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (ja%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (val%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + + t1=psb_wtime() + gpu_invoked = .false. + select type (ia) + class is (psb_i_vect_cuda) + select type (ja) + class is (psb_i_vect_cuda) + select type (val) + class is (psb_s_vect_cuda) + if (a%is_host()) call a%sync() + if (val%is_host()) call val%sync() + if (ia%is_host()) call ia%sync() + if (ja%is_host()) call ja%sync() + info = csputEllDeviceFloat(a%deviceMat,nz,& + & ia%deviceVect,ja%deviceVect,val%deviceVect) + call a%set_dev() + gpu_invoked=.true. + end select + end select + end select + if (.not.gpu_invoked) then +!!$ write(0,*)'Not gpu_invoked ' + if (a%is_dev()) call a%sync() + call a%psb_s_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + call a%set_host() + end if + + if (info /= 0) then + info = psb_err_internal_error_ + end if + + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_s_cuda_elg_csput_v diff --git a/cuda/impl/psb_s_cuda_elg_from_gpu.F90 b/cuda/impl/psb_s_cuda_elg_from_gpu.F90 new file mode 100644 index 00000000..d995157e --- /dev/null +++ b/cuda/impl/psb_s_cuda_elg_from_gpu.F90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_elg_from_gpu(a,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_from_gpu + implicit none + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize + + info = 0 + + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + if (info == 0) call psb_realloc(pitch,a%irn,info) + end if + if (info == 0) info = & + & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag) + call a%set_sync() + +end subroutine psb_s_cuda_elg_from_gpu diff --git a/cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90 new file mode 100644 index 00000000..537365a6 --- /dev/null +++ b/cuda/impl/psb_s_cuda_elg_inner_vect_sv.F90 @@ -0,0 +1,84 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_inner_vect_sv + use psb_s_cuda_vect_mod + implicit none + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_elg_inner_vect_sv' + logical, parameter :: debug=.false. + real(psb_spk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + if (a%is_dev()) call a%sync() + if (.false.) then + rx = x%get_vect() + ry = y%get_vect() + call a%inner_spsm(alpha,rx,beta,ry,info,trans) + call y%bld(ry) + else + call x%sync() + call y%sync() + call a%psb_s_ell_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + end if + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_elg_inner_vect_sv diff --git a/cuda/impl/psb_s_cuda_elg_mold.F90 b/cuda/impl/psb_s_cuda_elg_mold.F90 new file mode 100644 index 00000000..7ff9c7ae --- /dev/null +++ b/cuda/impl/psb_s_cuda_elg_mold.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_cuda_elg_mold(a,b,info) + + use psb_base_mod + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_mold + implicit none + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='elg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_cuda_elg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_elg_mold diff --git a/cuda/impl/psb_s_cuda_elg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_elg_reallocate_nz.F90 new file mode 100644 index 00000000..5b99b9ad --- /dev/null +++ b/cuda/impl/psb_s_cuda_elg_reallocate_nz.F90 @@ -0,0 +1,72 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_elg_reallocate_nz(nz,a) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='s_cuda_elg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + if (a%is_dev()) call a%sync() + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call a%to_gpu(info,nzrm=nzrm) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_elg_reallocate_nz diff --git a/cuda/impl/psb_s_cuda_elg_scal.F90 b/cuda/impl/psb_s_cuda_elg_scal.F90 new file mode 100644 index 00000000..dfa99cf1 --- /dev/null +++ b/cuda/impl/psb_s_cuda_elg_scal.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_elg_scal(d,a,info,side) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_scal + implicit none + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_s_ell_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_elg_scal diff --git a/cuda/impl/psb_s_cuda_elg_scals.F90 b/cuda/impl/psb_s_cuda_elg_scals.F90 new file mode 100644 index 00000000..f0aa2504 --- /dev/null +++ b/cuda/impl/psb_s_cuda_elg_scals.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_elg_scals(d,a,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_scals + implicit none + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_elg_scals diff --git a/cuda/impl/psb_s_cuda_elg_to_gpu.F90 b/cuda/impl/psb_s_cuda_elg_to_gpu.F90 new file mode 100644 index 00000000..9c16ea8d --- /dev/null +++ b/cuda/impl/psb_s_cuda_elg_to_gpu.F90 @@ -0,0 +1,84 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_elg_to_gpu(a,info,nzrm) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_to_gpu + implicit none + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize, nzt + type(elldev_parms) :: gpu_parms + + info = 0 + + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + nzt = a%get_nzeros() + if (present(nzrm)) nzm = max(nzm,nzrm) + + gpu_parms = FgetEllDeviceParams(m,nzm,nzt,n,spgpu_type_float,1) + + if (c_associated(a%deviceMat)) then + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + else + pitch = -1 + maxrowsize = -1 + end if + + if ((pitch /= gpu_parms%pitch).or.(maxrowsize /= gpu_parms%maxRowSize)) then + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + info = FallocEllDevice(a%deviceMat,m,nzm,nzt,n,spgpu_type_float,1) + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + end if + if (info == 0) then + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + end if + end if + if (info == 0) info = & + & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag) + call a%set_sync() + +end subroutine psb_s_cuda_elg_to_gpu diff --git a/cuda/impl/psb_s_cuda_elg_trim.f90 b/cuda/impl/psb_s_cuda_elg_trim.f90 new file mode 100644 index 00000000..2d390343 --- /dev/null +++ b/cuda/impl/psb_s_cuda_elg_trim.f90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_elg_trim(a) + + use psb_base_mod + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_trim + implicit none + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + ld = max(1_psb_ipk_,size(a%ja,1)) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_elg_trim diff --git a/cuda/impl/psb_s_cuda_elg_vect_mv.F90 b/cuda/impl/psb_s_cuda_elg_vect_mv.F90 new file mode 100644 index 00000000..6c898fda --- /dev/null +++ b/cuda/impl/psb_s_cuda_elg_vect_mv.F90 @@ -0,0 +1,121 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_elg_vect_mv + use psb_s_cuda_vect_mod + implicit none + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_elg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + if (tra) then + if (a%is_dev()) call a%sync() + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_s_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_s_vect_cuda) + select type(yy => y) + type is (psb_s_vect_cuda) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= szero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvEllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvELLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_elg_vect_mv diff --git a/cuda/impl/psb_s_cuda_hdiag_csmv.F90 b/cuda/impl/psb_s_cuda_hdiag_csmv.F90 new file mode 100644 index 00000000..3f34c2e7 --- /dev/null +++ b/cuda/impl/psb_s_cuda_hdiag_csmv.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_csmv + implicit none + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='s_cuda_hdiag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_cuda_hdiag_mold + implicit none + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdiag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_cuda_hdiag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_hdiag_mold diff --git a/cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 b/cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 new file mode 100644 index 00000000..bc3fa325 --- /dev/null +++ b/cuda/impl/psb_s_cuda_hdiag_to_gpu.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_hdiag_to_gpu(a,info) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_to_gpu + use iso_c_binding + implicit none + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight + type(hdiagdev_parms) :: gpu_parms + + info = 0 + + nr = a%get_nrows() + nc = a%get_ncols() + hacksize = a%hackSize + hackCount = a%nhacks + if (.not.allocated(a%hackOffsets)) then + info = -1 + return + end if + allocheight = a%hackOffsets(hackCount+1) +!!$ write(*,*) 'HDIAG TO GPU:',nr,nc,hacksize,hackCount,allocheight,& +!!$ & size(a%hackoffsets),size(a%diaoffsets), size(a%val) + if (.not.allocated(a%diaOffsets)) then + info = -2 + return + end if + if (.not.allocated(a%val)) then + info = -3 + return + end if + + if (c_associated(a%deviceMat)) then + call freeHdiagDevice(a%deviceMat) + endif + + info = FAllocHdiagDevice(a%deviceMat,nr,nc,& + & allocheight,hacksize,hackCount,spgpu_type_double) + if (info == 0) info = & + & writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets) + +end subroutine psb_s_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_s_cuda_hdiag_vect_mv.F90 b/cuda/impl/psb_s_cuda_hdiag_vect_mv.F90 new file mode 100644 index 00000000..03215047 --- /dev/null +++ b/cuda/impl/psb_s_cuda_hdiag_vect_mv.F90 @@ -0,0 +1,117 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_hdiag_vect_mv + use psb_s_cuda_vect_mod + implicit none + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_hdiag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_s_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_s_vect_cuda) + select type(yy => y) + type is (psb_s_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHdiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_hdiag_vect_mv diff --git a/cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90 new file mode 100644 index 00000000..480f6677 --- /dev/null +++ b/cuda/impl/psb_s_cuda_hlg_allocate_mnnz.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_hlg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + type(hlldev_parms) :: gpu_parms + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_s_hll_sparse_mat%allocate(m,n,nz) + + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_hlg_allocate_mnnz diff --git a/cuda/impl/psb_s_cuda_hlg_csmm.F90 b/cuda/impl/psb_s_cuda_hlg_csmm.F90 new file mode 100644 index 00000000..0dc28c7f --- /dev/null +++ b/cuda/impl/psb_s_cuda_hlg_csmm.F90 @@ -0,0 +1,122 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_csmm + implicit none + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_spk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_hlg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_cuda_hlg_csmv + implicit none + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='s_cuda_hlg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_cuda_hlg_from_gpu + implicit none + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr + + info = 0 + + if (a%is_sync()) return + if (a%is_host()) return + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + + info = getHllDeviceParams(a%deviceMat,hksize, rows, nzeros, allocsize,& + & hackOffsLength, firstIndex,avgnzr) + + if (info == 0) call a%set_nzeros(nzeros) + if (info == 0) call a%set_hksz(hksize) + if (info == 0) call psb_realloc(rows,a%irn,info) + if (info == 0) call psb_realloc(rows,a%idiag,info) + if (info == 0) call psb_realloc(allocsize,a%ja,info) + if (info == 0) call psb_realloc(allocsize,a%val,info) + if (info == 0) call psb_realloc((hackOffsLength+1),a%hkoffs,info) + + if (info == 0) info = & + & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) + call a%set_sync() + +end subroutine psb_s_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 b/cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 new file mode 100644 index 00000000..2985a1ab --- /dev/null +++ b/cuda/impl/psb_s_cuda_hlg_inner_vect_sv.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_inner_vect_sv + use psb_s_cuda_vect_mod + implicit none + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_base_inner_vect_sv' + logical, parameter :: debug=.false. + real(psb_spk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + info = psb_success_ + + + call x%sync() + call y%sync() + if (a%is_dev()) call a%sync() + call a%psb_s_hll_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_hlg_inner_vect_sv diff --git a/cuda/impl/psb_s_cuda_hlg_mold.F90 b/cuda/impl/psb_s_cuda_hlg_mold.F90 new file mode 100644 index 00000000..89e329e7 --- /dev/null +++ b/cuda/impl/psb_s_cuda_hlg_mold.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_hlg_mold(a,b,info) + + use psb_base_mod + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_mold + implicit none + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer, intent(out) :: info + Integer :: err_act + character(len=20) :: name='hlg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_cuda_hlg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_s_cuda_hlg_mold diff --git a/cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90 new file mode 100644 index 00000000..03742958 --- /dev/null +++ b/cuda/impl/psb_s_cuda_hlg_reallocate_nz.F90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_hlg_reallocate_nz(nz,a) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_reallocate_nz + use iso_c_binding + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='s_cuda_hlg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + call a%psb_s_hll_sparse_mat%reallocate(nz) + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_hlg_reallocate_nz diff --git a/cuda/impl/psb_s_cuda_hlg_scal.F90 b/cuda/impl/psb_s_cuda_hlg_scal.F90 new file mode 100644 index 00000000..7074b8b6 --- /dev/null +++ b/cuda/impl/psb_s_cuda_hlg_scal.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_hlg_scal(d,a,info,side) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_scal + implicit none + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_s_hll_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_hlg_scal diff --git a/cuda/impl/psb_s_cuda_hlg_scals.F90 b/cuda/impl/psb_s_cuda_hlg_scals.F90 new file mode 100644 index 00000000..2c9f5ae8 --- /dev/null +++ b/cuda/impl/psb_s_cuda_hlg_scals.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_hlg_scals(d,a,info) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_scals + use iso_c_binding + implicit none + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_s_hll_sparse_mat%scal(d,info) + if (info /= psb_success_) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_s_cuda_hlg_scals diff --git a/cuda/impl/psb_s_cuda_hlg_to_gpu.F90 b/cuda/impl/psb_s_cuda_hlg_to_gpu.F90 new file mode 100644 index 00000000..91cfd5ad --- /dev/null +++ b/cuda/impl/psb_s_cuda_hlg_to_gpu.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_hlg_to_gpu(a,info,nzrm) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_to_gpu + use iso_c_binding + implicit none + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, nza, n, pitch,maxrowsize, allocsize + + info = 0 + + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + n = a%get_nrows() + allocsize = a%get_size() + nza = a%get_nzeros() + if (c_associated(a%deviceMat)) then + call freehllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,a%hksz,n,nza,allocsize,spgpu_type_float,1) + if (info == 0) info = & + & writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) +! if (info /= 0) goto 9999 + +end subroutine psb_s_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_s_cuda_hlg_vect_mv.F90 b/cuda/impl/psb_s_cuda_hlg_vect_mv.F90 new file mode 100644 index 00000000..94696949 --- /dev/null +++ b/cuda/impl/psb_s_cuda_hlg_vect_mv.F90 @@ -0,0 +1,119 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_hlg_vect_mv + use psb_s_cuda_vect_mod + implicit none + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_hlg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + if (a%is_dev()) call a%sync() + call a%psb_s_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_s_vect_cuda) + select type(yy => y) + type is (psb_s_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvhllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHLLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_hlg_vect_mv diff --git a/cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90 b/cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90 new file mode 100644 index 00000000..d4649d4c --- /dev/null +++ b/cuda/impl/psb_s_cuda_hybg_allocate_mnnz.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_cuda_hybg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use cusparse_mod + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_s_csr_sparse_mat%allocate(m,n,nz) + + info = initFcusparse() + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_hybg_allocate_mnnz +#endif diff --git a/cuda/impl/psb_s_cuda_hybg_csmm.F90 b/cuda/impl/psb_s_cuda_hybg_csmm.F90 new file mode 100644 index 00000000..f321ce8b --- /dev/null +++ b/cuda/impl/psb_s_cuda_hybg_csmm.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_csmm + implicit none + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_hybg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_cuda_hybg_csmv + implicit none + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_hybg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_cuda_hybg_inner_vect_sv + use psb_s_cuda_vect_mod + implicit none + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_hybg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra.or.(beta/=szero)) then + call x%sync() + call y%sync() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_s_vect_cuda) + select type(yy => y) + type is (psb_s_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= szero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='hybg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_hybg_inner_vect_sv +#endif diff --git a/cuda/impl/psb_s_cuda_hybg_mold.F90 b/cuda/impl/psb_s_cuda_hybg_mold.F90 new file mode 100644 index 00000000..4a1fc64e --- /dev/null +++ b/cuda/impl/psb_s_cuda_hybg_mold.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_cuda_hybg_mold(a,b,info) + + use psb_base_mod + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_mold + implicit none + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hybg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_cuda_hybg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_hybg_mold +#endif diff --git a/cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90 b/cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90 new file mode 100644 index 00000000..7311ed64 --- /dev/null +++ b/cuda/impl/psb_s_cuda_hybg_reallocate_nz.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_cuda_hybg_reallocate_nz(nz,a) + + use psb_base_mod + use cusparse_mod + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='s_cuda_hybg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_s_csr_sparse_mat%reallocate(nz) + + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_hybg_reallocate_nz +#endif diff --git a/cuda/impl/psb_s_cuda_hybg_scal.F90 b/cuda/impl/psb_s_cuda_hybg_scal.F90 new file mode 100644 index 00000000..d6f8eb8d --- /dev/null +++ b/cuda/impl/psb_s_cuda_hybg_scal.F90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_cuda_hybg_scal(d,a,info,side) + + use psb_base_mod + use cusparse_mod + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_scal + implicit none + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m,n,nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_s_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_hybg_scal +#endif diff --git a/cuda/impl/psb_s_cuda_hybg_scals.F90 b/cuda/impl/psb_s_cuda_hybg_scals.F90 new file mode 100644 index 00000000..f80d8efe --- /dev/null +++ b/cuda/impl/psb_s_cuda_hybg_scals.F90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_cuda_hybg_scals(d,a,info) + + use psb_base_mod + use cusparse_mod + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_scals + implicit none + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + + call a%psb_s_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_hybg_scals +#endif diff --git a/cuda/impl/psb_s_cuda_hybg_to_gpu.F90 b/cuda/impl/psb_s_cuda_hybg_to_gpu.F90 new file mode 100644 index 00000000..168a0981 --- /dev/null +++ b/cuda/impl/psb_s_cuda_hybg_to_gpu.F90 @@ -0,0 +1,148 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_cuda_hybg_to_gpu(a,info,nzrm) + + use psb_base_mod + use cusparse_mod + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_to_gpu + implicit none + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + real(psb_spk_), allocatable :: valdi(:) + + info = 0 + + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = HYBGDeviceFree(a%deviceMat) + end if + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! We are explicitly adding the diagonal + if (info == 0) info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = sone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = sone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + else + + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) then + if (a%is_unit()) then + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + endif + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceHybsmAnalysis(a%deviceMat) + end if + + + if (info /= 0) then + write(0,*) 'Error in HYBG_TO_GPU ',info + end if + +end subroutine psb_s_cuda_hybg_to_gpu +#endif diff --git a/cuda/impl/psb_s_cuda_hybg_vect_mv.F90 b/cuda/impl/psb_s_cuda_hybg_vect_mv.F90 new file mode 100644 index 00000000..6273f31a --- /dev/null +++ b/cuda/impl/psb_s_cuda_hybg_vect_mv.F90 @@ -0,0 +1,118 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_hybg_vect_mv + use psb_s_cuda_vect_mod + implicit none + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_cuda_hybg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_s_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_s_vect_cuda) + select type(yy => y) + type is (psb_s_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= szero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_s_cuda_hybg_vect_mv +#endif diff --git a/cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 new file mode 100644 index 00000000..b61e94d6 --- /dev/null +++ b/cuda/impl/psb_s_cuda_mv_csrg_from_coo.F90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_mv_csrg_from_coo(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_mv_csrg_from_coo + implicit none + + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + + info = psb_success_ + + call a%psb_s_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cuda_mv_csrg_from_coo diff --git a/cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 new file mode 100644 index 00000000..52643a10 --- /dev/null +++ b/cuda/impl/psb_s_cuda_mv_csrg_from_fmt.F90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_mv_csrg_from_fmt(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_s_cuda_csrg_mat_mod, psb_protect_name => psb_s_cuda_mv_csrg_from_fmt + implicit none + + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer, intent(out) :: info + + !locals + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_s_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return + call a%to_gpu(info) + end select + +end subroutine psb_s_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 new file mode 100644 index 00000000..1b2fe8a1 --- /dev/null +++ b/cuda/impl/psb_s_cuda_mv_diag_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_mv_diag_from_coo(a,b,info) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_s_cuda_diag_mat_mod, psb_protect_name => psb_s_cuda_mv_diag_from_coo + + implicit none + + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) goto 9999 + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cuda_mv_diag_from_coo diff --git a/cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 new file mode 100644 index 00000000..e8dbbabf --- /dev/null +++ b/cuda/impl/psb_s_cuda_mv_elg_from_coo.F90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_mv_elg_from_coo(a,b,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_mv_elg_from_coo + implicit none + + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_s_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 new file mode 100644 index 00000000..21d9a339 --- /dev/null +++ b/cuda/impl/psb_s_cuda_mv_elg_from_fmt.F90 @@ -0,0 +1,86 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cuda_mv_elg_from_fmt(a,b,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_s_cuda_elg_mat_mod, psb_protect_name => psb_s_cuda_mv_elg_from_fmt + implicit none + + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m + type(elldev_parms) :: gpu_parms + + info = psb_success_ + + if (b%is_dev()) call b%sync() + select type (b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b,info) + + class is (psb_s_ell_sparse_mat) + nzm = size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + deallocate(b%ja,stat=info) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + deallocate(b%val,stat=info) + end if + a%nzt = nza + call b%free() + call a%to_gpu(info) + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_cuda_mv_elg_from_fmt diff --git a/cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 new file mode 100644 index 00000000..b0370ebb --- /dev/null +++ b/cuda/impl/psb_s_cuda_mv_hdiag_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_cuda_mv_hdiag_from_coo(a,b,info) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_s_cuda_hdiag_mat_mod, psb_protect_name => psb_s_cuda_mv_hdiag_from_coo + use psb_cuda_env_mod + + implicit none + + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + + a%hacksize = psb_cuda_WarpSize() + + call a%psb_s_hdia_sparse_mat%mv_from_coo(b,info) + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cuda_mv_hdiag_from_coo diff --git a/cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 new file mode 100644 index 00000000..4c8aab71 --- /dev/null +++ b/cuda/impl/psb_s_cuda_mv_hlg_from_coo.F90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_cuda_mv_hlg_from_coo(a,b,info) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_cuda_env_mod + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_mv_hlg_from_coo + implicit none + + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_s_cuda_mv_hlg_from_coo diff --git a/cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 new file mode 100644 index 00000000..a162e2aa --- /dev/null +++ b/cuda/impl/psb_s_cuda_mv_hlg_from_fmt.F90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_cuda_mv_hlg_from_fmt(a,b,info) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_s_cuda_hlg_mat_mod, psb_protect_name => psb_s_cuda_mv_hlg_from_fmt + implicit none + + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_cuda_mv_hlg_from_fmt diff --git a/cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 b/cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 new file mode 100644 index 00000000..ddae4546 --- /dev/null +++ b/cuda/impl/psb_s_cuda_mv_hybg_from_coo.F90 @@ -0,0 +1,59 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_cuda_mv_hybg_from_coo(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_mv_hybg_from_coo + implicit none + + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%psb_s_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cuda_mv_hybg_from_coo +#endif diff --git a/cuda/impl/psb_s_cuda_mv_hybg_from_fmt.F90 b/cuda/impl/psb_s_cuda_mv_hybg_from_fmt.F90 new file mode 100644 index 00000000..c6250104 --- /dev/null +++ b/cuda/impl/psb_s_cuda_mv_hybg_from_fmt.F90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_s_cuda_mv_hybg_from_fmt(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_s_cuda_hybg_mat_mod, psb_protect_name => psb_s_cuda_mv_hybg_from_fmt + implicit none + + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_s_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return + call a%to_gpu(info) + end select +end subroutine psb_s_cuda_mv_hybg_from_fmt +#endif diff --git a/cuda/impl/psb_z_cuda_cp_csrg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_csrg_from_coo.F90 new file mode 100644 index 00000000..90f3fb4f --- /dev/null +++ b/cuda/impl/psb_z_cuda_cp_csrg_from_coo.F90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_cp_csrg_from_coo(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_cp_csrg_from_coo + implicit none + + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_z_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cuda_cp_csrg_from_coo diff --git a/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 new file mode 100644 index 00000000..e086c8a4 --- /dev/null +++ b/cuda/impl/psb_z_cuda_cp_csrg_from_fmt.F90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_cp_csrg_from_fmt(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_cp_csrg_from_fmt + !use iso_c_binding + implicit none + + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + select type(b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_z_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return + call a%to_gpu(info) + end select + +end subroutine psb_z_cuda_cp_csrg_from_fmt diff --git a/cuda/impl/psb_z_cuda_cp_diag_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_diag_from_coo.F90 new file mode 100644 index 00000000..34706502 --- /dev/null +++ b/cuda/impl/psb_z_cuda_cp_diag_from_coo.F90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cuda_cp_diag_from_coo(a,b,info) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_cp_diag_from_coo + implicit none + + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + call a%psb_z_dia_sparse_mat%cp_from_coo(b,info) + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cuda_cp_diag_from_coo diff --git a/cuda/impl/psb_z_cuda_cp_elg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_elg_from_coo.F90 new file mode 100644 index 00000000..e8553cbf --- /dev/null +++ b/cuda/impl/psb_z_cuda_cp_elg_from_coo.F90 @@ -0,0 +1,161 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_cuda_cp_elg_from_coo(a,b,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_cp_elg_from_coo + use psi_ext_util_mod + use psb_cuda_env_mod + implicit none + + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, ld, ldv, hacksize + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + type(psb_z_coo_sparse_mat) :: tmp + integer(psb_ipk_), allocatable :: idisp(:) + + info = psb_success_ + hacksize = max(1,psb_cuda_WarpSize()) + if (b%is_dev()) call b%sync() + + if (b%is_by_rows()) then + + call psi_z_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,b%ja,b%val, a%deviceMat) + call a%set_dev() + else + call b%cp_to_coo(tmp,info) + call psi_z_count_ell_from_coo(a,tmp,idisp,ldv,nzm,info,hacksize=hacksize) + + + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + info = FallocEllDevice(a%deviceMat,nr,nzm,nza,nc,spgpu_type_double,1) + + if (info == 0) info = psi_CopyCooToElg(nr,nc,nza, hacksize,ldv,nzm, & + & a%irn,idisp,tmp%ja,tmp%val, a%deviceMat) + + call a%set_dev() + end if + + if (info /= psb_success_) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine psi_z_count_ell_from_coo(a,b,idisp,ldv,nzm,info,hacksize) + + use psb_base_mod + use psi_ext_util_mod + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(out) :: info, nzm, ldv + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, & + & ir, ic, hsz_ + real(psb_dpk_) :: t0,t1 + logical, parameter :: timing=.true. + + + info = psb_success_ + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == psb_success_) call psb_realloc(nr+1,idisp,info) + if (info /= psb_success_) return + if (timing) t0=psb_wtime() + + a%irn = 0 + do i=1, nza + ir = b%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + idisp(1) = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + idisp(i+1) = a%nzt + end do + + end subroutine psi_z_count_ell_from_coo + +end subroutine psb_z_cuda_cp_elg_from_coo diff --git a/cuda/impl/psb_z_cuda_cp_elg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_elg_from_fmt.F90 new file mode 100644 index 00000000..85066cef --- /dev/null +++ b/cuda/impl/psb_z_cuda_cp_elg_from_fmt.F90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cuda_cp_elg_from_fmt(a,b,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_cp_elg_from_fmt + implicit none + + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + type(elldev_parms) :: gpu_parms + + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_z_ell_sparse_mat) + nzm = psb_size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + end if + a%nzt = nza + call a%to_gpu(info) + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_cuda_cp_elg_from_fmt diff --git a/cuda/impl/psb_z_cuda_cp_hdiag_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_hdiag_from_coo.F90 new file mode 100644 index 00000000..36013faa --- /dev/null +++ b/cuda/impl/psb_z_cuda_cp_hdiag_from_coo.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_cuda_cp_hdiag_from_coo(a,b,info) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_cp_hdiag_from_coo + use psb_cuda_env_mod + implicit none + + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + a%hacksize = psb_cuda_WarpSize() + + call a%psb_z_hdia_sparse_mat%cp_from_coo(b,info) + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cuda_cp_hdiag_from_coo diff --git a/cuda/impl/psb_z_cuda_cp_hlg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_hlg_from_coo.F90 new file mode 100644 index 00000000..f7be0835 --- /dev/null +++ b/cuda/impl/psb_z_cuda_cp_hlg_from_coo.F90 @@ -0,0 +1,190 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cuda_cp_hlg_from_coo(a,b,info) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_cuda_env_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_cp_hlg_from_coo + implicit none + + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + integer(psb_ipk_), allocatable :: idisp(:) + character(len=20) :: name='hll_from_coo' + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + integer(psb_ipk_), allocatable :: irn(:), ja(:), hko(:) + real(psb_dpk_), allocatable :: val(:) + logical, parameter :: debug=.false. + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + hksz = max(1,psb_cuda_WarpSize()) + + if (b%is_by_rows()) then + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + if (debug) write(0,*) 'Copying through GPU',nza + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,b%ja, b%val, a%deviceMat) + call a%set_dev() + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + if (debug) write(0,*) 'Copying through GPU' + call psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,tmp,info) + if (info /=0) then + write(0,*) ' Error from psi_compute_hckoff:',info, noffs,isz + return + end if + if (debug)write(0,*) ' From psi_compute_hckoff:',noffs,isz,a%hkoffs(1:min(10,noffs+1)) + + if (c_associated(a%deviceMat)) then + call freeHllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,hksz,nr,nza,isz,spgpu_type_double,1) + if (info == 0) info = psi_CopyCooToHlg(nr,nc,nza, hksz,noffs,isz,& + & a%irn,a%hkoffs,idisp,tmp%ja, tmp%val, a%deviceMat) + + call tmp%free() + call a%set_dev() + end if + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +contains + subroutine psi_compute_hckoff_from_coo(a,noffs,isz,hksz,idisp,b,info) + use psb_base_mod + use psi_ext_util_mod + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), allocatable, intent(out) :: idisp(:) + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info, noffs, isz + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, kc + logical, parameter :: debug=.false. + + info = 0 + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + if (debug) write(0,*) 'Start compute hckoff_from_coo',nr,nc,nza + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info == 0) call psb_realloc(nr+1,idisp,info) + if (info /= 0) return + a%irn = 0 + if (debug) then + do i=1, nza + if ((1<=b%ia(i)).and.(b%ia(i)<= nr)) then + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + else + write(0,*) 'Out of bouds IA ',i,b%ia(i),nr + end if + end do + else + do i=1, nza + a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + end do + end if + a%nzt = nza + + + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (debug) write(0,*) ' noffsets ',noffs,info + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + idisp(1) = 0 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + idisp(i+1) = idisp(i) + a%irn(i) + do k=1,ir-1 + idisp(i+k+1) = idisp(i+k) + a%irn(i+k) + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) +!!$ write(*,*) 'End of psi_comput_hckoff ',info + end subroutine psi_compute_hckoff_from_coo + +end subroutine psb_z_cuda_cp_hlg_from_coo diff --git a/cuda/impl/psb_z_cuda_cp_hlg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_hlg_from_fmt.F90 new file mode 100644 index 00000000..253a034f --- /dev/null +++ b/cuda/impl/psb_z_cuda_cp_hlg_from_fmt.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cuda_cp_hlg_from_fmt(a,b,info) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_cp_hlg_from_fmt + implicit none + + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_z_hll_sparse_mat%cp_from_fmt(b,info) + if (info == 0) call a%to_gpu(info) + end select + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cuda_cp_hlg_from_fmt diff --git a/cuda/impl/psb_z_cuda_cp_hybg_from_coo.F90 b/cuda/impl/psb_z_cuda_cp_hybg_from_coo.F90 new file mode 100644 index 00000000..a6491d68 --- /dev/null +++ b/cuda/impl/psb_z_cuda_cp_hybg_from_coo.F90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_cuda_cp_hybg_from_coo(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_cp_hybg_from_coo + implicit none + + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_z_csr_sparse_mat%cp_from_coo(b,info) + if (info /= 0) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cuda_cp_hybg_from_coo +#endif diff --git a/cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90 b/cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90 new file mode 100644 index 00000000..748fadca --- /dev/null +++ b/cuda/impl/psb_z_cuda_cp_hybg_from_fmt.F90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_cuda_cp_hybg_from_fmt(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_cp_hybg_from_fmt + implicit none + + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b,info) + class default + call a%psb_z_csr_sparse_mat%cp_from_fmt(b,info) + if (info /= 0) return + call a%to_gpu(info) + end select + +end subroutine psb_z_cuda_cp_hybg_from_fmt +#endif diff --git a/cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90 new file mode 100644 index 00000000..a7988dd3 --- /dev/null +++ b/cuda/impl/psb_z_cuda_csrg_allocate_mnnz.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cuda_csrg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use cusparse_mod + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_z_csr_sparse_mat%allocate(m,n,nz) + + info = initFcusparse() + if (info == 0) call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_csrg_allocate_mnnz diff --git a/cuda/impl/psb_z_cuda_csrg_csmm.F90 b/cuda/impl/psb_z_cuda_csrg_csmm.F90 new file mode 100644 index 00000000..731b7417 --- /dev/null +++ b/cuda/impl/psb_z_cuda_csrg_csmm.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_csmm + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csrg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_cuda_csrg_csmv + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_csrg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_cuda_csrg_from_gpu + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, n, nz + + info = 0 + + if (.not.(c_associated(a%deviceMat%mat))) then + call a%free() + return + end if + + info = CSRGDeviceGetParms(a%deviceMat,m,n,nz) + if (info /= psb_success_) return + + if (info == 0) call psb_realloc(m+1,a%irp,info) + if (info == 0) call psb_realloc(nz,a%ja,info) + if (info == 0) call psb_realloc(nz,a%val,info) + if (info == 0) info = & + & CSRGDevice2Host(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) +#if (CUDA_SHORT_VERSION <= 10) || (CUDA_VERSION < 11030) + a%irp(:) = a%irp(:)+1 + a%ja(:) = a%ja(:)+1 +#endif + + call a%set_sync() + +end subroutine psb_z_cuda_csrg_from_gpu diff --git a/cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 new file mode 100644 index 00000000..9a3f8281 --- /dev/null +++ b/cuda/impl/psb_z_cuda_csrg_inner_vect_sv.F90 @@ -0,0 +1,125 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_inner_vect_sv + use psb_z_cuda_vect_mod + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_csrg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra.or.(beta/=dzero)) then + call x%sync() + call y%sync() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_z_vect_cuda) + select type(yy => y) + type is (psb_z_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='csrg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_csrg_inner_vect_sv diff --git a/cuda/impl/psb_z_cuda_csrg_mold.F90 b/cuda/impl/psb_z_cuda_csrg_mold.F90 new file mode 100644 index 00000000..23bb658a --- /dev/null +++ b/cuda/impl/psb_z_cuda_csrg_mold.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cuda_csrg_mold(a,b,info) + + use psb_base_mod + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_mold + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='csrg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_cuda_csrg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_csrg_mold diff --git a/cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90 new file mode 100644 index 00000000..964cd84e --- /dev/null +++ b/cuda/impl/psb_z_cuda_csrg_reallocate_nz.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cuda_csrg_reallocate_nz(nz,a) + + use psb_base_mod + use cusparse_mod + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='z_cuda_csrg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_z_csr_sparse_mat%reallocate(nz) + + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_csrg_reallocate_nz diff --git a/cuda/impl/psb_z_cuda_csrg_scal.F90 b/cuda/impl/psb_z_cuda_csrg_scal.F90 new file mode 100644 index 00000000..9d97433e --- /dev/null +++ b/cuda/impl/psb_z_cuda_csrg_scal.F90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cuda_csrg_scal(d,a,info,side) + + use psb_base_mod + use cusparse_mod + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_scal + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + call a%psb_z_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_csrg_scal diff --git a/cuda/impl/psb_z_cuda_csrg_scals.F90 b/cuda/impl/psb_z_cuda_csrg_scals.F90 new file mode 100644 index 00000000..1479ea3a --- /dev/null +++ b/cuda/impl/psb_z_cuda_csrg_scals.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cuda_csrg_scals(d,a,info) + + use psb_base_mod + use cusparse_mod + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_scals + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + call a%psb_z_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_csrg_scals diff --git a/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 new file mode 100644 index 00000000..56943f37 --- /dev/null +++ b/cuda/impl/psb_z_cuda_csrg_to_gpu.F90 @@ -0,0 +1,378 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cuda_csrg_to_gpu(a,info,nzrm) + + use psb_base_mod + use cusparse_mod + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_to_gpu + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + complex(psb_dpk_), allocatable :: valdi(:) + + info = 0 + + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = CSRGDeviceFree(a%deviceMat) + end if +#if (CUDA_SHORT_VERSION <= 10 ) + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = zone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = zone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) +!!$ if (info == 0) then +!!$ if (a%is_unit()) then +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) +!!$ else +!!$ info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) +!!$ end if +!!$ end if + if ((info == 0) .and. a%is_triangle()) then + !info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceCsrsmAnalysis(a%deviceMat) + end if + +#elif CUDA_VERSION < 11030 + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) +!!$ write(0,*) 'Done deviceAlloc' + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_zero) +!!$ write(0,*) 'Done SetIndexBase' + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(0:nzdi),valdi(0:nzdi),stat=info) + if (info == 0) then + irpdi(1) = 0 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = zone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1)-1 + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = zone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ write(0,*) 'Done deviceAlloc', info + if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,& + & cusparse_index_base_zero) +!!$ write(0,*) 'Done setIndexBase', info + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then + info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + nzdi=a%irp(m+1)-1 + if (info == 0) allocate(irpdi(m+1),jadi(max(nzdi,1)),stat=info) + if (info == 0) then + irpdi(1:m+1) = a%irp(1:m+1) -1 + jadi(1:nzdi) = a%ja(1:nzdi) -1 + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,irpdi,jadi,a%val) +!!$ write(0,*) 'Done Host2Device', info + endif + + +#elif 0 + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + !! info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = zone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = zone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) +!!$ if (info == 0) info = CSRGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + +!!$ if ((info == 0) .and. a%is_triangle()) then +!!$ info = CSRGDeviceCsrsmAnalysis(a%deviceMat) +!!$ end if + +#else + + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) then + if (a%is_unit()) then + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = CSRGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + !!! We are explicitly adding the diagonal + if ((info == 0) .and. a%is_triangle()) then + if ((info == 0).and.a%is_upper()) then + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = CSRGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = zone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = zone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + + else + + if (info == 0) info = CSRGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = CSRGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + endif + +#endif + call a%set_sync() + + if (info /= 0) then + write(0,*) 'Error in CSRG_TO_GPU ',info + end if + +end subroutine psb_z_cuda_csrg_to_gpu diff --git a/cuda/impl/psb_z_cuda_csrg_vect_mv.F90 b/cuda/impl/psb_z_cuda_csrg_vect_mv.F90 new file mode 100644 index 00000000..977d7ff9 --- /dev/null +++ b/cuda/impl/psb_z_cuda_csrg_vect_mv.F90 @@ -0,0 +1,117 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_csrg_vect_mv + use psb_z_cuda_vect_mod + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_csrg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= zzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_z_vect_cuda) + select type(yy => y) + type is (psb_z_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= zzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvCSRGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvCSRGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_z_cuda_csrg_vect_mv diff --git a/cuda/impl/psb_z_cuda_diag_csmv.F90 b/cuda/impl/psb_z_cuda_diag_csmv.F90 new file mode 100644 index 00000000..fde7147e --- /dev/null +++ b/cuda/impl/psb_z_cuda_diag_csmv.F90 @@ -0,0 +1,127 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_csmv + implicit none + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='z_cuda_diag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_cuda_diag_mold + implicit none + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='diag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_cuda_diag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_diag_mold diff --git a/cuda/impl/psb_z_cuda_diag_to_gpu.F90 b/cuda/impl/psb_z_cuda_diag_to_gpu.F90 new file mode 100644 index 00000000..672ce938 --- /dev/null +++ b/cuda/impl/psb_z_cuda_diag_to_gpu.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cuda_diag_to_gpu(a,info,nzrm) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_to_gpu + use iso_c_binding + implicit none + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, c,pitch,maxrowsize,d + type(diagdev_parms) :: gpu_parms + + info = 0 + + if ((.not.allocated(a%data)).or.(.not.allocated(a%offset))) return + + n = size(a%data,1) + d = size(a%data,2) + c = a%get_ncols() + !allocsize = a%get_size() + !write(*,*) 'Create the DIAG matrix' + gpu_parms = FgetDiagDeviceParams(n,c,d,spgpu_type_complex_double) + if (c_associated(a%deviceMat)) then + call freeDiagDevice(a%deviceMat) + endif + info = FallocDiagDevice(a%deviceMat,n,c,d,spgpu_type_complex_double) + if (info == 0) info = & + & writeDiagDevice(a%deviceMat,a%data,a%offset,n) +! if (info /= 0) goto 9999 + +end subroutine psb_z_cuda_diag_to_gpu diff --git a/cuda/impl/psb_z_cuda_diag_vect_mv.F90 b/cuda/impl/psb_z_cuda_diag_vect_mv.F90 new file mode 100644 index 00000000..c6d11f04 --- /dev/null +++ b/cuda/impl/psb_z_cuda_diag_vect_mv.F90 @@ -0,0 +1,116 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_diag_vect_mv + use psb_z_cuda_vect_mod + implicit none + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_diag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= szero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_z_dia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_z_vect_cuda) + select type(yy => y) + type is (psb_z_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_diag_vect_mv diff --git a/cuda/impl/psb_z_cuda_dnsg_mat_impl.F90 b/cuda/impl/psb_z_cuda_dnsg_mat_impl.F90 new file mode 100644 index 00000000..c79a7e8c --- /dev/null +++ b/cuda/impl/psb_z_cuda_dnsg_mat_impl.F90 @@ -0,0 +1,428 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_z_cuda_vect_mod + use dnsdev_mod + use psb_z_vectordev_mod + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_dnsg_vect_mv + implicit none + class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + logical :: tra + character :: trans_ + complex(psb_dpk_), allocatable :: rx(:), ry(:) + Integer(Psb_ipk_) :: err_act, m, n, k + character(len=20) :: name='z_cuda_dnsg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (trans_ =='N') then + m = a%get_nrows() + n = 1 + k = a%get_ncols() + else + m = a%get_ncols() + n = 1 + k = a%get_nrows() + end if + select type (xx => x) + type is (psb_z_vect_cuda) + select type(yy => y) + type is (psb_z_vect_cuda) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= zzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvDnsDevice(trans_,m,n,k,alpha,a%deviceMat,& + & xx%deviceVect,beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvDnsDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_dnsg_vect_mv + + +subroutine psb_z_cuda_dnsg_mold(a,b,info) + use psb_base_mod + use psb_z_cuda_vect_mod + use dnsdev_mod + use psb_z_vectordev_mod + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_dnsg_mold + implicit none + class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dnsg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_cuda_dnsg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_dnsg_mold + + +!!$ +!!$ interface +!!$ subroutine psb_z_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta +!!$ class(psb_z_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_cuda_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_z_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_cuda_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_z_cuda_dnsg_allocate_mnnz +!!$ end interface + +subroutine psb_z_cuda_dnsg_to_gpu(a,info) + use psb_base_mod + use psb_z_cuda_vect_mod + use dnsdev_mod + use psb_z_vectordev_mod + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_dnsg_to_gpu + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act, pitch, lda + logical, parameter :: debug=.false. + character(len=20) :: name='z_cuda_dnsg_to_gpu' + + call psb_erractionsave(err_act) + info = psb_success_ + if (debug) write(0,*) 'DNS_TO_GPU',size(a%val,1),size(a%val,2) + info = FallocDnsDevice(a%deviceMat,a%get_nrows(),a%get_ncols(),& + & spgpu_type_complex_double,1) + if (info == 0) info = writeDnsDevice(a%deviceMat,a%val,size(a%val,1),size(a%val,2)) + if (debug) write(0,*) 'DNS_TO_GPU: From writeDnsDEvice',info + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_dnsg_to_gpu + + + +subroutine psb_z_cuda_cp_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_z_cuda_vect_mod + use dnsdev_mod + use psb_z_vectordev_mod + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_cp_dnsg_from_coo + implicit none + + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_dnsg_cp_from_coo' + integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: debug=.false. + type(psb_z_coo_sparse_mat) :: tmp + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + call a%psb_z_dns_sparse_mat%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_cp_from_coo: dns_cp',info + if (info == 0) call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_cp_dnsg_from_coo + +subroutine psb_z_cuda_cp_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_z_cuda_vect_mod + use dnsdev_mod + use psb_z_vectordev_mod + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_cp_dnsg_from_fmt + implicit none + + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + type(psb_z_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b,info) + +!!$ class is (psb_z_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_cp_dnsg_from_fmt + + + +subroutine psb_z_cuda_mv_dnsg_from_coo(a,b,info) + use psb_base_mod + use psb_z_cuda_vect_mod + use dnsdev_mod + use psb_z_vectordev_mod + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_mv_dnsg_from_coo + implicit none + + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + logical, parameter :: debug=.false. + character(len=20) :: name='z_cuda_dnsg_mv_from_coo' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + if (debug) write(0,*) 'dnsg_mv_from_coo: cp_from_coo:',info + call b%free() + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_mv_dnsg_from_coo + +subroutine psb_z_cuda_mv_dnsg_from_fmt(a,b,info) + use psb_base_mod + use psb_z_cuda_vect_mod + use dnsdev_mod + use psb_z_vectordev_mod + use psb_z_cuda_dnsg_mat_mod, psb_protect_name => psb_z_cuda_mv_dnsg_from_fmt + implicit none + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + type(psb_z_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_dnsg_cp_from_fmt' + + call psb_erractionsave(err_act) + info = psb_success_ + if (b%is_dev()) call b%sync() + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b,info) + +!!$ class is (psb_z_ell_sparse_mat) +!!$ nzm = psb_size(b%ja,2) +!!$ m = b%get_nrows() +!!$ nc = b%get_ncols() +!!$ nza = b%get_nzeros() +!!$#ifdef HAVE_SPGPU +!!$ gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) +!!$ ld = gpu_parms%pitch +!!$ nzm = gpu_parms%maxRowSize +!!$#else +!!$ ld = m +!!$#endif +!!$ a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat +!!$ if (info == 0) call psb_safe_cpy( b%idiag, a%idiag , info) +!!$ if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) +!!$ if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) +!!$ if (info == 0) call psb_safe_cpy( b%val, a%val , info) +!!$ if (info == 0) call psb_realloc(ld,nzm,a%ja,info) +!!$ if (info == 0) then +!!$ a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) +!!$ end if +!!$ if (info == 0) call psb_realloc(ld,nzm,a%val,info) +!!$ if (info == 0) then +!!$ a%val(1:m,1:nzm) = b%val(1:m,1:nzm) +!!$ end if +!!$ a%nzt = nza +!!$#ifdef HAVE_SPGPU +!!$ call a%to_gpu(info) +!!$#endif + + class default + + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_z_cuda_mv_dnsg_from_fmt diff --git a/cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 new file mode 100644 index 00000000..5c54d00b --- /dev/null +++ b/cuda/impl/psb_z_cuda_elg_allocate_mnnz.F90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_cuda_elg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + type(elldev_parms) :: gpu_parms + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione,izero,izero,izero,izero/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione,izero,izero,izero,izero/)) + goto 9999 + endif + + gpu_parms = FgetEllDeviceParams(m,nz_,nz_*m,n,spgpu_type_complex_double,1) + ld = gpu_parms%pitch + nz_ = gpu_parms%maxRowSize + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_elg_allocate_mnnz diff --git a/cuda/impl/psb_z_cuda_elg_asb.f90 b/cuda/impl/psb_z_cuda_elg_asb.f90 new file mode 100644 index 00000000..65b58425 --- /dev/null +++ b/cuda/impl/psb_z_cuda_elg_asb.f90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_elg_asb(a) + + use psb_base_mod + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_asb + implicit none + + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + + integer(psb_ipk_) :: err_act, info + character(len=20) :: name='elg_asb' + logical :: clear_ + logical, parameter :: debug=.false. + real(psb_dpk_), allocatable :: valt(:,:) + integer(psb_ipk_), allocatable :: jat(:,:) + integer(psb_ipk_) :: nr, nc + + call psb_erractionsave(err_act) + info = psb_success_ + + ! Only call sync() if we are on host + if (a%is_host()) then + call a%sync() + end if + call a%set_asb() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_elg_asb diff --git a/cuda/impl/psb_z_cuda_elg_csmm.F90 b/cuda/impl/psb_z_cuda_elg_csmm.F90 new file mode 100644 index 00000000..4414f0e6 --- /dev/null +++ b/cuda/impl/psb_z_cuda_elg_csmm.F90 @@ -0,0 +1,124 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csmm + implicit none + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_elg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_cuda_elg_csmv + implicit none + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_elg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_cuda_elg_csput_a + implicit none + + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_elg_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +!!$ write(0,*) 'In ELG_csput_a' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then +!!$ write(*,*) 'elg_csput_a ' + if (a%is_dev()) call a%sync() + call a%psb_z_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_elg_csput_a + + + +subroutine psb_z_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use iso_c_binding + use elldev_mod + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_csput_v + use psb_z_cuda_vect_mod + implicit none + + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_z_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_elg_csput_v' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit, nrw + logical :: gpu_invoked + real(psb_dpk_) :: t1,t2,t3 + type(c_ptr) :: devIdxUpd + integer(psb_ipk_), allocatable :: idxs(:) + logical, parameter :: debug_idxs=.false., debug_vals=.false. + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + +! write(0,*) 'In ELG_csput_v' + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (ia%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (ja%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (val%get_nrows() < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + + t1=psb_wtime() + gpu_invoked = .false. + select type (ia) + class is (psb_i_vect_cuda) + select type (ja) + class is (psb_i_vect_cuda) + select type (val) + class is (psb_z_vect_cuda) + if (a%is_host()) call a%sync() + if (val%is_host()) call val%sync() + if (ia%is_host()) call ia%sync() + if (ja%is_host()) call ja%sync() + info = csputEllDeviceDoubleComplex(a%deviceMat,nz,& + & ia%deviceVect,ja%deviceVect,val%deviceVect) + call a%set_dev() + gpu_invoked=.true. + end select + end select + end select + if (.not.gpu_invoked) then +!!$ write(0,*)'Not gpu_invoked ' + if (a%is_dev()) call a%sync() + call a%psb_z_ell_sparse_mat%csput(nz,ia,ja,val,& + & imin,imax,jmin,jmax,info) + call a%set_host() + end if + + if (info /= 0) then + info = psb_err_internal_error_ + end if + + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + + +end subroutine psb_z_cuda_elg_csput_v diff --git a/cuda/impl/psb_z_cuda_elg_from_gpu.F90 b/cuda/impl/psb_z_cuda_elg_from_gpu.F90 new file mode 100644 index 00000000..b1291ab2 --- /dev/null +++ b/cuda/impl/psb_z_cuda_elg_from_gpu.F90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_elg_from_gpu(a,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_from_gpu + implicit none + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize + + info = 0 + + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + if (info == 0) call psb_realloc(pitch,a%irn,info) + end if + if (info == 0) info = & + & readEllDevice(a%deviceMat,a%val,a%ja,pitch,a%irn,a%idiag) + call a%set_sync() + +end subroutine psb_z_cuda_elg_from_gpu diff --git a/cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90 new file mode 100644 index 00000000..443e7cbb --- /dev/null +++ b/cuda/impl/psb_z_cuda_elg_inner_vect_sv.F90 @@ -0,0 +1,84 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_inner_vect_sv + use psb_z_cuda_vect_mod + implicit none + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_elg_inner_vect_sv' + logical, parameter :: debug=.false. + complex(psb_dpk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + if (a%is_dev()) call a%sync() + if (.false.) then + rx = x%get_vect() + ry = y%get_vect() + call a%inner_spsm(alpha,rx,beta,ry,info,trans) + call y%bld(ry) + else + call x%sync() + call y%sync() + call a%psb_z_ell_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + end if + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_elg_inner_vect_sv diff --git a/cuda/impl/psb_z_cuda_elg_mold.F90 b/cuda/impl/psb_z_cuda_elg_mold.F90 new file mode 100644 index 00000000..e9a3891d --- /dev/null +++ b/cuda/impl/psb_z_cuda_elg_mold.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_cuda_elg_mold(a,b,info) + + use psb_base_mod + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_mold + implicit none + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='elg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_cuda_elg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_elg_mold diff --git a/cuda/impl/psb_z_cuda_elg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_elg_reallocate_nz.F90 new file mode 100644 index 00000000..3a8c2760 --- /dev/null +++ b/cuda/impl/psb_z_cuda_elg_reallocate_nz.F90 @@ -0,0 +1,72 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_elg_reallocate_nz(nz,a) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='z_cuda_elg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + if (a%is_dev()) call a%sync() + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call a%to_gpu(info,nzrm=nzrm) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_elg_reallocate_nz diff --git a/cuda/impl/psb_z_cuda_elg_scal.F90 b/cuda/impl/psb_z_cuda_elg_scal.F90 new file mode 100644 index 00000000..a9846362 --- /dev/null +++ b/cuda/impl/psb_z_cuda_elg_scal.F90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_elg_scal(d,a,info,side) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_scal + implicit none + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_z_ell_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_elg_scal diff --git a/cuda/impl/psb_z_cuda_elg_scals.F90 b/cuda/impl/psb_z_cuda_elg_scals.F90 new file mode 100644 index 00000000..b4462589 --- /dev/null +++ b/cuda/impl/psb_z_cuda_elg_scals.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_elg_scals(d,a,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_scals + implicit none + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_elg_scals diff --git a/cuda/impl/psb_z_cuda_elg_to_gpu.F90 b/cuda/impl/psb_z_cuda_elg_to_gpu.F90 new file mode 100644 index 00000000..3a0ecd14 --- /dev/null +++ b/cuda/impl/psb_z_cuda_elg_to_gpu.F90 @@ -0,0 +1,84 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_elg_to_gpu(a,info,nzrm) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_to_gpu + implicit none + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize, nzt + type(elldev_parms) :: gpu_parms + + info = 0 + + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + nzm = psb_size(a%val,2) + n = a%get_ncols() + nzt = a%get_nzeros() + if (present(nzrm)) nzm = max(nzm,nzrm) + + gpu_parms = FgetEllDeviceParams(m,nzm,nzt,n,spgpu_type_complex_double,1) + + if (c_associated(a%deviceMat)) then + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + else + pitch = -1 + maxrowsize = -1 + end if + + if ((pitch /= gpu_parms%pitch).or.(maxrowsize /= gpu_parms%maxRowSize)) then + if (c_associated(a%deviceMat)) then + call freeEllDevice(a%deviceMat) + endif + info = FallocEllDevice(a%deviceMat,m,nzm,nzt,n,spgpu_type_complex_double,1) + pitch = getEllDevicePitch(a%deviceMat) + maxrowsize = getEllDeviceMaxRowSize(a%deviceMat) + end if + if (info == 0) then + if ((pitch /= psb_size(a%val,1)).or.(maxrowsize /= psb_size(a%val,2))) then + call psb_realloc(pitch,maxrowsize,a%val,info) + if (info == 0) call psb_realloc(pitch,maxrowsize,a%ja,info) + end if + end if + if (info == 0) info = & + & writeEllDevice(a%deviceMat,a%val,a%ja,size(a%ja,1),a%irn,a%idiag) + call a%set_sync() + +end subroutine psb_z_cuda_elg_to_gpu diff --git a/cuda/impl/psb_z_cuda_elg_trim.f90 b/cuda/impl/psb_z_cuda_elg_trim.f90 new file mode 100644 index 00000000..98f92efe --- /dev/null +++ b/cuda/impl/psb_z_cuda_elg_trim.f90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_elg_trim(a) + + use psb_base_mod + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_trim + implicit none + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm,ld + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + ld = max(1_psb_ipk_,size(a%ja,1)) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_elg_trim diff --git a/cuda/impl/psb_z_cuda_elg_vect_mv.F90 b/cuda/impl/psb_z_cuda_elg_vect_mv.F90 new file mode 100644 index 00000000..1b1a0720 --- /dev/null +++ b/cuda/impl/psb_z_cuda_elg_vect_mv.F90 @@ -0,0 +1,121 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_elg_vect_mv + use psb_z_cuda_vect_mod + implicit none + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_elg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + if (tra) then + if (a%is_dev()) call a%sync() + if (.not.x%is_host()) call x%sync() + if (beta /= zzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_z_ell_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_z_vect_cuda) + select type(yy => y) + type is (psb_z_vect_cuda) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (beta /= zzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvEllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvELLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + if (a%is_dev()) call a%sync() + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + if (a%is_dev()) call a%sync() + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_elg_vect_mv diff --git a/cuda/impl/psb_z_cuda_hdiag_csmv.F90 b/cuda/impl/psb_z_cuda_hdiag_csmv.F90 new file mode 100644 index 00000000..187655a8 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hdiag_csmv.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_csmv + implicit none + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='z_cuda_hdiag_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_cuda_hdiag_mold + implicit none + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdiag_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_cuda_hdiag_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_hdiag_mold diff --git a/cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 b/cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 new file mode 100644 index 00000000..a1140961 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hdiag_to_gpu.F90 @@ -0,0 +1,76 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_hdiag_to_gpu(a,info) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_to_gpu + use iso_c_binding + implicit none + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nr, nc, hacksize, hackcount, allocheight + type(hdiagdev_parms) :: gpu_parms + + info = 0 + + nr = a%get_nrows() + nc = a%get_ncols() + hacksize = a%hackSize + hackCount = a%nhacks + if (.not.allocated(a%hackOffsets)) then + info = -1 + return + end if + allocheight = a%hackOffsets(hackCount+1) +!!$ write(*,*) 'HDIAG TO GPU:',nr,nc,hacksize,hackCount,allocheight,& +!!$ & size(a%hackoffsets),size(a%diaoffsets), size(a%val) + if (.not.allocated(a%diaOffsets)) then + info = -2 + return + end if + if (.not.allocated(a%val)) then + info = -3 + return + end if + + if (c_associated(a%deviceMat)) then + call freeHdiagDevice(a%deviceMat) + endif + + info = FAllocHdiagDevice(a%deviceMat,nr,nc,& + & allocheight,hacksize,hackCount,spgpu_type_double) + if (info == 0) info = & + & writeHdiagDevice(a%deviceMat,a%val,a%diaOffsets,a%hackOffsets) + +end subroutine psb_z_cuda_hdiag_to_gpu diff --git a/cuda/impl/psb_z_cuda_hdiag_vect_mv.F90 b/cuda/impl/psb_z_cuda_hdiag_vect_mv.F90 new file mode 100644 index 00000000..aef5628c --- /dev/null +++ b/cuda/impl/psb_z_cuda_hdiag_vect_mv.F90 @@ -0,0 +1,117 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_hdiag_vect_mv + use psb_z_cuda_vect_mod + implicit none + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_hdiag_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= dzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_z_hdia_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_z_vect_cuda) + select type(yy => y) + type is (psb_z_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHdiagDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHDIAGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_hdiag_vect_mv diff --git a/cuda/impl/psb_z_cuda_hlg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_hlg_allocate_mnnz.F90 new file mode 100644 index 00000000..f8566661 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hlg_allocate_mnnz.F90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_hlg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + type(hlldev_parms) :: gpu_parms + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_z_hll_sparse_mat%allocate(m,n,nz) + + call a%to_gpu(info,nzrm=nz_) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_hlg_allocate_mnnz diff --git a/cuda/impl/psb_z_cuda_hlg_csmm.F90 b/cuda/impl/psb_z_cuda_hlg_csmm.F90 new file mode 100644 index 00000000..8eb30ef9 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hlg_csmm.F90 @@ -0,0 +1,122 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_csmm + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_dpk_), allocatable :: acc(:) + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_hlg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_cuda_hlg_csmv + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='z_cuda_hlg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_cuda_hlg_from_gpu + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: hksize,rows,nzeros,allocsize,hackOffsLength,firstIndex,avgnzr + + info = 0 + + if (a%is_sync()) return + if (a%is_host()) return + if (.not.(c_associated(a%deviceMat))) then + call a%free() + return + end if + + + info = getHllDeviceParams(a%deviceMat,hksize, rows, nzeros, allocsize,& + & hackOffsLength, firstIndex,avgnzr) + + if (info == 0) call a%set_nzeros(nzeros) + if (info == 0) call a%set_hksz(hksize) + if (info == 0) call psb_realloc(rows,a%irn,info) + if (info == 0) call psb_realloc(rows,a%idiag,info) + if (info == 0) call psb_realloc(allocsize,a%ja,info) + if (info == 0) call psb_realloc(allocsize,a%val,info) + if (info == 0) call psb_realloc((hackOffsLength+1),a%hkoffs,info) + + if (info == 0) info = & + & readHllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) + call a%set_sync() + +end subroutine psb_z_cuda_hlg_from_gpu diff --git a/cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 b/cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 new file mode 100644 index 00000000..6a914fc0 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hlg_inner_vect_sv.F90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_inner_vect_sv + use psb_z_cuda_vect_mod + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_base_inner_vect_sv' + logical, parameter :: debug=.false. + complex(psb_dpk_), allocatable :: rx(:), ry(:) + + call psb_get_erraction(err_act) + info = psb_success_ + + + call x%sync() + call y%sync() + if (a%is_dev()) call a%sync() + call a%psb_z_hll_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='inner_cssm') + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_hlg_inner_vect_sv diff --git a/cuda/impl/psb_z_cuda_hlg_mold.F90 b/cuda/impl/psb_z_cuda_hlg_mold.F90 new file mode 100644 index 00000000..bc631ece --- /dev/null +++ b/cuda/impl/psb_z_cuda_hlg_mold.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_hlg_mold(a,b,info) + + use psb_base_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_mold + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer, intent(out) :: info + Integer :: err_act + character(len=20) :: name='hlg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_cuda_hlg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_z_cuda_hlg_mold diff --git a/cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 new file mode 100644 index 00000000..ac3fbbd1 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hlg_reallocate_nz.F90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_hlg_reallocate_nz(nz,a) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_reallocate_nz + use iso_c_binding + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='z_cuda_hlg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + call a%psb_z_hll_sparse_mat%reallocate(nz) + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_hlg_reallocate_nz diff --git a/cuda/impl/psb_z_cuda_hlg_scal.F90 b/cuda/impl/psb_z_cuda_hlg_scal.F90 new file mode 100644 index 00000000..7b9df998 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hlg_scal.F90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_hlg_scal(d,a,info,side) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_scal + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_z_hll_sparse_mat%scal(d,info,side) + if (info /= psb_success_) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_hlg_scal diff --git a/cuda/impl/psb_z_cuda_hlg_scals.F90 b/cuda/impl/psb_z_cuda_hlg_scals.F90 new file mode 100644 index 00000000..b867e3b8 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hlg_scals.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_hlg_scals(d,a,info) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_scals + use iso_c_binding + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_z_hll_sparse_mat%scal(d,info) + if (info /= psb_success_) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return +end subroutine psb_z_cuda_hlg_scals diff --git a/cuda/impl/psb_z_cuda_hlg_to_gpu.F90 b/cuda/impl/psb_z_cuda_hlg_to_gpu.F90 new file mode 100644 index 00000000..8f81842a --- /dev/null +++ b/cuda/impl/psb_z_cuda_hlg_to_gpu.F90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_hlg_to_gpu(a,info,nzrm) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_to_gpu + use iso_c_binding + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, nza, n, pitch,maxrowsize, allocsize + + info = 0 + + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + n = a%get_nrows() + allocsize = a%get_size() + nza = a%get_nzeros() + if (c_associated(a%deviceMat)) then + call freehllDevice(a%deviceMat) + endif + info = FallochllDevice(a%deviceMat,a%hksz,n,nza,allocsize,spgpu_type_complex_double,1) + if (info == 0) info = & + & writehllDevice(a%deviceMat,a%val,a%ja,a%hkoffs,a%irn,a%idiag) +! if (info /= 0) goto 9999 + +end subroutine psb_z_cuda_hlg_to_gpu diff --git a/cuda/impl/psb_z_cuda_hlg_vect_mv.F90 b/cuda/impl/psb_z_cuda_hlg_vect_mv.F90 new file mode 100644 index 00000000..e2e93b85 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hlg_vect_mv.F90 @@ -0,0 +1,119 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_hlg_vect_mv + use psb_z_cuda_vect_mod + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_hlg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= zzero) then + if (.not.y%is_host()) call y%sync() + end if + if (a%is_dev()) call a%sync() + call a%psb_z_hll_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_z_vect_cuda) + select type(yy => y) + type is (psb_z_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvhllDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHLLDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + if (a%is_dev()) call a%sync() + call a%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_hlg_vect_mv diff --git a/cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 b/cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 new file mode 100644 index 00000000..39e10134 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hybg_allocate_mnnz.F90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_cuda_hybg_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use cusparse_mod + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_,ld + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_z_csr_sparse_mat%allocate(m,n,nz) + + info = initFcusparse() + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_hybg_allocate_mnnz +#endif diff --git a/cuda/impl/psb_z_cuda_hybg_csmm.F90 b/cuda/impl/psb_z_cuda_hybg_csmm.F90 new file mode 100644 index 00000000..bcd1874a --- /dev/null +++ b/cuda/impl/psb_z_cuda_hybg_csmm.F90 @@ -0,0 +1,126 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_csmm + implicit none + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + type(c_ptr) :: gpX, gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_hybg_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_cuda_hybg_csmv + implicit none + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + type(c_ptr) :: gpX + type(c_ptr) :: gpY + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_hybg_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_cuda_hybg_inner_vect_sv + use psb_z_cuda_vect_mod + implicit none + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_hybg_inner_vect_sv' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + ! This is the base version. If we get here + ! it means the derived class is incomplete, + ! so we throw an error. + info = psb_success_ + + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra.or.(beta/=zzero)) then + call x%sync() + call y%sync() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,x,beta,y,info,trans) + call y%set_host() + else + select type (xx => x) + type is (psb_z_vect_cuda) + select type(yy => y) + type is (psb_z_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= zzero) then + if (yy%is_host()) call yy%sync() + end if + info = spsvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spsvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info,name, a_err='hybg_vect_sv') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_hybg_inner_vect_sv +#endif diff --git a/cuda/impl/psb_z_cuda_hybg_mold.F90 b/cuda/impl/psb_z_cuda_hybg_mold.F90 new file mode 100644 index 00000000..5a13ff19 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hybg_mold.F90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_cuda_hybg_mold(a,b,info) + + use psb_base_mod + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_mold + implicit none + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hybg_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_cuda_hybg_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_hybg_mold +#endif diff --git a/cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 b/cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 new file mode 100644 index 00000000..f295b332 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hybg_reallocate_nz.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_cuda_hybg_reallocate_nz(nz,a) + + use psb_base_mod + use cusparse_mod + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='z_cuda_hybg_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + ! + ! What should this really do??? + ! + call a%psb_z_csr_sparse_mat%reallocate(nz) + + call a%to_gpu(info,nzrm=nz) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_hybg_reallocate_nz +#endif diff --git a/cuda/impl/psb_z_cuda_hybg_scal.F90 b/cuda/impl/psb_z_cuda_hybg_scal.F90 new file mode 100644 index 00000000..40b9d6b0 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hybg_scal.F90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_cuda_hybg_scal(d,a,info,side) + + use psb_base_mod + use cusparse_mod + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_scal + implicit none + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m,n,nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + call a%psb_z_csr_sparse_mat%scal(d,info,side=side) + if (info /= 0) goto 9999 + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_hybg_scal +#endif diff --git a/cuda/impl/psb_z_cuda_hybg_scals.F90 b/cuda/impl/psb_z_cuda_hybg_scals.F90 new file mode 100644 index 00000000..bd7b9224 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hybg_scals.F90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_cuda_hybg_scals(d,a,info) + + use psb_base_mod + use cusparse_mod + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_scals + implicit none + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, nz + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_unit()) then + call a%make_nonunit() + end if + + + call a%psb_z_csr_sparse_mat%scal(d,info) + + if (info /= 0) goto 9999 + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_hybg_scals +#endif diff --git a/cuda/impl/psb_z_cuda_hybg_to_gpu.F90 b/cuda/impl/psb_z_cuda_hybg_to_gpu.F90 new file mode 100644 index 00000000..f3a32c81 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hybg_to_gpu.F90 @@ -0,0 +1,148 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_cuda_hybg_to_gpu(a,info,nzrm) + + use psb_base_mod + use cusparse_mod + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_to_gpu + implicit none + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + + integer(psb_ipk_) :: m, nzm, n, pitch,maxrowsize,nz + integer(psb_ipk_) :: nzdi,i,j,k,nrz + integer(psb_ipk_), allocatable :: irpdi(:),jadi(:) + complex(psb_dpk_), allocatable :: valdi(:) + + info = 0 + + if ((.not.allocated(a%val)).or.(.not.allocated(a%ja))) return + + m = a%get_nrows() + n = a%get_ncols() + nz = a%get_nzeros() + if (c_associated(a%deviceMat%Mat)) then + info = HYBGDeviceFree(a%deviceMat) + end if + if (a%is_unit()) then + ! + ! CUSPARSE has the habit of storing the diagonal and then ignoring, + ! whereas we do not store it. Hence this adapter code. + ! + nzdi = nz + m + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nzdi) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! We are explicitly adding the diagonal + if (info == 0) info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) allocate(irpdi(m+1),jadi(nzdi),valdi(nzdi),stat=info) + if (info == 0) then + irpdi(1) = 1 + if (a%is_triangle().and.a%is_upper()) then + do i=1,m + j = irpdi(i) + jadi(j) = i + valdi(j) = zone + nrz = a%irp(i+1)-a%irp(i) + jadi(j+1:j+nrz) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+1:j+nrz) = a%val(a%irp(i):a%irp(i+1)-1) + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + else + do i=1,m + j = irpdi(i) + nrz = a%irp(i+1)-a%irp(i) + jadi(j+0:j+nrz-1) = a%ja(a%irp(i):a%irp(i+1)-1) + valdi(j+0:j+nrz-1) = a%val(a%irp(i):a%irp(i+1)-1) + jadi(j+nrz) = i + valdi(j+nrz) = zone + irpdi(i+1) = j + nrz + 1 + ! write(0,*) 'Row ',i,' : ',irpdi(i:i+1),':',jadi(j:j+nrz),valdi(j:j+nrz) + end do + end if + end if + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nzdi,irpdi,jadi,valdi) + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + else + + if (info == 0) info = HYBGDeviceAlloc(a%deviceMat,m,n,nz) + if (info == 0) info = HYBGDeviceSetMatIndexBase(a%deviceMat,cusparse_index_base_one) + ! Dirty trick: CUSPARSE 4.1 wants to have a matrix declared GENERAL when + ! doing csr2hyb (inside Host2Device), so we do it here, and afterwards overwrite with + ! TRIANGULAR if needed. Weird, but works. + if (info == 0) info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_general) + if (info == 0) then + if (a%is_unit()) then + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_unit) + else + info = HYBGDeviceSetMatDiagType(a%deviceMat,cusparse_diag_type_non_unit) + end if + end if + + if (info == 0) info = HYBGHost2Device(a%deviceMat,m,n,nz,a%irp,a%ja,a%val) + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceSetMatType(a%deviceMat,cusparse_matrix_type_triangular) + if ((info == 0).and.a%is_upper()) then + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_upper) + else + info = HYBGDeviceSetMatFillMode(a%deviceMat,cusparse_fill_mode_lower) + end if + end if + + endif + + if ((info == 0) .and. a%is_triangle()) then + info = HYBGDeviceHybsmAnalysis(a%deviceMat) + end if + + + if (info /= 0) then + write(0,*) 'Error in HYBG_TO_GPU ',info + end if + +end subroutine psb_z_cuda_hybg_to_gpu +#endif diff --git a/cuda/impl/psb_z_cuda_hybg_vect_mv.F90 b/cuda/impl/psb_z_cuda_hybg_vect_mv.F90 new file mode 100644 index 00000000..0cb1db22 --- /dev/null +++ b/cuda/impl/psb_z_cuda_hybg_vect_mv.F90 @@ -0,0 +1,118 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use cusparse_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_hybg_vect_mv + use psb_z_cuda_vect_mod + implicit none + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_cuda_hybg_vect_mv' + + call psb_erractionsave(err_act) + info = psb_success_ + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + if (.not.x%is_host()) call x%sync() + if (beta /= zzero) then + if (.not.y%is_host()) call y%sync() + end if + call a%psb_z_csr_sparse_mat%spmm(alpha,x,beta,y,info,trans) + call y%set_host() + else + if (a%is_host()) call a%sync() + select type (xx => x) + type is (psb_z_vect_cuda) + select type(yy => y) + type is (psb_z_vect_cuda) + if (xx%is_host()) call xx%sync() + if (beta /= zzero) then + if (yy%is_host()) call yy%sync() + end if + info = spmvHYBGDevice(a%deviceMat,alpha,xx%deviceVect,& + & beta,yy%deviceVect) + if (info /= 0) then + call psb_errpush(psb_err_from_subroutine_ai_,name,& + & a_err='spmvHYBGDevice',i_err=(/info,izero,izero,izero,izero/)) + info = psb_err_from_subroutine_ai_ + goto 9999 + end if + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%spmm(alpha,rx,beta,ry,info) + call y%bld(ry) + end select + end if + if (info /= 0) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine psb_z_cuda_hybg_vect_mv +#endif diff --git a/cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90 new file mode 100644 index 00000000..7cfd4f19 --- /dev/null +++ b/cuda/impl/psb_z_cuda_mv_csrg_from_coo.F90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_mv_csrg_from_coo(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_mv_csrg_from_coo + implicit none + + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + + info = psb_success_ + + call a%psb_z_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cuda_mv_csrg_from_coo diff --git a/cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90 new file mode 100644 index 00000000..f03294c8 --- /dev/null +++ b/cuda/impl/psb_z_cuda_mv_csrg_from_fmt.F90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_mv_csrg_from_fmt(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_z_cuda_csrg_mat_mod, psb_protect_name => psb_z_cuda_mv_csrg_from_fmt + implicit none + + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer, intent(out) :: info + + !locals + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_z_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return + call a%to_gpu(info) + end select + +end subroutine psb_z_cuda_mv_csrg_from_fmt diff --git a/cuda/impl/psb_z_cuda_mv_diag_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_diag_from_coo.F90 new file mode 100644 index 00000000..8e702e7e --- /dev/null +++ b/cuda/impl/psb_z_cuda_mv_diag_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_mv_diag_from_coo(a,b,info) + + use psb_base_mod + use diagdev_mod + use psb_vectordev_mod + use psb_z_cuda_diag_mat_mod, psb_protect_name => psb_z_cuda_mv_diag_from_coo + + implicit none + + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) goto 9999 + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cuda_mv_diag_from_coo diff --git a/cuda/impl/psb_z_cuda_mv_elg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_elg_from_coo.F90 new file mode 100644 index 00000000..f0cb23f3 --- /dev/null +++ b/cuda/impl/psb_z_cuda_mv_elg_from_coo.F90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_mv_elg_from_coo(a,b,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_mv_elg_from_coo + implicit none + + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + if (b%is_dev()) call b%sync() + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_z_cuda_mv_elg_from_coo diff --git a/cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90 new file mode 100644 index 00000000..29f63423 --- /dev/null +++ b/cuda/impl/psb_z_cuda_mv_elg_from_fmt.F90 @@ -0,0 +1,86 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cuda_mv_elg_from_fmt(a,b,info) + + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_z_cuda_elg_mat_mod, psb_protect_name => psb_z_cuda_mv_elg_from_fmt + implicit none + + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, ld, nzm, m + type(elldev_parms) :: gpu_parms + + info = psb_success_ + + if (b%is_dev()) call b%sync() + select type (b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b,info) + + class is (psb_z_ell_sparse_mat) + nzm = size(b%ja,2) + m = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + gpu_parms = FgetEllDeviceParams(m,nzm,nza,nc,spgpu_type_double,1) + ld = gpu_parms%pitch + nzm = gpu_parms%maxRowSize + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call psb_realloc(ld,nzm,a%ja,info) + if (info == 0) then + a%ja(1:m,1:nzm) = b%ja(1:m,1:nzm) + deallocate(b%ja,stat=info) + end if + if (info == 0) call psb_realloc(ld,nzm,a%val,info) + if (info == 0) then + a%val(1:m,1:nzm) = b%val(1:m,1:nzm) + deallocate(b%val,stat=info) + end if + a%nzt = nza + call b%free() + call a%to_gpu(info) + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_cuda_mv_elg_from_fmt diff --git a/cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90 new file mode 100644 index 00000000..dd6dae1e --- /dev/null +++ b/cuda/impl/psb_z_cuda_mv_hdiag_from_coo.F90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_cuda_mv_hdiag_from_coo(a,b,info) + + use psb_base_mod + use hdiagdev_mod + use psb_vectordev_mod + use psb_z_cuda_hdiag_mat_mod, psb_protect_name => psb_z_cuda_mv_hdiag_from_coo + use psb_cuda_env_mod + + implicit none + + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + + a%hacksize = psb_cuda_WarpSize() + + call a%psb_z_hdia_sparse_mat%mv_from_coo(b,info) + + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cuda_mv_hdiag_from_coo diff --git a/cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90 new file mode 100644 index 00000000..609680b9 --- /dev/null +++ b/cuda/impl/psb_z_cuda_mv_hlg_from_coo.F90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_cuda_mv_hlg_from_coo(a,b,info) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_cuda_env_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_mv_hlg_from_coo + implicit none + + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_z_cuda_mv_hlg_from_coo diff --git a/cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90 new file mode 100644 index 00000000..e67c8d83 --- /dev/null +++ b/cuda/impl/psb_z_cuda_mv_hlg_from_fmt.F90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_cuda_mv_hlg_from_fmt(a,b,info) + + use psb_base_mod + use hlldev_mod + use psb_vectordev_mod + use psb_z_cuda_hlg_mat_mod, psb_protect_name => psb_z_cuda_mv_hlg_from_fmt + implicit none + + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_cuda_mv_hlg_from_fmt diff --git a/cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90 b/cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90 new file mode 100644 index 00000000..69fda3d6 --- /dev/null +++ b/cuda/impl/psb_z_cuda_mv_hybg_from_coo.F90 @@ -0,0 +1,59 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_cuda_mv_hybg_from_coo(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_mv_hybg_from_coo + implicit none + + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%psb_z_csr_sparse_mat%mv_from_coo(b,info) + if (info /= 0) goto 9999 + call a%to_gpu(info) + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cuda_mv_hybg_from_coo +#endif diff --git a/cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90 b/cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90 new file mode 100644 index 00000000..55c33a09 --- /dev/null +++ b/cuda/impl/psb_z_cuda_mv_hybg_from_fmt.F90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +#if CUDA_SHORT_VERSION <= 10 + +subroutine psb_z_cuda_mv_hybg_from_fmt(a,b,info) + + use psb_base_mod + use cusparse_mod + use psb_z_cuda_hybg_mat_mod, psb_protect_name => psb_z_cuda_mv_hybg_from_fmt + implicit none + + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b,info) + class default + call a%psb_z_csr_sparse_mat%mv_from_fmt(b,info) + if (info /= 0) return + call a%to_gpu(info) + end select +end subroutine psb_z_cuda_mv_hybg_from_fmt +#endif diff --git a/cuda/ivectordev.c b/cuda/ivectordev.c new file mode 100644 index 00000000..241f1115 --- /dev/null +++ b/cuda/ivectordev.c @@ -0,0 +1,178 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include +//#include "utils.h" +//#include "common.h" +#include "ivectordev.h" + + +int registerMappedInt(void *buff, void **d_p, int n, int dummy) +{ + return registerMappedMemory(buff,d_p,n*sizeof(int)); +} + +int writeMultiVecDeviceInt(void* deviceVec, int* hostVec) +{ int i; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(int)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i); + } + + return(i); +} + +int writeMultiVecDeviceIntR2(void* deviceVec, int* hostVec, int ld) +{ int i; + i = writeMultiVecDeviceInt(deviceVec, (void *) hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceIntR2",i); + } + return(i); +} + +int readMultiVecDeviceInt(void* deviceVec, int* hostVec) +{ int i,j; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(int)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceInt",i); + } + return(i); +} + +int readMultiVecDeviceIntR2(void* deviceVec, int* hostVec, int ld) +{ int i; + i = readMultiVecDeviceInt(deviceVec, hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceIntR2",i); + } + return(i); +} + + +int setscalMultiVecDeviceInt(int val, int first, int last, + int indexBase, void* devMultiVecX) +{ int i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + spgpuHandle_t handle=psb_cudaGetHandle(); + + spgpuIsetscal(handle, first, last, indexBase, val, (int *) devVecX->v_); + + return(i); +} + +int geinsMultiVecDeviceInt(int n, void* devMultiVecIrl, void* devMultiVecVal, + int dupl, int indexBase, void* devMultiVecX) +{ int j=0, i=0,nmin=0,nmax=0; + int pitch = 0; + int beta; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; + struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecIrl->pitch_; + if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) + return SPGPU_UNSUPPORTED; + + //fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_); + + if (dupl == INS_OVERWRITE) + beta = 0; + else if (dupl == INS_ADD) + beta = 1; + else + beta = 0; + + spgpuIscat(handle, (int *) devVecX->v_, n, (int *)devVecVal->v_, + (int*)devVecIrl->v_, indexBase, beta); + + return(i); +} + + +int igathMultiVecDeviceIntVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + + i= igathMultiVecDeviceInt(deviceVec, vectorId, n, + first, (void*) devIdx->v_, hfirst, host_values, indexBase); + return(i); +} + +int igathMultiVecDeviceInt(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, int indexBase) +{ + int i, *idx =(int *) indexes;; + int *hv = (int *) host_values;; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_cudaGetHandle(); + + i=0; + hv = &(hv[hfirst-indexBase]); + idx = &(idx[first-indexBase]); + spgpuIgath(handle,hv, n, idx,indexBase, (int *) devVec->v_+vectorId*devVec->pitch_); + return(i); +} + +int iscatMultiVecDeviceIntVecIdx(void* deviceVec, int vectorId, int n, int first, void *deviceIdx, + int hfirst, void* host_values, int indexBase, int beta) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + i= iscatMultiVecDeviceInt(deviceVec, vectorId, n, first, + (void*) devIdx->v_, hfirst,host_values, indexBase, beta); + return(i); +} + +int iscatMultiVecDeviceInt(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, int beta) +{ int i=0; + int *hv = (int *) host_values; + int *idx=(int *) indexes; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_cudaGetHandle(); + + idx = &(idx[first-indexBase]); + hv = &(hv[hfirst-indexBase]); + spgpuIscat(handle, (int *) devVec->v_, n, hv, idx, indexBase, beta); + return SPGPU_SUCCESS; + +} diff --git a/cuda/ivectordev.h b/cuda/ivectordev.h new file mode 100644 index 00000000..2db54be4 --- /dev/null +++ b/cuda/ivectordev.h @@ -0,0 +1,61 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#pragma once +//#include "utils.h" +#include "vectordev.h" +#include "cuda_runtime.h" +#include "core.h" + +int registerMappedInt(void *, void **, int, int); +int writeMultiVecDeviceInt(void* deviceMultiVec, int* hostMultiVec); +int writeMultiVecDeviceIntR2(void* deviceMultiVec, int* hostMultiVec, int ld); +int readMultiVecDeviceInt(void* deviceMultiVec, int* hostMultiVec); +int readMultiVecDeviceIntR2(void* deviceMultiVec, int* hostMultiVec, int ld); + +int setscalMultiVecDeviceInt(int val, int first, int last, + int indexBase, void* devVecX); + +int geinsMultiVecDeviceInt(int n, void* devVecIrl, void* devVecVal, + int dupl, int indexBase, void* devVecX); + +int igathMultiVecDeviceIntVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase); +int igathMultiVecDeviceInt(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, + int indexBase); +int iscatMultiVecDeviceIntVecIdx(void* deviceVec, int vectorId, int n, int first, + void *deviceIdx, int hfirst, void* host_values, + int indexBase, int beta); +int iscatMultiVecDeviceInt(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, int beta); diff --git a/cuda/psb_base_vectordev_mod.F90 b/cuda/psb_base_vectordev_mod.F90 new file mode 100644 index 00000000..da02b2f3 --- /dev/null +++ b/cuda/psb_base_vectordev_mod.F90 @@ -0,0 +1,97 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_base_vectordev_mod + use iso_c_binding + use core_mod + + type, bind(c) :: multivec_dev_parms + integer(c_int) :: count + integer(c_int) :: element_type + integer(c_int) :: pitch + integer(c_int) :: size + end type multivec_dev_parms + + interface + function FallocMultiVecDevice(deviceVec,count,Size,elementType) & + & result(res) bind(c,name='FallocMultiVecDevice') + use iso_c_binding + integer(c_int) :: res + integer(c_int), value :: count,Size,elementType + type(c_ptr) :: deviceVec + end function FallocMultiVecDevice + end interface + + interface + subroutine unregisterMapped(buf) & + & bind(c,name='unregisterMapped') + use iso_c_binding + type(c_ptr), value :: buf + end subroutine unregisterMapped + end interface + + interface + subroutine freeMultiVecDevice(deviceVec) & + & bind(c,name='freeMultiVecDevice') + use iso_c_binding + type(c_ptr), value :: deviceVec + end subroutine freeMultiVecDevice + end interface + + interface + function getMultiVecDeviceSize(deviceVec) & + & bind(c,name='getMultiVecDeviceSize') result(res) + use iso_c_binding + type(c_ptr), value :: deviceVec + integer(c_int) :: res + end function getMultiVecDeviceSize + end interface + + interface + function getMultiVecDeviceCount(deviceVec) & + & bind(c,name='getMultiVecDeviceCount') result(res) + use iso_c_binding + type(c_ptr), value :: deviceVec + integer(c_int) :: res + end function getMultiVecDeviceCount + end interface + + interface + function getMultiVecDevicePitch(deviceVec) & + & bind(c,name='getMultiVecDevicePitch') result(res) + use iso_c_binding + type(c_ptr), value :: deviceVec + integer(c_int) :: res + end function getMultiVecDevicePitch + end interface + +end module psb_base_vectordev_mod diff --git a/cuda/psb_c_cuda_csrg_mat_mod.F90 b/cuda/psb_c_cuda_csrg_mat_mod.F90 new file mode 100644 index 00000000..1fdeec4a --- /dev/null +++ b/cuda/psb_c_cuda_csrg_mat_mod.F90 @@ -0,0 +1,375 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_cuda_csrg_mat_mod + + use iso_c_binding + use psb_c_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_csr_sparse_mat) :: psb_c_cuda_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! + type(c_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => c_cuda_csrg_get_fmt + procedure, pass(a) :: sizeof => c_cuda_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_c_cuda_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_c_cuda_csrg_csmm + procedure, pass(a) :: csmv => psb_c_cuda_csrg_csmv + procedure, pass(a) :: scals => psb_c_cuda_csrg_scals + procedure, pass(a) :: scalv => psb_c_cuda_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_c_cuda_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_cuda_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_csrg_from_fmt + procedure, pass(a) :: free => c_cuda_csrg_free + procedure, pass(a) :: mold => psb_c_cuda_csrg_mold + procedure, pass(a) :: is_host => c_cuda_csrg_is_host + procedure, pass(a) :: is_dev => c_cuda_csrg_is_dev + procedure, pass(a) :: is_sync => c_cuda_csrg_is_sync + procedure, pass(a) :: set_host => c_cuda_csrg_set_host + procedure, pass(a) :: set_dev => c_cuda_csrg_set_dev + procedure, pass(a) :: set_sync => c_cuda_csrg_set_sync + procedure, pass(a) :: sync => c_cuda_csrg_sync + procedure, pass(a) :: to_gpu => psb_c_cuda_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_c_cuda_csrg_from_gpu + final :: c_cuda_csrg_finalize + end type psb_c_cuda_csrg_sparse_mat + + private :: c_cuda_csrg_get_nzeros, c_cuda_csrg_free, c_cuda_csrg_get_fmt, & + & c_cuda_csrg_get_size, c_cuda_csrg_sizeof, c_cuda_csrg_get_nz_row + + + interface + subroutine psb_c_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_c_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_csrg_vect_mv + end interface + + interface + subroutine psb_c_cuda_csrg_reallocate_nz(nz,a) + import :: psb_c_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_csrg_reallocate_nz + end interface + + interface + subroutine psb_c_cuda_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_c_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_cuda_csrg_allocate_mnnz + end interface + + interface + subroutine psb_c_cuda_csrg_mold(a,b,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_csrg_mold + end interface + + interface + subroutine psb_c_cuda_csrg_to_gpu(a,info, nzrm) + import :: psb_c_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_c_cuda_csrg_to_gpu + end interface + + interface + subroutine psb_c_cuda_csrg_from_gpu(a,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_csrg_from_gpu + end interface + + interface + subroutine psb_c_cuda_cp_csrg_from_coo(a,b,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_csrg_from_coo + end interface + + interface + subroutine psb_c_cuda_cp_csrg_from_fmt(a,b,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_csrg_from_fmt + end interface + + interface + subroutine psb_c_cuda_mv_csrg_from_coo(a,b,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_csrg_from_coo + end interface + + interface + subroutine psb_c_cuda_mv_csrg_from_fmt(a,b,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_csrg_from_fmt + end interface + + interface + subroutine psb_c_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_csrg_csmv + end interface + interface + subroutine psb_c_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_csrg_csmm + end interface + + interface + subroutine psb_c_cuda_csrg_scal(d,a,info,side) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_cuda_csrg_scal + end interface + + interface + subroutine psb_c_cuda_csrg_scals(d,a,info) + import :: psb_c_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_cuda_csrg_sizeof(a) result(res) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function c_cuda_csrg_sizeof + + function c_cuda_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function c_cuda_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine c_cuda_csrg_set_host(a) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine c_cuda_csrg_set_host + + subroutine c_cuda_csrg_set_dev(a) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine c_cuda_csrg_set_dev + + subroutine c_cuda_csrg_set_sync(a) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine c_cuda_csrg_set_sync + + function c_cuda_csrg_is_dev(a) result(res) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function c_cuda_csrg_is_dev + + function c_cuda_csrg_is_host(a) result(res) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function c_cuda_csrg_is_host + + function c_cuda_csrg_is_sync(a) result(res) + implicit none + class(psb_c_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function c_cuda_csrg_is_sync + + + subroutine c_cuda_csrg_sync(a) + implicit none + class(psb_c_cuda_csrg_sparse_mat), target, intent(in) :: a + class(psb_c_cuda_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine c_cuda_csrg_sync + + subroutine c_cuda_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_c_csr_sparse_mat%free() + + return + + end subroutine c_cuda_csrg_free + + subroutine c_cuda_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_c_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine c_cuda_csrg_finalize + +end module psb_c_cuda_csrg_mat_mod diff --git a/cuda/psb_c_cuda_diag_mat_mod.F90 b/cuda/psb_c_cuda_diag_mat_mod.F90 new file mode 100644 index 00000000..93ffe498 --- /dev/null +++ b/cuda/psb_c_cuda_diag_mat_mod.F90 @@ -0,0 +1,289 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_cuda_diag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_c_dia_mat_mod + + type, extends(psb_c_dia_sparse_mat) :: psb_c_cuda_diag_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => c_cuda_diag_get_fmt + procedure, pass(a) :: sizeof => c_cuda_diag_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_diag_vect_mv +! procedure, pass(a) :: csmm => psb_c_cuda_diag_csmm + procedure, pass(a) :: csmv => psb_c_cuda_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_c_cuda_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_c_cuda_diag_scals +! procedure, pass(a) :: scalv => psb_c_cuda_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_c_cuda_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_c_cuda_diag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_diag_from_fmt + procedure, pass(a) :: free => c_cuda_diag_free + procedure, pass(a) :: mold => psb_c_cuda_diag_mold + procedure, pass(a) :: to_gpu => psb_c_cuda_diag_to_gpu + final :: c_cuda_diag_finalize + end type psb_c_cuda_diag_sparse_mat + + private :: c_cuda_diag_get_nzeros, c_cuda_diag_free, c_cuda_diag_get_fmt, & + & c_cuda_diag_get_size, c_cuda_diag_sizeof, c_cuda_diag_get_nz_row + + + interface + subroutine psb_c_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_diag_vect_mv + end interface + + interface + subroutine psb_c_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_c_cuda_diag_sparse_mat, psb_spk_, psb_c_base_vect_type + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_diag_inner_vect_sv + end interface + + interface + subroutine psb_c_cuda_diag_reallocate_nz(nz,a) + import :: psb_c_cuda_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_diag_reallocate_nz + end interface + + interface + subroutine psb_c_cuda_diag_allocate_mnnz(m,n,a,nz) + import :: psb_c_cuda_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_cuda_diag_allocate_mnnz + end interface + + interface + subroutine psb_c_cuda_diag_mold(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_diag_mold + end interface + + interface + subroutine psb_c_cuda_diag_to_gpu(a,info, nzrm) + import :: psb_c_cuda_diag_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_c_cuda_diag_to_gpu + end interface + + interface + subroutine psb_c_cuda_cp_diag_from_coo(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_diag_from_coo + end interface + + interface + subroutine psb_c_cuda_cp_diag_from_fmt(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_diag_from_fmt + end interface + + interface + subroutine psb_c_cuda_mv_diag_from_coo(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_diag_from_coo + end interface + + + interface + subroutine psb_c_cuda_mv_diag_from_fmt(a,b,info) + import :: psb_c_cuda_diag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_diag_from_fmt + end interface + + interface + subroutine psb_c_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_diag_csmv + end interface + interface + subroutine psb_c_cuda_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_diag_csmm + end interface + + interface + subroutine psb_c_cuda_diag_scal(d,a,info, side) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_cuda_diag_scal + end interface + + interface + subroutine psb_c_cuda_diag_scals(d,a,info) + import :: psb_c_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_diag_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_cuda_diag_sizeof(a) result(res) + implicit none + class(psb_c_cuda_diag_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function c_cuda_diag_sizeof + + function c_cuda_diag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIAG' + end function c_cuda_diag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine c_cuda_diag_free(a) + use diagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_dia_sparse_mat%free() + + return + + end subroutine c_cuda_diag_free + + subroutine c_cuda_diag_finalize(a) + use diagdev_mod + implicit none + type(psb_c_cuda_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine c_cuda_diag_finalize + +end module psb_c_cuda_diag_mat_mod diff --git a/cuda/psb_c_cuda_dnsg_mat_mod.F90 b/cuda/psb_c_cuda_dnsg_mat_mod.F90 new file mode 100644 index 00000000..b0ca8c46 --- /dev/null +++ b/cuda/psb_c_cuda_dnsg_mat_mod.F90 @@ -0,0 +1,275 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_cuda_dnsg_mat_mod + + use iso_c_binding + use psb_c_mat_mod + use psb_c_dns_mat_mod + use dnsdev_mod + + type, extends(psb_c_dns_sparse_mat) :: psb_c_cuda_dnsg_sparse_mat + ! + ! ITPACK/DNS format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of DNS, indistinguishable. + ! + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => c_cuda_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => c_cuda_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_c_cuda_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_c_cuda_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_c_cuda_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_c_cuda_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_c_cuda_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_c_cuda_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_c_cuda_dnsg_allocate_mnnz + ! Note: we *do* need the TO methods, because of the need to invoke SYNC + ! + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_dnsg_from_fmt + procedure, pass(a) :: free => c_cuda_dnsg_free + procedure, pass(a) :: mold => psb_c_cuda_dnsg_mold + procedure, pass(a) :: to_gpu => psb_c_cuda_dnsg_to_gpu + final :: c_cuda_dnsg_finalize + end type psb_c_cuda_dnsg_sparse_mat + + private :: c_cuda_dnsg_get_nzeros, c_cuda_dnsg_free, c_cuda_dnsg_get_fmt, & + & c_cuda_dnsg_get_size, c_cuda_dnsg_get_nz_row + + + interface + subroutine psb_c_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_dnsg_vect_mv + end interface +!!$ +!!$ interface +!!$ subroutine psb_c_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_c_base_vect_type +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta +!!$ class(psb_c_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_cuda_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_c_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_cuda_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_c_cuda_dnsg_allocate_mnnz +!!$ end interface + + interface + subroutine psb_c_cuda_dnsg_mold(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_dnsg_mold + end interface + + interface + subroutine psb_c_cuda_dnsg_to_gpu(a,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_dnsg_to_gpu + end interface + + interface + subroutine psb_c_cuda_cp_dnsg_from_coo(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_dnsg_from_coo + end interface + + interface + subroutine psb_c_cuda_cp_dnsg_from_fmt(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_dnsg_from_fmt + end interface + + interface + subroutine psb_c_cuda_mv_dnsg_from_coo(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_dnsg_from_coo + end interface + + + interface + subroutine psb_c_cuda_mv_dnsg_from_fmt(a,b,info) + import :: psb_c_cuda_dnsg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_dnsg_from_fmt + end interface + +!!$ interface +!!$ subroutine psb_c_cuda_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:) +!!$ complex(psb_spk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_cuda_dnsg_csmv +!!$ end interface +!!$ interface +!!$ subroutine psb_c_cuda_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_cuda_dnsg_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_cuda_dnsg_scal(d,a,info, side) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_c_cuda_dnsg_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_cuda_dnsg_scals(d,a,info) +!!$ import :: psb_c_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_cuda_dnsg_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + + function c_cuda_dnsg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNSG' + end function c_cuda_dnsg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine c_cuda_dnsg_free(a) + use dnsdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_dns_sparse_mat%free() + + return + + end subroutine c_cuda_dnsg_free + + subroutine c_cuda_dnsg_finalize(a) + use dnsdev_mod + implicit none + type(psb_c_cuda_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine c_cuda_dnsg_finalize + +end module psb_c_cuda_dnsg_mat_mod diff --git a/cuda/psb_c_cuda_elg_mat_mod.F90 b/cuda/psb_c_cuda_elg_mat_mod.F90 new file mode 100644 index 00000000..c9b48005 --- /dev/null +++ b/cuda/psb_c_cuda_elg_mat_mod.F90 @@ -0,0 +1,456 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_cuda_elg_mat_mod + + use iso_c_binding + use psb_c_mat_mod + use psb_c_ell_mat_mod + use psb_i_cuda_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_ell_sparse_mat) :: psb_c_cuda_elg_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of ELL, indistinguishable. + ! + type(c_ptr) :: deviceMat = c_null_ptr + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => c_cuda_elg_get_fmt + procedure, pass(a) :: sizeof => c_cuda_elg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_elg_vect_mv + procedure, pass(a) :: csmm => psb_c_cuda_elg_csmm + procedure, pass(a) :: csmv => psb_c_cuda_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_c_cuda_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_c_cuda_elg_scals + procedure, pass(a) :: scalv => psb_c_cuda_elg_scal + procedure, pass(a) :: reallocate_nz => psb_c_cuda_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_cuda_elg_allocate_mnnz + procedure, pass(a) :: reinit => c_cuda_elg_reinit + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_elg_from_fmt + procedure, pass(a) :: free => c_cuda_elg_free + procedure, pass(a) :: mold => psb_c_cuda_elg_mold + procedure, pass(a) :: csput_a => psb_c_cuda_elg_csput_a + procedure, pass(a) :: csput_v => psb_c_cuda_elg_csput_v + procedure, pass(a) :: is_host => c_cuda_elg_is_host + procedure, pass(a) :: is_dev => c_cuda_elg_is_dev + procedure, pass(a) :: is_sync => c_cuda_elg_is_sync + procedure, pass(a) :: set_host => c_cuda_elg_set_host + procedure, pass(a) :: set_dev => c_cuda_elg_set_dev + procedure, pass(a) :: set_sync => c_cuda_elg_set_sync + procedure, pass(a) :: sync => c_cuda_elg_sync + procedure, pass(a) :: from_gpu => psb_c_cuda_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_c_cuda_elg_to_gpu + procedure, pass(a) :: asb => psb_c_cuda_elg_asb + final :: c_cuda_elg_finalize + end type psb_c_cuda_elg_sparse_mat + + private :: c_cuda_elg_get_nzeros, c_cuda_elg_free, c_cuda_elg_get_fmt, & + & c_cuda_elg_get_size, c_cuda_elg_sizeof, c_cuda_elg_get_nz_row, c_cuda_elg_sync + + + interface + subroutine psb_c_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_elg_vect_mv + end interface + + interface + subroutine psb_c_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_c_cuda_elg_sparse_mat, psb_spk_, psb_c_base_vect_type + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_elg_inner_vect_sv + end interface + + interface + subroutine psb_c_cuda_elg_reallocate_nz(nz,a) + import :: psb_c_cuda_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_elg_reallocate_nz + end interface + + interface + subroutine psb_c_cuda_elg_allocate_mnnz(m,n,a,nz) + import :: psb_c_cuda_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_cuda_elg_allocate_mnnz + end interface + + interface + subroutine psb_c_cuda_elg_mold(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_elg_mold + end interface + + interface + subroutine psb_c_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_elg_csput_a + end interface + + interface + subroutine psb_c_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_c_base_vect_type,& + & psb_i_base_vect_type + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_c_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_elg_csput_v + end interface + + interface + subroutine psb_c_cuda_elg_from_gpu(a,info) + import :: psb_c_cuda_elg_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_elg_from_gpu + end interface + + interface + subroutine psb_c_cuda_elg_to_gpu(a,info, nzrm) + import :: psb_c_cuda_elg_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_c_cuda_elg_to_gpu + end interface + + interface + subroutine psb_c_cuda_cp_elg_from_coo(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_elg_from_coo + end interface + + interface + subroutine psb_c_cuda_cp_elg_from_fmt(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_elg_from_fmt + end interface + + interface + subroutine psb_c_cuda_mv_elg_from_coo(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_elg_from_coo + end interface + + + interface + subroutine psb_c_cuda_mv_elg_from_fmt(a,b,info) + import :: psb_c_cuda_elg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_elg_from_fmt + end interface + + interface + subroutine psb_c_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_elg_csmv + end interface + interface + subroutine psb_c_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_elg_csmm + end interface + + interface + subroutine psb_c_cuda_elg_scal(d,a,info, side) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_cuda_elg_scal + end interface + + interface + subroutine psb_c_cuda_elg_scals(d,a,info) + import :: psb_c_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_elg_scals + end interface + + interface + subroutine psb_c_cuda_elg_asb(a) + import :: psb_c_cuda_elg_sparse_mat + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_elg_asb + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_cuda_elg_sizeof(a) result(res) + implicit none + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function c_cuda_elg_sizeof + + function c_cuda_elg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELG' + end function c_cuda_elg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + subroutine c_cuda_elg_reinit(a,clear) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + integer(psb_ipk_) :: isz, err_act + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev().or.a%is_sync()) then + if (clear_) call zeroEllDevice(a%deviceMat) + call a%set_dev() + else if (a%is_host()) then + a%val(:,:) = czero + end if + call a%set_upd() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine c_cuda_elg_reinit + + subroutine c_cuda_elg_free(a) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_ell_sparse_mat%free() + call a%set_sync() + + return + + end subroutine c_cuda_elg_free + + subroutine c_cuda_elg_sync(a) + implicit none + class(psb_c_cuda_elg_sparse_mat), target, intent(in) :: a + class(psb_c_cuda_elg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine c_cuda_elg_sync + + subroutine c_cuda_elg_set_host(a) + implicit none + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine c_cuda_elg_set_host + + subroutine c_cuda_elg_set_dev(a) + implicit none + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine c_cuda_elg_set_dev + + subroutine c_cuda_elg_set_sync(a) + implicit none + class(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine c_cuda_elg_set_sync + + function c_cuda_elg_is_dev(a) result(res) + implicit none + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function c_cuda_elg_is_dev + + function c_cuda_elg_is_host(a) result(res) + implicit none + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function c_cuda_elg_is_host + + function c_cuda_elg_is_sync(a) result(res) + implicit none + class(psb_c_cuda_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function c_cuda_elg_is_sync + + subroutine c_cuda_elg_finalize(a) + use elldev_mod + implicit none + type(psb_c_cuda_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + return + + end subroutine c_cuda_elg_finalize + +end module psb_c_cuda_elg_mat_mod diff --git a/cuda/psb_c_cuda_hdiag_mat_mod.F90 b/cuda/psb_c_cuda_hdiag_mat_mod.F90 new file mode 100644 index 00000000..f06e501e --- /dev/null +++ b/cuda/psb_c_cuda_hdiag_mat_mod.F90 @@ -0,0 +1,268 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_cuda_hdiag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_c_hdia_mat_mod + + type, extends(psb_c_hdia_sparse_mat) :: psb_c_cuda_hdiag_sparse_mat + ! + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => c_cuda_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => c_cuda_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_c_cuda_hdiag_csmm + procedure, pass(a) :: csmv => psb_c_cuda_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_c_cuda_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_c_cuda_hdiag_scals + ! procedure, pass(a) :: scalv => psb_c_cuda_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_c_cuda_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_c_cuda_hdiag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_hdiag_from_fmt + procedure, pass(a) :: free => c_cuda_hdiag_free + procedure, pass(a) :: mold => psb_c_cuda_hdiag_mold + procedure, pass(a) :: to_gpu => psb_c_cuda_hdiag_to_gpu + final :: c_cuda_hdiag_finalize + end type psb_c_cuda_hdiag_sparse_mat + + private :: c_cuda_hdiag_get_nzeros, c_cuda_hdiag_free, c_cuda_hdiag_get_fmt, & + & c_cuda_hdiag_get_size, c_cuda_hdiag_sizeof, c_cuda_hdiag_get_nz_row + + + interface + subroutine psb_c_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_hdiag_vect_mv + end interface + +!!$ interface +!!$ subroutine psb_c_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_c_base_vect_type +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta +!!$ class(psb_c_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_cuda_hdiag_inner_vect_sv +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_cuda_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_cuda_hdiag_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_cuda_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_c_cuda_hdiag_allocate_mnnz +!!$ end interface + + interface + subroutine psb_c_cuda_hdiag_mold(a,b,info) + import :: psb_c_cuda_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_hdiag_mold + end interface + + interface + subroutine psb_c_cuda_hdiag_to_gpu(a,info) + import :: psb_c_cuda_hdiag_sparse_mat, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_hdiag_to_gpu + end interface + + interface + subroutine psb_c_cuda_cp_hdiag_from_coo(a,b,info) + import :: psb_c_cuda_hdiag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_hdiag_from_coo + end interface + +!!$ interface +!!$ subroutine psb_c_cuda_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_c_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_cuda_cp_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_c_cuda_mv_hdiag_from_coo(a,b,info) + import :: psb_c_cuda_hdiag_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_hdiag_from_coo + end interface + +!!$ +!!$ interface +!!$ subroutine psb_c_cuda_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_c_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_cuda_mv_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_c_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_hdiag_csmv + end interface + +!!$ interface +!!$ subroutine psb_c_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_cuda_hdiag_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_cuda_hdiag_scal(d,a,info, side) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_c_cuda_hdiag_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_cuda_hdiag_scals(d,a,info) +!!$ import :: psb_c_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_cuda_hdiag_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + function c_cuda_hdiag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIAG' + end function c_cuda_hdiag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine c_cuda_hdiag_free(a) + use hdiagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_hdia_sparse_mat%free() + + return + + end subroutine c_cuda_hdiag_free + + subroutine c_cuda_hdiag_finalize(a) + use hdiagdev_mod + implicit none + type(psb_c_cuda_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_hdia_sparse_mat%free() + + return + end subroutine c_cuda_hdiag_finalize + +end module psb_c_cuda_hdiag_mat_mod diff --git a/cuda/psb_c_cuda_hlg_mat_mod.F90 b/cuda/psb_c_cuda_hlg_mat_mod.F90 new file mode 100644 index 00000000..e98f2474 --- /dev/null +++ b/cuda/psb_c_cuda_hlg_mat_mod.F90 @@ -0,0 +1,379 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_cuda_hlg_mat_mod + + use iso_c_binding + use psb_c_mat_mod + use psb_c_hll_mat_mod + + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_hll_sparse_mat) :: psb_c_cuda_hlg_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! + type(c_ptr) :: deviceMat = c_null_ptr + integer :: devstate = is_host + + contains + procedure, nopass :: get_fmt => c_cuda_hlg_get_fmt + procedure, pass(a) :: sizeof => c_cuda_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_hlg_vect_mv + procedure, pass(a) :: csmm => psb_c_cuda_hlg_csmm + procedure, pass(a) :: csmv => psb_c_cuda_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_c_cuda_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_c_cuda_hlg_scals + procedure, pass(a) :: scalv => psb_c_cuda_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_c_cuda_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_cuda_hlg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_hlg_from_fmt + procedure, pass(a) :: free => c_cuda_hlg_free + procedure, pass(a) :: mold => psb_c_cuda_hlg_mold + procedure, pass(a) :: is_host => c_cuda_hlg_is_host + procedure, pass(a) :: is_dev => c_cuda_hlg_is_dev + procedure, pass(a) :: is_sync => c_cuda_hlg_is_sync + procedure, pass(a) :: set_host => c_cuda_hlg_set_host + procedure, pass(a) :: set_dev => c_cuda_hlg_set_dev + procedure, pass(a) :: set_sync => c_cuda_hlg_set_sync + procedure, pass(a) :: sync => c_cuda_hlg_sync + procedure, pass(a) :: from_gpu => psb_c_cuda_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_c_cuda_hlg_to_gpu + final :: c_cuda_hlg_finalize + end type psb_c_cuda_hlg_sparse_mat + + private :: c_cuda_hlg_get_nzeros, c_cuda_hlg_free, c_cuda_hlg_get_fmt, & + & c_cuda_hlg_get_size, c_cuda_hlg_sizeof, c_cuda_hlg_get_nz_row + + + interface + subroutine psb_c_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_hlg_vect_mv + end interface + + interface + subroutine psb_c_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_c_base_vect_type + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_hlg_inner_vect_sv + end interface + + interface + subroutine psb_c_cuda_hlg_reallocate_nz(nz,a) + import :: psb_c_cuda_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_hlg_reallocate_nz + end interface + + interface + subroutine psb_c_cuda_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_c_cuda_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_cuda_hlg_allocate_mnnz + end interface + + interface + subroutine psb_c_cuda_hlg_mold(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_hlg_mold + end interface + + interface + subroutine psb_c_cuda_hlg_from_gpu(a,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_hlg_from_gpu + end interface + + interface + subroutine psb_c_cuda_hlg_to_gpu(a,info, nzrm) + import :: psb_c_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_c_cuda_hlg_to_gpu + end interface + + interface + subroutine psb_c_cuda_cp_hlg_from_coo(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_hlg_from_coo + end interface + + interface + subroutine psb_c_cuda_cp_hlg_from_fmt(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_hlg_from_fmt + end interface + + interface + subroutine psb_c_cuda_mv_hlg_from_coo(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_hlg_from_coo + end interface + + + interface + subroutine psb_c_cuda_mv_hlg_from_fmt(a,b,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_hlg_from_fmt + end interface + + interface + subroutine psb_c_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_hlg_csmv + end interface + interface + subroutine psb_c_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_hlg_csmm + end interface + + interface + subroutine psb_c_cuda_hlg_scal(d,a,info, side) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_cuda_hlg_scal + end interface + + interface + subroutine psb_c_cuda_hlg_scals(d,a,info) + import :: psb_c_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_hlg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_cuda_hlg_sizeof(a) result(res) + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function c_cuda_hlg_sizeof + + function c_cuda_hlg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLG' + end function c_cuda_hlg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine c_cuda_hlg_free(a) + use hlldev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_c_hll_sparse_mat%free() + + return + + end subroutine c_cuda_hlg_free + + + subroutine c_cuda_hlg_sync(a) + implicit none + class(psb_c_cuda_hlg_sparse_mat), target, intent(in) :: a + class(psb_c_cuda_hlg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine c_cuda_hlg_sync + + subroutine c_cuda_hlg_set_host(a) + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine c_cuda_hlg_set_host + + subroutine c_cuda_hlg_set_dev(a) + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine c_cuda_hlg_set_dev + + subroutine c_cuda_hlg_set_sync(a) + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine c_cuda_hlg_set_sync + + function c_cuda_hlg_is_dev(a) result(res) + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function c_cuda_hlg_is_dev + + function c_cuda_hlg_is_host(a) result(res) + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function c_cuda_hlg_is_host + + function c_cuda_hlg_is_sync(a) result(res) + implicit none + class(psb_c_cuda_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function c_cuda_hlg_is_sync + + + subroutine c_cuda_hlg_finalize(a) + use hlldev_mod + implicit none + type(psb_c_cuda_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine c_cuda_hlg_finalize + +end module psb_c_cuda_hlg_mat_mod diff --git a/cuda/psb_c_cuda_hybg_mat_mod.F90 b/cuda/psb_c_cuda_hybg_mat_mod.F90 new file mode 100644 index 00000000..1c94bc0f --- /dev/null +++ b/cuda/psb_c_cuda_hybg_mat_mod.F90 @@ -0,0 +1,287 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +#if CUDA_SHORT_VERSION <= 10 + +module psb_c_cuda_hybg_mat_mod + + use iso_c_binding + use psb_c_mat_mod + use cusparse_mod + + type, extends(psb_c_csr_sparse_mat) :: psb_c_cuda_hybg_sparse_mat + ! + ! HYBG. An interface to the cuSPARSE HYB + ! On the CPU side we keep a CSR storage. + ! + ! + ! + ! + type(c_Hmat) :: deviceMat + + contains + procedure, nopass :: get_fmt => c_cuda_hybg_get_fmt + procedure, pass(a) :: sizeof => c_cuda_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_c_cuda_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_c_cuda_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_c_cuda_hybg_csmm + procedure, pass(a) :: csmv => psb_c_cuda_hybg_csmv + procedure, pass(a) :: scals => psb_c_cuda_hybg_scals + procedure, pass(a) :: scalv => psb_c_cuda_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_c_cuda_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_cuda_hybg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_c_cuda_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_cuda_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_cuda_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_cuda_mv_hybg_from_fmt + procedure, pass(a) :: free => c_cuda_hybg_free + procedure, pass(a) :: mold => psb_c_cuda_hybg_mold + procedure, pass(a) :: to_gpu => psb_c_cuda_hybg_to_gpu + final :: c_cuda_hybg_finalize + end type psb_c_cuda_hybg_sparse_mat + + private :: c_cuda_hybg_get_nzeros, c_cuda_hybg_free, c_cuda_hybg_get_fmt, & + & c_cuda_hybg_get_size, c_cuda_hybg_sizeof, c_cuda_hybg_get_nz_row + + + interface + subroutine psb_c_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_hybg_inner_vect_sv + end interface + + interface + subroutine psb_c_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_c_base_vect_type, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_hybg_vect_mv + end interface + + interface + subroutine psb_c_cuda_hybg_reallocate_nz(nz,a) + import :: psb_c_cuda_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_c_cuda_hybg_reallocate_nz + end interface + + interface + subroutine psb_c_cuda_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_c_cuda_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_cuda_hybg_allocate_mnnz + end interface + + interface + subroutine psb_c_cuda_hybg_mold(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_hybg_mold + end interface + + interface + subroutine psb_c_cuda_hybg_to_gpu(a,info, nzrm) + import :: psb_c_cuda_hybg_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_c_cuda_hybg_to_gpu + end interface + + interface + subroutine psb_c_cuda_cp_hybg_from_coo(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_hybg_from_coo + end interface + + interface + subroutine psb_c_cuda_cp_hybg_from_fmt(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_cp_hybg_from_fmt + end interface + + interface + subroutine psb_c_cuda_mv_hybg_from_coo(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_hybg_from_coo + end interface + + interface + subroutine psb_c_cuda_mv_hybg_from_fmt(a,b,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_mv_hybg_from_fmt + end interface + + interface + subroutine psb_c_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_hybg_csmv + end interface + interface + subroutine psb_c_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_cuda_hybg_csmm + end interface + + interface + subroutine psb_c_cuda_hybg_scal(d,a,info,side) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_cuda_hybg_scal + end interface + + interface + subroutine psb_c_cuda_hybg_scals(d,a,info) + import :: psb_c_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cuda_hybg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_cuda_hybg_sizeof(a) result(res) + implicit none + class(psb_c_cuda_hybg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function c_cuda_hybg_sizeof + + function c_cuda_hybg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HYBG' + end function c_cuda_hybg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine c_cuda_hybg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + class(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + call a%psb_c_csr_sparse_mat%free() + + return + + end subroutine c_cuda_hybg_free + + subroutine c_cuda_hybg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + type(psb_c_cuda_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + + return + end subroutine c_cuda_hybg_finalize + +end module psb_c_cuda_hybg_mat_mod +#endif diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 new file mode 100644 index 00000000..2c2a4f61 --- /dev/null +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -0,0 +1,2099 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_cuda_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_c_vect_mod + use psb_cuda_env_mod + use psb_i_vect_mod + use psb_i_cuda_vect_mod + use psb_i_vectordev_mod + use psb_c_vectordev_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_base_vect_type) :: psb_c_vect_cuda + integer :: state = is_host + type(c_ptr) :: deviceVect = c_null_ptr + complex(c_float_complex), allocatable :: pinned_buffer(:) + type(c_ptr) :: dt_p_buf = c_null_ptr + complex(c_float_complex), allocatable :: buffer(:) + type(c_ptr) :: dt_buf = c_null_ptr + integer :: dt_buf_sz = 0 + type(c_ptr) :: i_buf = c_null_ptr + integer :: i_buf_sz = 0 + contains + procedure, pass(x) :: get_nrows => c_cuda_get_nrows + procedure, nopass :: get_fmt => c_cuda_get_fmt + + procedure, pass(x) :: all => c_cuda_all + procedure, pass(x) :: zero => c_cuda_zero + procedure, pass(x) :: asb_m => c_cuda_asb_m + procedure, pass(x) :: sync => c_cuda_sync + procedure, pass(x) :: sync_space => c_cuda_sync_space + procedure, pass(x) :: bld_x => c_cuda_bld_x + procedure, pass(x) :: bld_mn => c_cuda_bld_mn + procedure, pass(x) :: free => c_cuda_free + procedure, pass(x) :: ins_a => c_cuda_ins_a + procedure, pass(x) :: ins_v => c_cuda_ins_v + procedure, pass(x) :: is_host => c_cuda_is_host + procedure, pass(x) :: is_dev => c_cuda_is_dev + procedure, pass(x) :: is_sync => c_cuda_is_sync + procedure, pass(x) :: set_host => c_cuda_set_host + procedure, pass(x) :: set_dev => c_cuda_set_dev + procedure, pass(x) :: set_sync => c_cuda_set_sync + procedure, pass(x) :: set_scal => c_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => c_cuda_set_vect + procedure, pass(x) :: gthzv_x => c_cuda_gthzv_x + procedure, pass(y) :: sctb => c_cuda_sctb + procedure, pass(y) :: sctb_x => c_cuda_sctb_x + procedure, pass(x) :: gthzbuf => c_cuda_gthzbuf + procedure, pass(y) :: sctb_buf => c_cuda_sctb_buf + procedure, pass(x) :: new_buffer => c_cuda_new_buffer + procedure, nopass :: device_wait => c_cuda_device_wait + procedure, pass(x) :: free_buffer => c_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => c_cuda_maybe_free_buffer + procedure, pass(x) :: dot_v => c_cuda_dot_v + procedure, pass(x) :: dot_a => c_cuda_dot_a + procedure, pass(y) :: axpby_v => c_cuda_axpby_v + procedure, pass(y) :: axpby_a => c_cuda_axpby_a + procedure, pass(z) :: abgdxyz => c_cuda_abgdxyz + procedure, pass(y) :: mlt_v => c_cuda_mlt_v + procedure, pass(y) :: mlt_a => c_cuda_mlt_a + procedure, pass(z) :: mlt_a_2 => c_cuda_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => c_cuda_mlt_v_2 + procedure, pass(x) :: scal => c_cuda_scal + procedure, pass(x) :: nrm2 => c_cuda_nrm2 + procedure, pass(x) :: amax => c_cuda_amax + procedure, pass(x) :: asum => c_cuda_asum + procedure, pass(x) :: absval1 => c_cuda_absval1 + procedure, pass(x) :: absval2 => c_cuda_absval2 + + final :: c_cuda_vect_finalize + end type psb_c_vect_cuda + + public :: psb_c_vect_cuda_ + private :: constructor + interface psb_c_vect_cuda_ + module procedure constructor + end interface psb_c_vect_cuda_ + +contains + + function constructor(x) result(this) + complex(psb_spk_) :: x(:) + type(psb_c_vect_cuda) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x),info) + + end function constructor + + subroutine c_cuda_device_wait() + call psb_cudaSync() + end subroutine c_cuda_device_wait + + subroutine c_cuda_new_buffer(n,x,info) + use psb_realloc_mod + use psb_cuda_env_mod + implicit none + class(psb_c_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + + if (psb_cuda_DeviceHasUVA()) then + if (allocated(x%combuf)) then + if (size(x%combuf) idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_cuda_DeviceHasUVA()) then + ! + ! Only need a sync in this branch; in the others + ! cudamemCpy acts as a sync point. + ! + if (allocated(x%pinned_buffer)) then + if (size(x%pinned_buffer) < n) then + call inner_unregister(x%pinned_buffer) + deallocate(x%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(x%pinned_buffer)) then + allocate(x%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(x%pinned_buffer,x%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + info = igathMultiVecDeviceFloatComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_p_buf, 1) + call psb_cudaSync() + y(1:n) = x%pinned_buffer(1:n) + + else + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeFloatComplex(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateFloatComplex(x%dt_buf,n) + x%dt_buf_sz=n + end if + if (info == 0) & + & info = igathMultiVecDeviceFloatComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_buf, 1) + if (info == 0) & + & info = readFloatComplex(x%dt_buf,y,n) + + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + + if (x%i_buf_sz < ni) then + if (c_associated(x%i_buf)) then + call freeInt(x%i_buf) + x%i_buf = c_null_ptr + end if + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeFloatComplex(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateFloatComplex(x%dt_buf,n) + x%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(x%i_buf,ii%v,ni) + if (info == 0) & + & info = igathMultiVecDeviceFloatComplex(x%deviceVect,& + & 0, n, i, x%i_buf, 1, x%dt_buf, 1) + if (info == 0) & + & info = readFloatComplex(x%dt_buf,y,n) + + end select + + end subroutine c_cuda_gthzv_x + + subroutine c_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i,n + class(psb_i_base_vect_type) :: idx + class(psb_c_vect_cuda) :: x + integer :: info, ni + + info = 0 +!!$ write(0,*) 'Starting gth_zbuf' + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'gthzbuf') + return + end if + + select type(ii=> idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_cuda_DeviceHasUVA()) then + info = igathMultiVecDeviceFloatComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) + + else + info = igathMultiVecDeviceFloatComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_buf, 1) + if (info == 0) & + & info = readFloatComplex(i,x%dt_buf,x%combuf(i:),n,1) + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + info = 0 + if (.not.c_associated(x%i_buf)) then + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,x%i_buf,ii%v(i:),n,1) + + if (info == 0) & + & info = igathMultiVecDeviceFloatComplex(x%deviceVect,& + & 0, n, i, x%i_buf, i,x%dt_buf, 1) + + if (info == 0) & + & info = readFloatComplex(i,x%dt_buf,x%combuf(i:),n,1) + + end select + + end subroutine c_cuda_gthzbuf + + subroutine c_cuda_sctb(n,idx,x,beta,y) + implicit none + !use psb_const_mod + integer(psb_ipk_) :: n, idx(:) + complex(psb_spk_) :: beta, x(:) + class(psb_c_vect_cuda) :: y + integer(psb_ipk_) :: info + + if (n == 0) return + + if (y%is_dev()) call y%sync() + + call y%psb_c_base_vect_type%sctb(n,idx,x,beta) + call y%set_host() + + end subroutine c_cuda_sctb + + subroutine c_cuda_sctb_x(i,n,idx,x,beta,y) + use psb_cuda_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + complex(psb_spk_) :: beta, x(:) + class(psb_c_vect_cuda) :: y + integer :: info, ni + + select type(ii=> idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + + ! + if (psb_cuda_DeviceHasUVA()) then + if (allocated(y%pinned_buffer)) then + if (size(y%pinned_buffer) < n) then + call inner_unregister(y%pinned_buffer) + deallocate(y%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(y%pinned_buffer)) then + allocate(y%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(y%pinned_buffer,y%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + y%pinned_buffer(1:n) = x(1:n) + info = iscatMultiVecDeviceFloatComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_p_buf, 1,beta) + else + + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeFloatComplex(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateFloatComplex(y%dt_buf,n) + y%dt_buf_sz=n + end if + info = writeFloatComplex(y%dt_buf,x,n) + info = iscatMultiVecDeviceFloatComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_buf, 1,beta) + + end if + + class default + ni = size(ii%v) + + if (y%i_buf_sz < ni) then + if (c_associated(y%i_buf)) then + call freeInt(y%i_buf) + y%i_buf = c_null_ptr + end if + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeFloatComplex(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateFloatComplex(y%dt_buf,n) + y%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(y%i_buf,ii%v(i:i+n-1),n) + info = writeFloatComplex(y%dt_buf,x,n) + info = iscatMultiVecDeviceFloatComplex(y%deviceVect,& + & 0, n, 1, y%i_buf, 1, y%dt_buf, 1,beta) + + + end select + ! + ! Need a sync here to make sure we are not reallocating + ! the buffers before iscatMulti has finished. + ! + call psb_cudaSync() + call y%set_dev() + + end subroutine c_cuda_sctb_x + + subroutine c_cuda_sctb_buf(i,n,idx,beta,y) + use psi_serial_mod + use psb_cuda_env_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + complex(psb_spk_) :: beta + class(psb_c_vect_cuda) :: y + integer(psb_ipk_) :: info, ni + +!!$ write(0,*) 'Starting sctb_buf' + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'sctb_buf') + return + end if + + + select type(ii=> idx) + class is (psb_i_vect_cuda) + + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + if (psb_cuda_DeviceHasUVA()) then + info = iscatMultiVecDeviceFloatComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) + else + info = writeFloatComplex(i,y%dt_buf,y%combuf(i:),n,1) + info = iscatMultiVecDeviceFloatComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_buf, 1,beta) + + end if + + class default + !call y%sct(n,ii%v(i:),x,beta) + ni = size(ii%v) + info = 0 + if (.not.c_associated(y%i_buf)) then + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,y%i_buf,ii%v(i:),n,1) + if (info == 0) & + & info = writeFloatComplex(i,y%dt_buf,y%combuf(i:),n,1) + if (info == 0) info = iscatMultiVecDeviceFloatComplex(y%deviceVect,& + & 0, n, i, y%i_buf, i, y%dt_buf, 1,beta) + end select +!!$ write(0,*) 'Done sctb_buf' + + end subroutine c_cuda_sctb_buf + + + subroutine c_cuda_bld_x(x,this) + use psb_base_mod + complex(psb_spk_), intent(in) :: this(:) + class(psb_c_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this),x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'c_cuda_bld_x',& + & i_err=(/size(this),izero,izero,izero,izero/)) + end if + x%v(:) = this(:) + call x%set_host() + call x%sync() + + end subroutine c_cuda_bld_x + + subroutine c_cuda_bld_mn(x,n) + integer(psb_mpk_), intent(in) :: n + class(psb_c_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n,info) + if (info /= 0) then + call psb_errpush(info,'c_cuda_bld_n',i_err=(/n,n,n,n,n/)) + end if + + end subroutine c_cuda_bld_mn + + subroutine c_cuda_set_host(x) + implicit none + class(psb_c_vect_cuda), intent(inout) :: x + + x%state = is_host + end subroutine c_cuda_set_host + + subroutine c_cuda_set_dev(x) + implicit none + class(psb_c_vect_cuda), intent(inout) :: x + + x%state = is_dev + end subroutine c_cuda_set_dev + + subroutine c_cuda_set_sync(x) + implicit none + class(psb_c_vect_cuda), intent(inout) :: x + + x%state = is_sync + end subroutine c_cuda_set_sync + + function c_cuda_is_dev(x) result(res) + implicit none + class(psb_c_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function c_cuda_is_dev + + function c_cuda_is_host(x) result(res) + implicit none + class(psb_c_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function c_cuda_is_host + + function c_cuda_is_sync(x) result(res) + implicit none + class(psb_c_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function c_cuda_is_sync + + + function c_cuda_get_nrows(x) result(res) + implicit none + class(psb_c_vect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = 0 + if (allocated(x%v)) res = size(x%v) + end function c_cuda_get_nrows + + function c_cuda_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'cGPU' + end function c_cuda_get_fmt + + subroutine c_cuda_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_c_vect_cuda), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n,x%v,info) + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'c_cuda_all',& + & i_err=(/n,n,n,n,n/)) + end if + end subroutine c_cuda_all + + subroutine c_cuda_zero(x) + use psi_serial_mod + implicit none + class(psb_c_vect_cuda), intent(inout) :: x + ! Since we are overwriting, make sure to do it + ! on the GPU side + call x%set_dev() + call x%set_scal(czero) + end subroutine c_cuda_zero + + subroutine c_cuda_asb_m(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_c_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + if (x%is_dev()) then + nd = getMultiVecDeviceSize(x%deviceVect) + if (nd < n) then + call x%sync() + call x%psb_c_base_vect_type%asb(n,info) + if (info == psb_success_) call x%sync_space(info) + call x%set_host() + end if + else ! + if (x%get_nrows() size(x%v)).or.(n > x%get_nrows())) then +!!$ write(0,*) 'Incoherent situation : sizes',n,size(x%v),x%get_nrows() + call psb_realloc(n,x%v,info) + end if + info = readMultiVecDevice(x%deviceVect,x%v) + end if + if (info == 0) call x%set_sync() + if (info /= 0) then + info=psb_err_internal_error_ + call psb_errpush(info,'c_cuda_sync') + end if + + end subroutine c_cuda_sync + + subroutine c_cuda_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_c_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) deallocate(x%v, stat=info) + if (c_associated(x%deviceVect)) then +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' + call freeMultiVecDevice(x%deviceVect) + x%deviceVect=c_null_ptr + end if + call x%free_buffer(info) + call x%set_sync() + end subroutine c_cuda_free + + subroutine c_cuda_set_scal(x,val,first,last) + class(psb_c_vect_cuda), intent(inout) :: x + complex(psb_spk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: info, first_, last_ + + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1,first) + if (present(last)) last_ = min(last,last_) + + info = setScalDevice(val,first_,last_,1,x%deviceVect) + call x%set_dev() + + end subroutine c_cuda_set_scal +!!$ +!!$ subroutine c_cuda_set_vect(x,val) +!!$ class(psb_c_vect_cuda), intent(inout) :: x +!!$ complex(psb_spk_), intent(in) :: val(:) +!!$ integer(psb_ipk_) :: nr +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_c_base_vect_type%set_vect(val) +!!$ call x%set_host() +!!$ +!!$ end subroutine c_cuda_set_vect + + + + function c_cuda_dot_v(n,x,y) result(res) + implicit none + class(psb_c_vect_cuda), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(in) :: n + complex(psb_spk_) :: res + complex(psb_spk_), external :: ddot + integer(psb_ipk_) :: info + + res = czero + ! + ! Note: this is the gpu implementation. + ! When we get here, we are sure that X is of + ! TYPE psb_c_vect + ! + select type(yy => y) + type is (psb_c_base_vect_type) + if (x%is_dev()) call x%sync() + res = ddot(n,x%v,1,yy%v,1) + type is (psb_c_vect_cuda) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) + if (info /= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,'c_cuda_dot_v') + end if + + class default + ! y%sync is done in dot_a + call x%sync() + res = y%dot(n,x%v) + end select + + end function c_cuda_dot_v + + function c_cuda_dot_a(n,x,y) result(res) + implicit none + class(psb_c_vect_cuda), intent(inout) :: x + complex(psb_spk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + complex(psb_spk_) :: res + complex(psb_spk_), external :: ddot + + if (x%is_dev()) call x%sync() + res = ddot(n,y,1,x%v,1) + + end function c_cuda_dot_a + + subroutine c_cuda_axpby_v(m,alpha, x, beta, y, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_vect_cuda), intent(inout) :: y + complex(psb_spk_), intent (in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny + + info = psb_success_ + + select type(xx => x) + type is (psb_c_vect_cuda) + ! Do something different here + if ((beta /= czero).and.y%is_host())& + & call y%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(y%deviceVect) + if ((nx x) + class is (psb_c_vect_cuda) + select type(yy => y) + class is (psb_c_vect_cuda) + select type(zz => z) + class is (psb_c_vect_cuda) + ! Do something different here + if ((beta /= czero).and.yy%is_host())& + & call yy%sync() + if ((delta /= czero).and.zz%is_host())& + & call zz%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + if ((nx x) + class is (psb_c_vect_cuda) + select type(yy => y) + class is (psb_c_vect_cuda) + select type(zz => z) + class is (psb_c_vect_cuda) + ! Do something different here + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if (zz%is_host()) call zz%sync() + if (w%is_host()) call w%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + nw = getMultiVecDeviceSize(w%deviceVect) + if ((nx x) + type is (psb_c_base_vect_type) + if (y%is_dev()) call y%sync() + do i=1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + type is (psb_c_vect_cuda) + ! Do something different here + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + info = axyMultiVecDevice(n,cone,xx%deviceVect,y%deviceVect) + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + call y%mlt(xx%v,info) + call y%set_host() + end select + + end subroutine c_cuda_mlt_v + + subroutine c_cuda_mlt_a(x, y, info) + use psi_serial_mod + implicit none + complex(psb_spk_), intent(in) :: x(:) + class(psb_c_vect_cuda), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync() + call y%psb_c_base_vect_type%mlt(x,info) + ! set_host() is invoked in the base method + end subroutine c_cuda_mlt_a + + subroutine c_cuda_mlt_a_2(alpha,x,y,beta,z,info) + use psi_serial_mod + implicit none + complex(psb_spk_), intent(in) :: alpha,beta + complex(psb_spk_), intent(in) :: x(:) + complex(psb_spk_), intent(in) :: y(:) + class(psb_c_vect_cuda), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync() + call z%psb_c_base_vect_type%mlt(alpha,x,y,beta,info) + ! set_host() is invoked in the base method + end subroutine c_cuda_mlt_a_2 + + subroutine c_cuda_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + use psi_serial_mod + use psb_string_mod + implicit none + complex(psb_spk_), intent(in) :: alpha,beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + class(psb_c_vect_cuda), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ + + if (.false.) then + ! These are present just for coherence with the + ! complex versions; they do nothing here. + conjgx_=.false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') + conjgy_=.false. + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') + end if + + n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) + + ! + ! Need to reconsider BETA in the GPU side + ! of things. + ! + info = 0 + select type(xx => x) + type is (psb_c_vect_cuda) + select type (yy => y) + type is (psb_c_vect_cuda) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= czero).and.(z%is_host())) call z%sync() + info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& + & yy%deviceVect,beta,z%deviceVect) + call z%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + if ((beta /= czero).and.(z%is_dev())) call z%sync() + call z%psb_c_base_vect_type%mlt(alpha,xx,yy,beta,info) + call z%set_host() + end select + + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= czero).and.(z%is_dev())) call z%sync() + call z%psb_c_base_vect_type%mlt(alpha,x,y,beta,info) + call z%set_host() + end select + end subroutine c_cuda_mlt_v_2 + + subroutine c_cuda_scal(alpha, x) + implicit none + class(psb_c_vect_cuda), intent(inout) :: x + complex(psb_spk_), intent (in) :: alpha + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = scalMultiVecDevice(alpha,x%deviceVect) + call x%set_dev() + end subroutine c_cuda_scal + + + function c_cuda_nrm2(n,x) result(res) + implicit none + class(psb_c_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + ! WARNING: this should be changed. + if (x%is_host()) call x%sync() + info = nrm2MultiVecDeviceComplex(res,n,x%deviceVect) + + end function c_cuda_nrm2 + + function c_cuda_amax(n,x) result(res) + implicit none + class(psb_c_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = amaxMultiVecDeviceComplex(res,n,x%deviceVect) + + end function c_cuda_amax + + function c_cuda_asum(n,x) result(res) + implicit none + class(psb_c_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = asumMultiVecDeviceComplex(res,n,x%deviceVect) + + end function c_cuda_asum + + subroutine c_cuda_absval1(x) + implicit none + class(psb_c_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + n=x%get_nrows() + info = absMultiVecDevice(n,cone,x%deviceVect) + + end subroutine c_cuda_absval1 + + subroutine c_cuda_absval2(x,y) + implicit none + class(psb_c_vect_cuda), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + n=min(x%get_nrows(),y%get_nrows()) + select type (yy=> y) + class is (psb_c_vect_cuda) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = absMultiVecDevice(n,cone,x%deviceVect,yy%deviceVect) + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_c_base_vect_type%absval(y) + end select + end subroutine c_cuda_absval2 + + + subroutine c_cuda_vect_finalize(x) + use psi_serial_mod + use psb_realloc_mod + implicit none + type(psb_c_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + info = 0 + call x%free(info) + end subroutine c_cuda_vect_finalize + + subroutine c_cuda_ins_v(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_c_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_c_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_cuda + + info = 0 + if (psb_errstatus_fatal()) return + + done_cuda = .false. + select type(virl => irl) + class is (psb_i_vect_cuda) + select type(vval => val) + class is (psb_c_vect_cuda) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + info = geinsMultiVecDeviceFloatComplex(n,virl%deviceVect,& + & vval%deviceVect,dupl,1,x%deviceVect) + call x%set_dev() + done_cuda=.true. + end select + end select + + if (.not.done_cuda) then + if (irl%is_dev()) call irl%sync() + if (val%is_dev()) call val%sync() + call x%ins(n,irl%v,val%v,dupl,info) + end if + + if (info /= 0) then + call psb_errpush(info,'cuda_vect_ins') + return + end if + + end subroutine c_cuda_ins_v + + subroutine c_cuda_ins_a(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_c_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_c_base_vect_type%ins(n,irl,val,dupl,info) + call x%set_host() + + end subroutine c_cuda_ins_a + +end module psb_c_cuda_vect_mod + + +! +! Multivectors +! + + + +module psb_c_cuda_multivect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_c_multivect_mod + use psb_c_base_multivect_mod + use psb_cuda_env_mod + use psb_i_multivect_mod + use psb_i_cuda_multivect_mod + use psb_c_vectordev_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_base_multivect_type) :: psb_c_multivect_cuda + integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 + type(c_ptr) :: deviceVect = c_null_ptr + real(c_double), allocatable :: buffer(:,:) + type(c_ptr) :: dt_buf = c_null_ptr + contains + procedure, pass(x) :: get_nrows => c_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => c_cuda_multi_get_ncols + procedure, nopass :: get_fmt => c_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => c_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => c_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => c_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => c_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => c_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => c_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => c_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => c_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => c_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => c_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => c_cuda_multi_amax +!!$ procedure, pass(x) :: asum => c_cuda_multi_asum + procedure, pass(x) :: all => c_cuda_multi_all + procedure, pass(x) :: zero => c_cuda_multi_zero + procedure, pass(x) :: asb => c_cuda_multi_asb + procedure, pass(x) :: sync => c_cuda_multi_sync + procedure, pass(x) :: sync_space => c_cuda_multi_sync_space + procedure, pass(x) :: bld_x => c_cuda_multi_bld_x + procedure, pass(x) :: bld_n => c_cuda_multi_bld_n + procedure, pass(x) :: free => c_cuda_multi_free + procedure, pass(x) :: ins => c_cuda_multi_ins + procedure, pass(x) :: is_host => c_cuda_multi_is_host + procedure, pass(x) :: is_dev => c_cuda_multi_is_dev + procedure, pass(x) :: is_sync => c_cuda_multi_is_sync + procedure, pass(x) :: set_host => c_cuda_multi_set_host + procedure, pass(x) :: set_dev => c_cuda_multi_set_dev + procedure, pass(x) :: set_sync => c_cuda_multi_set_sync + procedure, pass(x) :: set_scal => c_cuda_multi_set_scal + procedure, pass(x) :: set_vect => c_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => c_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => c_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => c_cuda_multi_sctb_x + final :: c_cuda_multi_vect_finalize + end type psb_c_multivect_cuda + + public :: psb_c_multivect_cuda + private :: constructor + interface psb_c_multivect_cuda + module procedure constructor + end interface + +contains + + function constructor(x) result(this) + complex(psb_spk_) :: x(:,:) + type(psb_c_multivect_cuda) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x,1),size(x,2),info) + + end function constructor + + +!!$ subroutine c_cuda_multi_gthzv_x(i,n,idx,x,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i,n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ complex(psb_spk_) :: y(:) +!!$ class(psb_c_multivect_cuda) :: x +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_cuda) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (x%is_host()) call x%sync() +!!$ +!!$ if (allocated(x%buffer)) then +!!$ if (size(x%buffer) < n) then +!!$ call inner_unregister(x%buffer) +!!$ deallocate(x%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(x%buffer)) then +!!$ allocate(x%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(x%buffer,x%dt_buf) +!!$ endif +!!$ info = igathMultiVecDeviceDouble(x%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, x%dt_buf, 1) +!!$ call psb_cudaSync() +!!$ y(1:n) = x%buffer(1:n) +!!$ +!!$ class default +!!$ call x%gth(n,ii%v(i:),y) +!!$ end select +!!$ +!!$ +!!$ end subroutine c_cuda_multi_gthzv_x +!!$ +!!$ +!!$ +!!$ subroutine c_cuda_multi_sctb(n,idx,x,beta,y) +!!$ implicit none +!!$ !use psb_const_mod +!!$ integer(psb_ipk_) :: n, idx(:) +!!$ complex(psb_spk_) :: beta, x(:) +!!$ class(psb_c_multivect_cuda) :: y +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (n == 0) return +!!$ +!!$ if (y%is_dev()) call y%sync() +!!$ +!!$ call y%psb_c_base_multivect_type%sctb(n,idx,x,beta) +!!$ call y%set_host() +!!$ +!!$ end subroutine c_cuda_multi_sctb +!!$ +!!$ subroutine c_cuda_multi_sctb_x(i,n,idx,x,beta,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ complex(psb_spk_) :: beta, x(:) +!!$ class(psb_c_multivect_cuda) :: y +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_cuda) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (y%is_host()) call y%sync() +!!$ +!!$ if (allocated(y%buffer)) then +!!$ if (size(y%buffer) < n) then +!!$ call inner_unregister(y%buffer) +!!$ deallocate(y%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(y%buffer)) then +!!$ allocate(y%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(y%buffer,y%dt_buf) +!!$ endif +!!$ y%buffer(1:n) = x(1:n) +!!$ info = iscatMultiVecDeviceDouble(y%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, y%dt_buf, 1,beta) +!!$ +!!$ call y%set_dev() +!!$ call psb_cudaSync() +!!$ +!!$ class default +!!$ call y%sct(n,ii%v(i:),x,beta) +!!$ end select +!!$ +!!$ end subroutine c_cuda_multi_sctb_x + + + subroutine c_cuda_multi_bld_x(x,this) + use psb_base_mod + complex(psb_spk_), intent(in) :: this(:,:) + class(psb_c_multivect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info, m, n + + m=size(this,1) + n=size(this,2) + x%m_nrows = m + x%m_ncols = n + call psb_realloc(m,n,x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'c_cuda_multi_bld_x',& + & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) + end if + x%v(1:m,1:n) = this(1:m,1:n) + call x%set_host() + call x%sync() + + end subroutine c_cuda_multi_bld_x + + subroutine c_cuda_multi_bld_n(x,m,n) + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_multivect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(m,n,info) + if (info /= 0) then + call psb_errpush(info,'c_cuda_multi_bld_n',i_err=(/m,n,n,n,n/)) + end if + + end subroutine c_cuda_multi_bld_n + + + subroutine c_cuda_multi_set_host(x) + implicit none + class(psb_c_multivect_cuda), intent(inout) :: x + + x%state = is_host + end subroutine c_cuda_multi_set_host + + subroutine c_cuda_multi_set_dev(x) + implicit none + class(psb_c_multivect_cuda), intent(inout) :: x + + x%state = is_dev + end subroutine c_cuda_multi_set_dev + + subroutine c_cuda_multi_set_sync(x) + implicit none + class(psb_c_multivect_cuda), intent(inout) :: x + + x%state = is_sync + end subroutine c_cuda_multi_set_sync + + function c_cuda_multi_is_dev(x) result(res) + implicit none + class(psb_c_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function c_cuda_multi_is_dev + + function c_cuda_multi_is_host(x) result(res) + implicit none + class(psb_c_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function c_cuda_multi_is_host + + function c_cuda_multi_is_sync(x) result(res) + implicit none + class(psb_c_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function c_cuda_multi_is_sync + + + function c_cuda_multi_get_nrows(x) result(res) + implicit none + class(psb_c_multivect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_nrows + + end function c_cuda_multi_get_nrows + + function c_cuda_multi_get_ncols(x) result(res) + implicit none + class(psb_c_multivect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_ncols + + end function c_cuda_multi_get_ncols + + function c_cuda_multi_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'cGPU' + end function c_cuda_multi_get_fmt + +!!$ function c_cuda_multi_dot_v(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_c_multivect_cuda), intent(inout) :: x +!!$ class(psb_c_base_multivect_type), intent(inout) :: y +!!$ integer(psb_ipk_), intent(in) :: n +!!$ complex(psb_spk_) :: res +!!$ complex(psb_spk_), external :: ddot +!!$ integer(psb_ipk_) :: info +!!$ +!!$ res = czero +!!$ ! +!!$ ! Note: this is the gpu implementation. +!!$ ! When we get here, we are sure that X is of +!!$ ! TYPE psb_c_vect +!!$ ! +!!$ select type(yy => y) +!!$ type is (psb_c_base_multivect_type) +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,x%v,1,yy%v,1) +!!$ type is (psb_c_multivect_cuda) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) +!!$ if (info /= 0) then +!!$ info = psb_err_internal_error_ +!!$ call psb_errpush(info,'c_cuda_multi_dot_v') +!!$ end if +!!$ +!!$ class default +!!$ ! y%sync is done in dot_a +!!$ call x%sync() +!!$ res = y%dot(n,x%v) +!!$ end select +!!$ +!!$ end function c_cuda_multi_dot_v +!!$ +!!$ function c_cuda_multi_dot_a(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_c_multivect_cuda), intent(inout) :: x +!!$ complex(psb_spk_), intent(in) :: y(:) +!!$ integer(psb_ipk_), intent(in) :: n +!!$ complex(psb_spk_) :: res +!!$ complex(psb_spk_), external :: ddot +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,y,1,x%v,1) +!!$ +!!$ end function c_cuda_multi_dot_a +!!$ +!!$ subroutine c_cuda_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_c_base_multivect_type), intent(inout) :: x +!!$ class(psb_c_multivect_cuda), intent(inout) :: y +!!$ complex(psb_spk_), intent (in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny +!!$ +!!$ info = psb_success_ +!!$ +!!$ select type(xx => x) +!!$ type is (psb_c_base_multivect_type) +!!$ if ((beta /= czero).and.(y%is_dev()))& +!!$ & call y%sync() +!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) +!!$ call y%set_host() +!!$ type is (psb_c_multivect_cuda) +!!$ ! Do something different here +!!$ if ((beta /= czero).and.y%is_host())& +!!$ & call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ nx = getMultiVecDeviceSize(xx%deviceVect) +!!$ ny = getMultiVecDeviceSize(y%deviceVect) +!!$ if ((nx x) +!!$ type is (psb_c_base_multivect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ do i=1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ type is (psb_c_multivect_cuda) +!!$ ! Do something different here +!!$ if (y%is_host()) call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) +!!$ call y%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call y%mlt(xx%v,info) +!!$ call y%set_host() +!!$ end select +!!$ +!!$ end subroutine c_cuda_multi_mlt_v +!!$ +!!$ subroutine c_cuda_multi_mlt_a(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ complex(psb_spk_), intent(in) :: x(:) +!!$ class(psb_c_multivect_cuda), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ call y%sync() +!!$ call y%psb_c_base_multivect_type%mlt(x,info) +!!$ call y%set_host() +!!$ end subroutine c_cuda_multi_mlt_a +!!$ +!!$ subroutine c_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ complex(psb_spk_), intent(in) :: alpha,beta +!!$ complex(psb_spk_), intent(in) :: x(:) +!!$ complex(psb_spk_), intent(in) :: y(:) +!!$ class(psb_c_multivect_cuda), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (z%is_dev()) call z%sync() +!!$ call z%psb_c_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end subroutine c_cuda_multi_mlt_a_2 +!!$ +!!$ subroutine c_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ use psi_serial_mod +!!$ use psb_string_mod +!!$ implicit none +!!$ complex(psb_spk_), intent(in) :: alpha,beta +!!$ class(psb_c_base_multivect_type), intent(inout) :: x +!!$ class(psb_c_base_multivect_type), intent(inout) :: y +!!$ class(psb_c_multivect_cuda), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ if (.false.) then +!!$ ! These are present just for coherence with the +!!$ ! complex versions; they do nothing here. +!!$ conjgx_=.false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') +!!$ conjgy_=.false. +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') +!!$ end if +!!$ +!!$ n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) +!!$ +!!$ ! +!!$ ! Need to reconsider BETA in the GPU side +!!$ ! of things. +!!$ ! +!!$ info = 0 +!!$ select type(xx => x) +!!$ type is (psb_c_multivect_cuda) +!!$ select type (yy => y) +!!$ type is (psb_c_multivect_cuda) +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ ! Z state is irrelevant: it will be done on the GPU. +!!$ info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& +!!$ & yy%deviceVect,beta,z%deviceVect) +!!$ call z%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call yy%sync() +!!$ call z%psb_c_base_multivect_type%mlt(alpha,xx,yy,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ +!!$ class default +!!$ call x%sync() +!!$ call y%sync() +!!$ call z%psb_c_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ end subroutine c_cuda_multi_mlt_v_2 + + + subroutine c_cuda_multi_set_scal(x,val) + class(psb_c_multivect_cuda), intent(inout) :: x + complex(psb_spk_), intent(in) :: val + + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_c_base_multivect_type%set_scal(val) + call x%set_host() + end subroutine c_cuda_multi_set_scal + + subroutine c_cuda_multi_set_vect(x,val) + class(psb_c_multivect_cuda), intent(inout) :: x + complex(psb_spk_), intent(in) :: val(:,:) + integer(psb_ipk_) :: nr + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_c_base_multivect_type%set_vect(val) + call x%set_host() + + end subroutine c_cuda_multi_set_vect + + + +!!$ subroutine c_cuda_multi_scal(alpha, x) +!!$ implicit none +!!$ class(psb_c_multivect_cuda), intent(inout) :: x +!!$ complex(psb_spk_), intent (in) :: alpha +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_c_base_multivect_type%scal(alpha) +!!$ call x%set_host() +!!$ end subroutine c_cuda_multi_scal +!!$ +!!$ +!!$ function c_cuda_multi_nrm2(n,x) result(res) +!!$ implicit none +!!$ class(psb_c_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ ! WARNING: this should be changed. +!!$ if (x%is_host()) call x%sync() +!!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) +!!$ +!!$ end function c_cuda_multi_nrm2 +!!$ +!!$ function c_cuda_multi_amax(n,x) result(res) +!!$ implicit none +!!$ class(psb_c_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = maxval(abs(x%v(1:n))) +!!$ +!!$ end function c_cuda_multi_amax +!!$ +!!$ function c_cuda_multi_asum(n,x) result(res) +!!$ implicit none +!!$ class(psb_c_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = sum(abs(x%v(1:n))) +!!$ +!!$ end function c_cuda_multi_asum + + subroutine c_cuda_multi_all(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_multivect_cuda), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(m,n,x%v,info,pad=czero) + x%m_nrows = m + x%m_ncols = n + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'c_cuda_multi_all',& + & i_err=(/m,n,n,n,n/)) + end if + end subroutine c_cuda_multi_all + + subroutine c_cuda_multi_zero(x) + use psi_serial_mod + implicit none + class(psb_c_multivect_cuda), intent(inout) :: x + + if (allocated(x%v)) x%v=czero + call x%set_host() + end subroutine c_cuda_multi_zero + + subroutine c_cuda_multi_asb(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_multivect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nd, nc + + + x%m_nrows = m + x%m_ncols = n + if (x%is_host()) then + call x%psb_c_base_multivect_type%asb(m,n,info) + if (info == psb_success_) call x%sync_space(info) + else if (x%is_dev()) then + nd = getMultiVecDevicePitch(x%deviceVect) + nc = getMultiVecDeviceCount(x%deviceVect) + if ((nd < m).or.(nc0) then + dev_ = mod(iam,count) + else + dev_ = 0 + end if + info = psb_C_gpu_init(dev_) + end if + if (info == 0) info = initFcusparse() + if (info /= 0) then + call psb_errpush(psb_err_internal_error_,'psb_cuda_init') + goto 9999 + end if + call psb_cudaCreateHandle() + call psb_erractionrestore(err_act) + return +9999 call psb_error_handler(ctxt,err_act) + + return + + end subroutine psb_cuda_init + + + subroutine psb_cuda_DeviceSync() + call psb_cudaSync() + end subroutine psb_cuda_DeviceSync + + function psb_cuda_getDeviceCount() result(res) + integer :: res + res = psb_cuda_inner_getDeviceCount() + end function psb_cuda_getDeviceCount + + subroutine psb_cuda_exit() + integer :: res + res = closeFcusparse() + call psb_cuda_innerClose() + call psb_cuda_innerReset() + end subroutine psb_cuda_exit + + function psb_cuda_DeviceHasUVA() result(res) + logical :: res + res = (psb_C_DeviceHasUVA() == 1) + end function psb_cuda_DeviceHasUVA + + function psb_cuda_MultiProcessors() result(res) + integer(psb_ipk_) :: res + res = psb_C_get_MultiProcessors() + end function psb_cuda_MultiProcessors + + function psb_cuda_MaxRegistersPerBlock() result(res) + integer(psb_ipk_) :: res + res = psb_C_get_MaxRegistersPerBlock() + end function psb_cuda_MaxRegistersPerBlock + + function psb_cuda_MaxThreadsPerMP() result(res) + integer(psb_ipk_) :: res + res = psb_C_get_MaxThreadsPerMP() + end function psb_cuda_MaxThreadsPerMP + + function psb_cuda_WarpSize() result(res) + integer(psb_ipk_) :: res + res = psb_C_get_WarpSize() + end function psb_cuda_WarpSize + + function psb_cuda_MemoryClockRate() result(res) + integer(psb_ipk_) :: res + res = psb_C_get_MemoryClockRate() + end function psb_cuda_MemoryClockRate + + function psb_cuda_MemoryBusWidth() result(res) + integer(psb_ipk_) :: res + res = psb_C_get_MemoryBusWidth() + end function psb_cuda_MemoryBusWidth + + function psb_cuda_MemoryPeakBandwidth() result(res) + real(psb_dpk_) :: res + ! Formula here: 2*ClockRate(KHz)*BusWidth(bit) + ! normalization: bit/byte, KHz/MHz + ! output: MBytes/s + res = 2.d0*0.125d0*1.d-3*psb_C_get_MemoryBusWidth()*psb_C_get_MemoryClockRate() + end function psb_cuda_MemoryPeakBandwidth + + function psb_cuda_DeviceName() result(res) + character(len=256) :: res + character :: cstring(256) + call psb_C_cpy_NameString(cstring) + call stringc2f(cstring,res) + end function psb_cuda_DeviceName + + + subroutine stringc2f(cstring,fstring) + character(c_char) :: cstring(*) + character(len=*) :: fstring + integer :: i + + i = 1 + do + if (cstring(i) == c_null_char) exit + if (i > len(fstring)) exit + fstring(i:i) = cstring(i) + i = i + 1 + end do + do + if (i > len(fstring)) exit + fstring(i:i) = " " + i = i + 1 + end do + return + end subroutine stringc2f + +end module psb_cuda_env_mod diff --git a/cuda/psb_cuda_mod.F90 b/cuda/psb_cuda_mod.F90 new file mode 100644 index 00000000..81ce3e31 --- /dev/null +++ b/cuda/psb_cuda_mod.F90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_cuda_mod + use psb_const_mod + use psb_cuda_env_mod + + use psb_i_cuda_vect_mod + use psb_s_cuda_vect_mod + use psb_d_cuda_vect_mod + use psb_c_cuda_vect_mod + use psb_z_cuda_vect_mod + + use psb_i_cuda_multivect_mod + use psb_s_cuda_multivect_mod + use psb_d_cuda_multivect_mod + use psb_c_cuda_multivect_mod + use psb_z_cuda_multivect_mod + + use psb_d_ell_mat_mod + use psb_d_cuda_elg_mat_mod + use psb_s_ell_mat_mod + use psb_s_cuda_elg_mat_mod + use psb_z_ell_mat_mod + use psb_z_cuda_elg_mat_mod + use psb_c_ell_mat_mod + use psb_c_cuda_elg_mat_mod + + use psb_s_hll_mat_mod + use psb_s_cuda_hlg_mat_mod + use psb_d_hll_mat_mod + use psb_d_cuda_hlg_mat_mod + use psb_c_hll_mat_mod + use psb_c_cuda_hlg_mat_mod + use psb_z_hll_mat_mod + use psb_z_cuda_hlg_mat_mod + + use psb_s_cuda_csrg_mat_mod + use psb_d_cuda_csrg_mat_mod + use psb_c_cuda_csrg_mat_mod + use psb_z_cuda_csrg_mat_mod +#if CUDA_SHORT_VERSION <= 10 + use psb_s_cuda_hybg_mat_mod + use psb_d_cuda_hybg_mat_mod + use psb_c_cuda_hybg_mat_mod + use psb_z_cuda_hybg_mat_mod +#endif + use psb_d_cuda_diag_mat_mod + use psb_d_cuda_hdiag_mat_mod + + use psb_s_cuda_dnsg_mat_mod + use psb_d_cuda_dnsg_mat_mod + use psb_c_cuda_dnsg_mat_mod + use psb_z_cuda_dnsg_mat_mod + + use psb_s_cuda_hdiag_mat_mod + ! use psb_s_cuda_diag_mat_mod + +end module psb_cuda_mod + diff --git a/cuda/psb_d_cuda_csrg_mat_mod.F90 b/cuda/psb_d_cuda_csrg_mat_mod.F90 new file mode 100644 index 00000000..101959bd --- /dev/null +++ b/cuda/psb_d_cuda_csrg_mat_mod.F90 @@ -0,0 +1,375 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_cuda_csrg_mat_mod + + use iso_c_binding + use psb_d_mat_mod + use cusparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_csr_sparse_mat) :: psb_d_cuda_csrg_sparse_mat + ! + ! cuSPARSE 4.0 CSR format. + ! + ! + ! + ! + ! + type(d_Cmat) :: deviceMat + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => d_cuda_csrg_get_fmt + procedure, pass(a) :: sizeof => d_cuda_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_cuda_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_cuda_csrg_csmm + procedure, pass(a) :: csmv => psb_d_cuda_csrg_csmv + procedure, pass(a) :: scals => psb_d_cuda_csrg_scals + procedure, pass(a) :: scalv => psb_d_cuda_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_d_cuda_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_cuda_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_csrg_from_fmt + procedure, pass(a) :: free => d_cuda_csrg_free + procedure, pass(a) :: mold => psb_d_cuda_csrg_mold + procedure, pass(a) :: is_host => d_cuda_csrg_is_host + procedure, pass(a) :: is_dev => d_cuda_csrg_is_dev + procedure, pass(a) :: is_sync => d_cuda_csrg_is_sync + procedure, pass(a) :: set_host => d_cuda_csrg_set_host + procedure, pass(a) :: set_dev => d_cuda_csrg_set_dev + procedure, pass(a) :: set_sync => d_cuda_csrg_set_sync + procedure, pass(a) :: sync => d_cuda_csrg_sync + procedure, pass(a) :: to_gpu => psb_d_cuda_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_d_cuda_csrg_from_gpu + final :: d_cuda_csrg_finalize + end type psb_d_cuda_csrg_sparse_mat + + private :: d_cuda_csrg_get_nzeros, d_cuda_csrg_free, d_cuda_csrg_get_fmt, & + & d_cuda_csrg_get_size, d_cuda_csrg_sizeof, d_cuda_csrg_get_nz_row + + + interface + subroutine psb_d_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_d_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_csrg_vect_mv + end interface + + interface + subroutine psb_d_cuda_csrg_reallocate_nz(nz,a) + import :: psb_d_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_csrg_reallocate_nz + end interface + + interface + subroutine psb_d_cuda_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_d_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_cuda_csrg_allocate_mnnz + end interface + + interface + subroutine psb_d_cuda_csrg_mold(a,b,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_csrg_mold + end interface + + interface + subroutine psb_d_cuda_csrg_to_gpu(a,info, nzrm) + import :: psb_d_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_d_cuda_csrg_to_gpu + end interface + + interface + subroutine psb_d_cuda_csrg_from_gpu(a,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_csrg_from_gpu + end interface + + interface + subroutine psb_d_cuda_cp_csrg_from_coo(a,b,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_cp_csrg_from_coo + end interface + + interface + subroutine psb_d_cuda_cp_csrg_from_fmt(a,b,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_cp_csrg_from_fmt + end interface + + interface + subroutine psb_d_cuda_mv_csrg_from_coo(a,b,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_mv_csrg_from_coo + end interface + + interface + subroutine psb_d_cuda_mv_csrg_from_fmt(a,b,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_mv_csrg_from_fmt + end interface + + interface + subroutine psb_d_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_csrg_csmv + end interface + interface + subroutine psb_d_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_csrg_csmm + end interface + + interface + subroutine psb_d_cuda_csrg_scal(d,a,info,side) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_cuda_csrg_scal + end interface + + interface + subroutine psb_d_cuda_csrg_scals(d,a,info) + import :: psb_d_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_cuda_csrg_sizeof(a) result(res) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function d_cuda_csrg_sizeof + + function d_cuda_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function d_cuda_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine d_cuda_csrg_set_host(a) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_cuda_csrg_set_host + + subroutine d_cuda_csrg_set_dev(a) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_cuda_csrg_set_dev + + subroutine d_cuda_csrg_set_sync(a) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_cuda_csrg_set_sync + + function d_cuda_csrg_is_dev(a) result(res) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_cuda_csrg_is_dev + + function d_cuda_csrg_is_host(a) result(res) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_cuda_csrg_is_host + + function d_cuda_csrg_is_sync(a) result(res) + implicit none + class(psb_d_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_cuda_csrg_is_sync + + + subroutine d_cuda_csrg_sync(a) + implicit none + class(psb_d_cuda_csrg_sparse_mat), target, intent(in) :: a + class(psb_d_cuda_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine d_cuda_csrg_sync + + subroutine d_cuda_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_d_csr_sparse_mat%free() + + return + + end subroutine d_cuda_csrg_free + + subroutine d_cuda_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_d_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine d_cuda_csrg_finalize + +end module psb_d_cuda_csrg_mat_mod diff --git a/cuda/psb_d_cuda_diag_mat_mod.F90 b/cuda/psb_d_cuda_diag_mat_mod.F90 new file mode 100644 index 00000000..11e4e349 --- /dev/null +++ b/cuda/psb_d_cuda_diag_mat_mod.F90 @@ -0,0 +1,289 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_cuda_diag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_d_dia_mat_mod + + type, extends(psb_d_dia_sparse_mat) :: psb_d_cuda_diag_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => d_cuda_diag_get_fmt + procedure, pass(a) :: sizeof => d_cuda_diag_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_diag_vect_mv +! procedure, pass(a) :: csmm => psb_d_cuda_diag_csmm + procedure, pass(a) :: csmv => psb_d_cuda_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_d_cuda_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_d_cuda_diag_scals +! procedure, pass(a) :: scalv => psb_d_cuda_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_d_cuda_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_d_cuda_diag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_diag_from_fmt + procedure, pass(a) :: free => d_cuda_diag_free + procedure, pass(a) :: mold => psb_d_cuda_diag_mold + procedure, pass(a) :: to_gpu => psb_d_cuda_diag_to_gpu + final :: d_cuda_diag_finalize + end type psb_d_cuda_diag_sparse_mat + + private :: d_cuda_diag_get_nzeros, d_cuda_diag_free, d_cuda_diag_get_fmt, & + & d_cuda_diag_get_size, d_cuda_diag_sizeof, d_cuda_diag_get_nz_row + + + interface + subroutine psb_d_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_diag_vect_mv + end interface + + interface + subroutine psb_d_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_d_base_vect_type + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_diag_inner_vect_sv + end interface + + interface + subroutine psb_d_cuda_diag_reallocate_nz(nz,a) + import :: psb_d_cuda_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_diag_reallocate_nz + end interface + + interface + subroutine psb_d_cuda_diag_allocate_mnnz(m,n,a,nz) + import :: psb_d_cuda_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_cuda_diag_allocate_mnnz + end interface + + interface + subroutine psb_d_cuda_diag_mold(a,b,info) + import :: psb_d_cuda_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_diag_mold + end interface + + interface + subroutine psb_d_cuda_diag_to_gpu(a,info, nzrm) + import :: psb_d_cuda_diag_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_d_cuda_diag_to_gpu + end interface + + interface + subroutine psb_d_cuda_cp_diag_from_coo(a,b,info) + import :: psb_d_cuda_diag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_cp_diag_from_coo + end interface + + interface + subroutine psb_d_cuda_cp_diag_from_fmt(a,b,info) + import :: psb_d_cuda_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_cp_diag_from_fmt + end interface + + interface + subroutine psb_d_cuda_mv_diag_from_coo(a,b,info) + import :: psb_d_cuda_diag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_mv_diag_from_coo + end interface + + + interface + subroutine psb_d_cuda_mv_diag_from_fmt(a,b,info) + import :: psb_d_cuda_diag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_mv_diag_from_fmt + end interface + + interface + subroutine psb_d_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_diag_csmv + end interface + interface + subroutine psb_d_cuda_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_diag_csmm + end interface + + interface + subroutine psb_d_cuda_diag_scal(d,a,info, side) + import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_cuda_diag_scal + end interface + + interface + subroutine psb_d_cuda_diag_scals(d,a,info) + import :: psb_d_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_diag_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_cuda_diag_sizeof(a) result(res) + implicit none + class(psb_d_cuda_diag_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = 8 + res = res + psb_sizeof_dp * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function d_cuda_diag_sizeof + + function d_cuda_diag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIAG' + end function d_cuda_diag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_cuda_diag_free(a) + use diagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_d_cuda_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_d_dia_sparse_mat%free() + + return + + end subroutine d_cuda_diag_free + + subroutine d_cuda_diag_finalize(a) + use diagdev_mod + implicit none + type(psb_d_cuda_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine d_cuda_diag_finalize + +end module psb_d_cuda_diag_mat_mod diff --git a/cuda/psb_d_cuda_dnsg_mat_mod.F90 b/cuda/psb_d_cuda_dnsg_mat_mod.F90 new file mode 100644 index 00000000..f11e7823 --- /dev/null +++ b/cuda/psb_d_cuda_dnsg_mat_mod.F90 @@ -0,0 +1,275 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_cuda_dnsg_mat_mod + + use iso_c_binding + use psb_d_mat_mod + use psb_d_dns_mat_mod + use dnsdev_mod + + type, extends(psb_d_dns_sparse_mat) :: psb_d_cuda_dnsg_sparse_mat + ! + ! ITPACK/DNS format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of DNS, indistinguishable. + ! + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => d_cuda_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => d_cuda_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_d_cuda_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_d_cuda_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_d_cuda_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_d_cuda_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_d_cuda_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_d_cuda_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_d_cuda_dnsg_allocate_mnnz + ! Note: we *do* need the TO methods, because of the need to invoke SYNC + ! + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_dnsg_from_fmt + procedure, pass(a) :: free => d_cuda_dnsg_free + procedure, pass(a) :: mold => psb_d_cuda_dnsg_mold + procedure, pass(a) :: to_gpu => psb_d_cuda_dnsg_to_gpu + final :: d_cuda_dnsg_finalize + end type psb_d_cuda_dnsg_sparse_mat + + private :: d_cuda_dnsg_get_nzeros, d_cuda_dnsg_free, d_cuda_dnsg_get_fmt, & + & d_cuda_dnsg_get_size, d_cuda_dnsg_get_nz_row + + + interface + subroutine psb_d_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_dnsg_vect_mv + end interface +!!$ +!!$ interface +!!$ subroutine psb_d_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_d_base_vect_type +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta +!!$ class(psb_d_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_cuda_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_d_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_cuda_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_d_cuda_dnsg_allocate_mnnz +!!$ end interface + + interface + subroutine psb_d_cuda_dnsg_mold(a,b,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_dnsg_mold + end interface + + interface + subroutine psb_d_cuda_dnsg_to_gpu(a,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_dnsg_to_gpu + end interface + + interface + subroutine psb_d_cuda_cp_dnsg_from_coo(a,b,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_cp_dnsg_from_coo + end interface + + interface + subroutine psb_d_cuda_cp_dnsg_from_fmt(a,b,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_cp_dnsg_from_fmt + end interface + + interface + subroutine psb_d_cuda_mv_dnsg_from_coo(a,b,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_mv_dnsg_from_coo + end interface + + + interface + subroutine psb_d_cuda_mv_dnsg_from_fmt(a,b,info) + import :: psb_d_cuda_dnsg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_mv_dnsg_from_fmt + end interface + +!!$ interface +!!$ subroutine psb_d_cuda_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:) +!!$ real(psb_dpk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_cuda_dnsg_csmv +!!$ end interface +!!$ interface +!!$ subroutine psb_d_cuda_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_cuda_dnsg_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_cuda_dnsg_scal(d,a,info, side) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_d_cuda_dnsg_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_cuda_dnsg_scals(d,a,info) +!!$ import :: psb_d_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_cuda_dnsg_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + + function d_cuda_dnsg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNSG' + end function d_cuda_dnsg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_cuda_dnsg_free(a) + use dnsdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_d_dns_sparse_mat%free() + + return + + end subroutine d_cuda_dnsg_free + + subroutine d_cuda_dnsg_finalize(a) + use dnsdev_mod + implicit none + type(psb_d_cuda_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine d_cuda_dnsg_finalize + +end module psb_d_cuda_dnsg_mat_mod diff --git a/cuda/psb_d_cuda_elg_mat_mod.F90 b/cuda/psb_d_cuda_elg_mat_mod.F90 new file mode 100644 index 00000000..1af80f2a --- /dev/null +++ b/cuda/psb_d_cuda_elg_mat_mod.F90 @@ -0,0 +1,456 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_cuda_elg_mat_mod + + use iso_c_binding + use psb_d_mat_mod + use psb_d_ell_mat_mod + use psb_i_cuda_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_ell_sparse_mat) :: psb_d_cuda_elg_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of ELL, indistinguishable. + ! + type(c_ptr) :: deviceMat = c_null_ptr + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => d_cuda_elg_get_fmt + procedure, pass(a) :: sizeof => d_cuda_elg_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_elg_vect_mv + procedure, pass(a) :: csmm => psb_d_cuda_elg_csmm + procedure, pass(a) :: csmv => psb_d_cuda_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_d_cuda_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_d_cuda_elg_scals + procedure, pass(a) :: scalv => psb_d_cuda_elg_scal + procedure, pass(a) :: reallocate_nz => psb_d_cuda_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_cuda_elg_allocate_mnnz + procedure, pass(a) :: reinit => d_cuda_elg_reinit + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_elg_from_fmt + procedure, pass(a) :: free => d_cuda_elg_free + procedure, pass(a) :: mold => psb_d_cuda_elg_mold + procedure, pass(a) :: csput_a => psb_d_cuda_elg_csput_a + procedure, pass(a) :: csput_v => psb_d_cuda_elg_csput_v + procedure, pass(a) :: is_host => d_cuda_elg_is_host + procedure, pass(a) :: is_dev => d_cuda_elg_is_dev + procedure, pass(a) :: is_sync => d_cuda_elg_is_sync + procedure, pass(a) :: set_host => d_cuda_elg_set_host + procedure, pass(a) :: set_dev => d_cuda_elg_set_dev + procedure, pass(a) :: set_sync => d_cuda_elg_set_sync + procedure, pass(a) :: sync => d_cuda_elg_sync + procedure, pass(a) :: from_gpu => psb_d_cuda_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_d_cuda_elg_to_gpu + procedure, pass(a) :: asb => psb_d_cuda_elg_asb + final :: d_cuda_elg_finalize + end type psb_d_cuda_elg_sparse_mat + + private :: d_cuda_elg_get_nzeros, d_cuda_elg_free, d_cuda_elg_get_fmt, & + & d_cuda_elg_get_size, d_cuda_elg_sizeof, d_cuda_elg_get_nz_row, d_cuda_elg_sync + + + interface + subroutine psb_d_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_elg_vect_mv + end interface + + interface + subroutine psb_d_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_d_base_vect_type + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_elg_inner_vect_sv + end interface + + interface + subroutine psb_d_cuda_elg_reallocate_nz(nz,a) + import :: psb_d_cuda_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_elg_reallocate_nz + end interface + + interface + subroutine psb_d_cuda_elg_allocate_mnnz(m,n,a,nz) + import :: psb_d_cuda_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_cuda_elg_allocate_mnnz + end interface + + interface + subroutine psb_d_cuda_elg_mold(a,b,info) + import :: psb_d_cuda_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_elg_mold + end interface + + interface + subroutine psb_d_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_elg_csput_a + end interface + + interface + subroutine psb_d_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_d_base_vect_type,& + & psb_i_base_vect_type + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_elg_csput_v + end interface + + interface + subroutine psb_d_cuda_elg_from_gpu(a,info) + import :: psb_d_cuda_elg_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_elg_from_gpu + end interface + + interface + subroutine psb_d_cuda_elg_to_gpu(a,info, nzrm) + import :: psb_d_cuda_elg_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_d_cuda_elg_to_gpu + end interface + + interface + subroutine psb_d_cuda_cp_elg_from_coo(a,b,info) + import :: psb_d_cuda_elg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_cp_elg_from_coo + end interface + + interface + subroutine psb_d_cuda_cp_elg_from_fmt(a,b,info) + import :: psb_d_cuda_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_cp_elg_from_fmt + end interface + + interface + subroutine psb_d_cuda_mv_elg_from_coo(a,b,info) + import :: psb_d_cuda_elg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_mv_elg_from_coo + end interface + + + interface + subroutine psb_d_cuda_mv_elg_from_fmt(a,b,info) + import :: psb_d_cuda_elg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_mv_elg_from_fmt + end interface + + interface + subroutine psb_d_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_elg_csmv + end interface + interface + subroutine psb_d_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_elg_csmm + end interface + + interface + subroutine psb_d_cuda_elg_scal(d,a,info, side) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_cuda_elg_scal + end interface + + interface + subroutine psb_d_cuda_elg_scals(d,a,info) + import :: psb_d_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_elg_scals + end interface + + interface + subroutine psb_d_cuda_elg_asb(a) + import :: psb_d_cuda_elg_sparse_mat + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_elg_asb + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_cuda_elg_sizeof(a) result(res) + implicit none + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function d_cuda_elg_sizeof + + function d_cuda_elg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELG' + end function d_cuda_elg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + subroutine d_cuda_elg_reinit(a,clear) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + integer(psb_ipk_) :: isz, err_act + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev().or.a%is_sync()) then + if (clear_) call zeroEllDevice(a%deviceMat) + call a%set_dev() + else if (a%is_host()) then + a%val(:,:) = dzero + end if + call a%set_upd() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine d_cuda_elg_reinit + + subroutine d_cuda_elg_free(a) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_d_ell_sparse_mat%free() + call a%set_sync() + + return + + end subroutine d_cuda_elg_free + + subroutine d_cuda_elg_sync(a) + implicit none + class(psb_d_cuda_elg_sparse_mat), target, intent(in) :: a + class(psb_d_cuda_elg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine d_cuda_elg_sync + + subroutine d_cuda_elg_set_host(a) + implicit none + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_cuda_elg_set_host + + subroutine d_cuda_elg_set_dev(a) + implicit none + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_cuda_elg_set_dev + + subroutine d_cuda_elg_set_sync(a) + implicit none + class(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_cuda_elg_set_sync + + function d_cuda_elg_is_dev(a) result(res) + implicit none + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_cuda_elg_is_dev + + function d_cuda_elg_is_host(a) result(res) + implicit none + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_cuda_elg_is_host + + function d_cuda_elg_is_sync(a) result(res) + implicit none + class(psb_d_cuda_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_cuda_elg_is_sync + + subroutine d_cuda_elg_finalize(a) + use elldev_mod + implicit none + type(psb_d_cuda_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + return + + end subroutine d_cuda_elg_finalize + +end module psb_d_cuda_elg_mat_mod diff --git a/cuda/psb_d_cuda_hdiag_mat_mod.F90 b/cuda/psb_d_cuda_hdiag_mat_mod.F90 new file mode 100644 index 00000000..46b63b43 --- /dev/null +++ b/cuda/psb_d_cuda_hdiag_mat_mod.F90 @@ -0,0 +1,268 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_cuda_hdiag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_d_hdia_mat_mod + + type, extends(psb_d_hdia_sparse_mat) :: psb_d_cuda_hdiag_sparse_mat + ! + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => d_cuda_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => d_cuda_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_d_cuda_hdiag_csmm + procedure, pass(a) :: csmv => psb_d_cuda_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_d_cuda_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_d_cuda_hdiag_scals + ! procedure, pass(a) :: scalv => psb_d_cuda_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_d_cuda_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_d_cuda_hdiag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_hdiag_from_fmt + procedure, pass(a) :: free => d_cuda_hdiag_free + procedure, pass(a) :: mold => psb_d_cuda_hdiag_mold + procedure, pass(a) :: to_gpu => psb_d_cuda_hdiag_to_gpu + final :: d_cuda_hdiag_finalize + end type psb_d_cuda_hdiag_sparse_mat + + private :: d_cuda_hdiag_get_nzeros, d_cuda_hdiag_free, d_cuda_hdiag_get_fmt, & + & d_cuda_hdiag_get_size, d_cuda_hdiag_sizeof, d_cuda_hdiag_get_nz_row + + + interface + subroutine psb_d_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_hdiag_vect_mv + end interface + +!!$ interface +!!$ subroutine psb_d_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_d_base_vect_type +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta +!!$ class(psb_d_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_cuda_hdiag_inner_vect_sv +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_cuda_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_cuda_hdiag_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_cuda_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_d_cuda_hdiag_allocate_mnnz +!!$ end interface + + interface + subroutine psb_d_cuda_hdiag_mold(a,b,info) + import :: psb_d_cuda_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_hdiag_mold + end interface + + interface + subroutine psb_d_cuda_hdiag_to_gpu(a,info) + import :: psb_d_cuda_hdiag_sparse_mat, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_hdiag_to_gpu + end interface + + interface + subroutine psb_d_cuda_cp_hdiag_from_coo(a,b,info) + import :: psb_d_cuda_hdiag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_cp_hdiag_from_coo + end interface + +!!$ interface +!!$ subroutine psb_d_cuda_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_d_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_cuda_cp_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_d_cuda_mv_hdiag_from_coo(a,b,info) + import :: psb_d_cuda_hdiag_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_mv_hdiag_from_coo + end interface + +!!$ +!!$ interface +!!$ subroutine psb_d_cuda_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_d_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_cuda_mv_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_d_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_hdiag_csmv + end interface + +!!$ interface +!!$ subroutine psb_d_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_cuda_hdiag_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_cuda_hdiag_scal(d,a,info, side) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_d_cuda_hdiag_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_cuda_hdiag_scals(d,a,info) +!!$ import :: psb_d_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_cuda_hdiag_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + function d_cuda_hdiag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIAG' + end function d_cuda_hdiag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_cuda_hdiag_free(a) + use hdiagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_d_hdia_sparse_mat%free() + + return + + end subroutine d_cuda_hdiag_free + + subroutine d_cuda_hdiag_finalize(a) + use hdiagdev_mod + implicit none + type(psb_d_cuda_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_d_hdia_sparse_mat%free() + + return + end subroutine d_cuda_hdiag_finalize + +end module psb_d_cuda_hdiag_mat_mod diff --git a/cuda/psb_d_cuda_hlg_mat_mod.F90 b/cuda/psb_d_cuda_hlg_mat_mod.F90 new file mode 100644 index 00000000..6627f824 --- /dev/null +++ b/cuda/psb_d_cuda_hlg_mat_mod.F90 @@ -0,0 +1,379 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_cuda_hlg_mat_mod + + use iso_c_binding + use psb_d_mat_mod + use psb_d_hll_mat_mod + + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_hll_sparse_mat) :: psb_d_cuda_hlg_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! + type(c_ptr) :: deviceMat = c_null_ptr + integer :: devstate = is_host + + contains + procedure, nopass :: get_fmt => d_cuda_hlg_get_fmt + procedure, pass(a) :: sizeof => d_cuda_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_hlg_vect_mv + procedure, pass(a) :: csmm => psb_d_cuda_hlg_csmm + procedure, pass(a) :: csmv => psb_d_cuda_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_d_cuda_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_d_cuda_hlg_scals + procedure, pass(a) :: scalv => psb_d_cuda_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_d_cuda_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_cuda_hlg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_hlg_from_fmt + procedure, pass(a) :: free => d_cuda_hlg_free + procedure, pass(a) :: mold => psb_d_cuda_hlg_mold + procedure, pass(a) :: is_host => d_cuda_hlg_is_host + procedure, pass(a) :: is_dev => d_cuda_hlg_is_dev + procedure, pass(a) :: is_sync => d_cuda_hlg_is_sync + procedure, pass(a) :: set_host => d_cuda_hlg_set_host + procedure, pass(a) :: set_dev => d_cuda_hlg_set_dev + procedure, pass(a) :: set_sync => d_cuda_hlg_set_sync + procedure, pass(a) :: sync => d_cuda_hlg_sync + procedure, pass(a) :: from_gpu => psb_d_cuda_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_d_cuda_hlg_to_gpu + final :: d_cuda_hlg_finalize + end type psb_d_cuda_hlg_sparse_mat + + private :: d_cuda_hlg_get_nzeros, d_cuda_hlg_free, d_cuda_hlg_get_fmt, & + & d_cuda_hlg_get_size, d_cuda_hlg_sizeof, d_cuda_hlg_get_nz_row + + + interface + subroutine psb_d_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_hlg_vect_mv + end interface + + interface + subroutine psb_d_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_d_base_vect_type + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_hlg_inner_vect_sv + end interface + + interface + subroutine psb_d_cuda_hlg_reallocate_nz(nz,a) + import :: psb_d_cuda_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_hlg_reallocate_nz + end interface + + interface + subroutine psb_d_cuda_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_d_cuda_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_cuda_hlg_allocate_mnnz + end interface + + interface + subroutine psb_d_cuda_hlg_mold(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_hlg_mold + end interface + + interface + subroutine psb_d_cuda_hlg_from_gpu(a,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_hlg_from_gpu + end interface + + interface + subroutine psb_d_cuda_hlg_to_gpu(a,info, nzrm) + import :: psb_d_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_d_cuda_hlg_to_gpu + end interface + + interface + subroutine psb_d_cuda_cp_hlg_from_coo(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_cp_hlg_from_coo + end interface + + interface + subroutine psb_d_cuda_cp_hlg_from_fmt(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_cp_hlg_from_fmt + end interface + + interface + subroutine psb_d_cuda_mv_hlg_from_coo(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_mv_hlg_from_coo + end interface + + + interface + subroutine psb_d_cuda_mv_hlg_from_fmt(a,b,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_mv_hlg_from_fmt + end interface + + interface + subroutine psb_d_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_hlg_csmv + end interface + interface + subroutine psb_d_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_hlg_csmm + end interface + + interface + subroutine psb_d_cuda_hlg_scal(d,a,info, side) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_cuda_hlg_scal + end interface + + interface + subroutine psb_d_cuda_hlg_scals(d,a,info) + import :: psb_d_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_hlg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_cuda_hlg_sizeof(a) result(res) + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function d_cuda_hlg_sizeof + + function d_cuda_hlg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLG' + end function d_cuda_hlg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_cuda_hlg_free(a) + use hlldev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_d_hll_sparse_mat%free() + + return + + end subroutine d_cuda_hlg_free + + + subroutine d_cuda_hlg_sync(a) + implicit none + class(psb_d_cuda_hlg_sparse_mat), target, intent(in) :: a + class(psb_d_cuda_hlg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine d_cuda_hlg_sync + + subroutine d_cuda_hlg_set_host(a) + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_cuda_hlg_set_host + + subroutine d_cuda_hlg_set_dev(a) + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_cuda_hlg_set_dev + + subroutine d_cuda_hlg_set_sync(a) + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_cuda_hlg_set_sync + + function d_cuda_hlg_is_dev(a) result(res) + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_cuda_hlg_is_dev + + function d_cuda_hlg_is_host(a) result(res) + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_cuda_hlg_is_host + + function d_cuda_hlg_is_sync(a) result(res) + implicit none + class(psb_d_cuda_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_cuda_hlg_is_sync + + + subroutine d_cuda_hlg_finalize(a) + use hlldev_mod + implicit none + type(psb_d_cuda_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine d_cuda_hlg_finalize + +end module psb_d_cuda_hlg_mat_mod diff --git a/cuda/psb_d_cuda_hybg_mat_mod.F90 b/cuda/psb_d_cuda_hybg_mat_mod.F90 new file mode 100644 index 00000000..065c0db3 --- /dev/null +++ b/cuda/psb_d_cuda_hybg_mat_mod.F90 @@ -0,0 +1,287 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +#if CUDA_SHORT_VERSION <= 10 + +module psb_d_cuda_hybg_mat_mod + + use iso_c_binding + use psb_d_mat_mod + use cusparse_mod + + type, extends(psb_d_csr_sparse_mat) :: psb_d_cuda_hybg_sparse_mat + ! + ! HYBG. An interface to the cuSPARSE HYB + ! On the CPU side we keep a CSR storage. + ! + ! + ! + ! + type(d_Hmat) :: deviceMat + + contains + procedure, nopass :: get_fmt => d_cuda_hybg_get_fmt + procedure, pass(a) :: sizeof => d_cuda_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_d_cuda_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_cuda_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_cuda_hybg_csmm + procedure, pass(a) :: csmv => psb_d_cuda_hybg_csmv + procedure, pass(a) :: scals => psb_d_cuda_hybg_scals + procedure, pass(a) :: scalv => psb_d_cuda_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_d_cuda_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_cuda_hybg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_d_cuda_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_cuda_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_cuda_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_cuda_mv_hybg_from_fmt + procedure, pass(a) :: free => d_cuda_hybg_free + procedure, pass(a) :: mold => psb_d_cuda_hybg_mold + procedure, pass(a) :: to_gpu => psb_d_cuda_hybg_to_gpu + final :: d_cuda_hybg_finalize + end type psb_d_cuda_hybg_sparse_mat + + private :: d_cuda_hybg_get_nzeros, d_cuda_hybg_free, d_cuda_hybg_get_fmt, & + & d_cuda_hybg_get_size, d_cuda_hybg_sizeof, d_cuda_hybg_get_nz_row + + + interface + subroutine psb_d_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_hybg_inner_vect_sv + end interface + + interface + subroutine psb_d_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_hybg_vect_mv + end interface + + interface + subroutine psb_d_cuda_hybg_reallocate_nz(nz,a) + import :: psb_d_cuda_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_d_cuda_hybg_reallocate_nz + end interface + + interface + subroutine psb_d_cuda_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_d_cuda_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_cuda_hybg_allocate_mnnz + end interface + + interface + subroutine psb_d_cuda_hybg_mold(a,b,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_hybg_mold + end interface + + interface + subroutine psb_d_cuda_hybg_to_gpu(a,info, nzrm) + import :: psb_d_cuda_hybg_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_d_cuda_hybg_to_gpu + end interface + + interface + subroutine psb_d_cuda_cp_hybg_from_coo(a,b,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_cp_hybg_from_coo + end interface + + interface + subroutine psb_d_cuda_cp_hybg_from_fmt(a,b,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_cp_hybg_from_fmt + end interface + + interface + subroutine psb_d_cuda_mv_hybg_from_coo(a,b,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_mv_hybg_from_coo + end interface + + interface + subroutine psb_d_cuda_mv_hybg_from_fmt(a,b,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_mv_hybg_from_fmt + end interface + + interface + subroutine psb_d_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_hybg_csmv + end interface + interface + subroutine psb_d_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_cuda_hybg_csmm + end interface + + interface + subroutine psb_d_cuda_hybg_scal(d,a,info,side) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_cuda_hybg_scal + end interface + + interface + subroutine psb_d_cuda_hybg_scals(d,a,info) + import :: psb_d_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cuda_hybg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_cuda_hybg_sizeof(a) result(res) + implicit none + class(psb_d_cuda_hybg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function d_cuda_hybg_sizeof + + function d_cuda_hybg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HYBG' + end function d_cuda_hybg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_cuda_hybg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + class(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + call a%psb_d_csr_sparse_mat%free() + + return + + end subroutine d_cuda_hybg_free + + subroutine d_cuda_hybg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + type(psb_d_cuda_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + + return + end subroutine d_cuda_hybg_finalize + +end module psb_d_cuda_hybg_mat_mod +#endif diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90 new file mode 100644 index 00000000..dbc68111 --- /dev/null +++ b/cuda/psb_d_cuda_vect_mod.F90 @@ -0,0 +1,2099 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_cuda_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_d_vect_mod + use psb_cuda_env_mod + use psb_i_vect_mod + use psb_i_cuda_vect_mod + use psb_i_vectordev_mod + use psb_d_vectordev_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_base_vect_type) :: psb_d_vect_cuda + integer :: state = is_host + type(c_ptr) :: deviceVect = c_null_ptr + real(c_double), allocatable :: pinned_buffer(:) + type(c_ptr) :: dt_p_buf = c_null_ptr + real(c_double), allocatable :: buffer(:) + type(c_ptr) :: dt_buf = c_null_ptr + integer :: dt_buf_sz = 0 + type(c_ptr) :: i_buf = c_null_ptr + integer :: i_buf_sz = 0 + contains + procedure, pass(x) :: get_nrows => d_cuda_get_nrows + procedure, nopass :: get_fmt => d_cuda_get_fmt + + procedure, pass(x) :: all => d_cuda_all + procedure, pass(x) :: zero => d_cuda_zero + procedure, pass(x) :: asb_m => d_cuda_asb_m + procedure, pass(x) :: sync => d_cuda_sync + procedure, pass(x) :: sync_space => d_cuda_sync_space + procedure, pass(x) :: bld_x => d_cuda_bld_x + procedure, pass(x) :: bld_mn => d_cuda_bld_mn + procedure, pass(x) :: free => d_cuda_free + procedure, pass(x) :: ins_a => d_cuda_ins_a + procedure, pass(x) :: ins_v => d_cuda_ins_v + procedure, pass(x) :: is_host => d_cuda_is_host + procedure, pass(x) :: is_dev => d_cuda_is_dev + procedure, pass(x) :: is_sync => d_cuda_is_sync + procedure, pass(x) :: set_host => d_cuda_set_host + procedure, pass(x) :: set_dev => d_cuda_set_dev + procedure, pass(x) :: set_sync => d_cuda_set_sync + procedure, pass(x) :: set_scal => d_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => d_cuda_set_vect + procedure, pass(x) :: gthzv_x => d_cuda_gthzv_x + procedure, pass(y) :: sctb => d_cuda_sctb + procedure, pass(y) :: sctb_x => d_cuda_sctb_x + procedure, pass(x) :: gthzbuf => d_cuda_gthzbuf + procedure, pass(y) :: sctb_buf => d_cuda_sctb_buf + procedure, pass(x) :: new_buffer => d_cuda_new_buffer + procedure, nopass :: device_wait => d_cuda_device_wait + procedure, pass(x) :: free_buffer => d_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => d_cuda_maybe_free_buffer + procedure, pass(x) :: dot_v => d_cuda_dot_v + procedure, pass(x) :: dot_a => d_cuda_dot_a + procedure, pass(y) :: axpby_v => d_cuda_axpby_v + procedure, pass(y) :: axpby_a => d_cuda_axpby_a + procedure, pass(z) :: abgdxyz => d_cuda_abgdxyz + procedure, pass(y) :: mlt_v => d_cuda_mlt_v + procedure, pass(y) :: mlt_a => d_cuda_mlt_a + procedure, pass(z) :: mlt_a_2 => d_cuda_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => d_cuda_mlt_v_2 + procedure, pass(x) :: scal => d_cuda_scal + procedure, pass(x) :: nrm2 => d_cuda_nrm2 + procedure, pass(x) :: amax => d_cuda_amax + procedure, pass(x) :: asum => d_cuda_asum + procedure, pass(x) :: absval1 => d_cuda_absval1 + procedure, pass(x) :: absval2 => d_cuda_absval2 + + final :: d_cuda_vect_finalize + end type psb_d_vect_cuda + + public :: psb_d_vect_cuda_ + private :: constructor + interface psb_d_vect_cuda_ + module procedure constructor + end interface psb_d_vect_cuda_ + +contains + + function constructor(x) result(this) + real(psb_dpk_) :: x(:) + type(psb_d_vect_cuda) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x),info) + + end function constructor + + subroutine d_cuda_device_wait() + call psb_cudaSync() + end subroutine d_cuda_device_wait + + subroutine d_cuda_new_buffer(n,x,info) + use psb_realloc_mod + use psb_cuda_env_mod + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + + if (psb_cuda_DeviceHasUVA()) then + if (allocated(x%combuf)) then + if (size(x%combuf) idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_cuda_DeviceHasUVA()) then + ! + ! Only need a sync in this branch; in the others + ! cudamemCpy acts as a sync point. + ! + if (allocated(x%pinned_buffer)) then + if (size(x%pinned_buffer) < n) then + call inner_unregister(x%pinned_buffer) + deallocate(x%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(x%pinned_buffer)) then + allocate(x%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(x%pinned_buffer,x%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + info = igathMultiVecDeviceDoubleVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_p_buf, 1) + call psb_cudaSync() + y(1:n) = x%pinned_buffer(1:n) + + else + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeDouble(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateDouble(x%dt_buf,n) + x%dt_buf_sz=n + end if + if (info == 0) & + & info = igathMultiVecDeviceDoubleVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_buf, 1) + if (info == 0) & + & info = readDouble(x%dt_buf,y,n) + + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + + if (x%i_buf_sz < ni) then + if (c_associated(x%i_buf)) then + call freeInt(x%i_buf) + x%i_buf = c_null_ptr + end if + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeDouble(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateDouble(x%dt_buf,n) + x%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(x%i_buf,ii%v,ni) + if (info == 0) & + & info = igathMultiVecDeviceDouble(x%deviceVect,& + & 0, n, i, x%i_buf, 1, x%dt_buf, 1) + if (info == 0) & + & info = readDouble(x%dt_buf,y,n) + + end select + + end subroutine d_cuda_gthzv_x + + subroutine d_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i,n + class(psb_i_base_vect_type) :: idx + class(psb_d_vect_cuda) :: x + integer :: info, ni + + info = 0 +!!$ write(0,*) 'Starting gth_zbuf' + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'gthzbuf') + return + end if + + select type(ii=> idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_cuda_DeviceHasUVA()) then + info = igathMultiVecDeviceDoubleVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) + + else + info = igathMultiVecDeviceDoubleVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_buf, 1) + if (info == 0) & + & info = readDouble(i,x%dt_buf,x%combuf(i:),n,1) + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + info = 0 + if (.not.c_associated(x%i_buf)) then + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,x%i_buf,ii%v(i:),n,1) + + if (info == 0) & + & info = igathMultiVecDeviceDouble(x%deviceVect,& + & 0, n, i, x%i_buf, i,x%dt_buf, 1) + + if (info == 0) & + & info = readDouble(i,x%dt_buf,x%combuf(i:),n,1) + + end select + + end subroutine d_cuda_gthzbuf + + subroutine d_cuda_sctb(n,idx,x,beta,y) + implicit none + !use psb_const_mod + integer(psb_ipk_) :: n, idx(:) + real(psb_dpk_) :: beta, x(:) + class(psb_d_vect_cuda) :: y + integer(psb_ipk_) :: info + + if (n == 0) return + + if (y%is_dev()) call y%sync() + + call y%psb_d_base_vect_type%sctb(n,idx,x,beta) + call y%set_host() + + end subroutine d_cuda_sctb + + subroutine d_cuda_sctb_x(i,n,idx,x,beta,y) + use psb_cuda_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + real(psb_dpk_) :: beta, x(:) + class(psb_d_vect_cuda) :: y + integer :: info, ni + + select type(ii=> idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + + ! + if (psb_cuda_DeviceHasUVA()) then + if (allocated(y%pinned_buffer)) then + if (size(y%pinned_buffer) < n) then + call inner_unregister(y%pinned_buffer) + deallocate(y%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(y%pinned_buffer)) then + allocate(y%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(y%pinned_buffer,y%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + y%pinned_buffer(1:n) = x(1:n) + info = iscatMultiVecDeviceDoubleVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_p_buf, 1,beta) + else + + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeDouble(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateDouble(y%dt_buf,n) + y%dt_buf_sz=n + end if + info = writeDouble(y%dt_buf,x,n) + info = iscatMultiVecDeviceDoubleVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_buf, 1,beta) + + end if + + class default + ni = size(ii%v) + + if (y%i_buf_sz < ni) then + if (c_associated(y%i_buf)) then + call freeInt(y%i_buf) + y%i_buf = c_null_ptr + end if + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeDouble(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateDouble(y%dt_buf,n) + y%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(y%i_buf,ii%v(i:i+n-1),n) + info = writeDouble(y%dt_buf,x,n) + info = iscatMultiVecDeviceDouble(y%deviceVect,& + & 0, n, 1, y%i_buf, 1, y%dt_buf, 1,beta) + + + end select + ! + ! Need a sync here to make sure we are not reallocating + ! the buffers before iscatMulti has finished. + ! + call psb_cudaSync() + call y%set_dev() + + end subroutine d_cuda_sctb_x + + subroutine d_cuda_sctb_buf(i,n,idx,beta,y) + use psi_serial_mod + use psb_cuda_env_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + real(psb_dpk_) :: beta + class(psb_d_vect_cuda) :: y + integer(psb_ipk_) :: info, ni + +!!$ write(0,*) 'Starting sctb_buf' + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'sctb_buf') + return + end if + + + select type(ii=> idx) + class is (psb_i_vect_cuda) + + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + if (psb_cuda_DeviceHasUVA()) then + info = iscatMultiVecDeviceDoubleVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) + else + info = writeDouble(i,y%dt_buf,y%combuf(i:),n,1) + info = iscatMultiVecDeviceDoubleVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_buf, 1,beta) + + end if + + class default + !call y%sct(n,ii%v(i:),x,beta) + ni = size(ii%v) + info = 0 + if (.not.c_associated(y%i_buf)) then + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,y%i_buf,ii%v(i:),n,1) + if (info == 0) & + & info = writeDouble(i,y%dt_buf,y%combuf(i:),n,1) + if (info == 0) info = iscatMultiVecDeviceDouble(y%deviceVect,& + & 0, n, i, y%i_buf, i, y%dt_buf, 1,beta) + end select +!!$ write(0,*) 'Done sctb_buf' + + end subroutine d_cuda_sctb_buf + + + subroutine d_cuda_bld_x(x,this) + use psb_base_mod + real(psb_dpk_), intent(in) :: this(:) + class(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this),x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'d_cuda_bld_x',& + & i_err=(/size(this),izero,izero,izero,izero/)) + end if + x%v(:) = this(:) + call x%set_host() + call x%sync() + + end subroutine d_cuda_bld_x + + subroutine d_cuda_bld_mn(x,n) + integer(psb_mpk_), intent(in) :: n + class(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n,info) + if (info /= 0) then + call psb_errpush(info,'d_cuda_bld_n',i_err=(/n,n,n,n,n/)) + end if + + end subroutine d_cuda_bld_mn + + subroutine d_cuda_set_host(x) + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + + x%state = is_host + end subroutine d_cuda_set_host + + subroutine d_cuda_set_dev(x) + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + + x%state = is_dev + end subroutine d_cuda_set_dev + + subroutine d_cuda_set_sync(x) + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + + x%state = is_sync + end subroutine d_cuda_set_sync + + function d_cuda_is_dev(x) result(res) + implicit none + class(psb_d_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function d_cuda_is_dev + + function d_cuda_is_host(x) result(res) + implicit none + class(psb_d_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function d_cuda_is_host + + function d_cuda_is_sync(x) result(res) + implicit none + class(psb_d_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function d_cuda_is_sync + + + function d_cuda_get_nrows(x) result(res) + implicit none + class(psb_d_vect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = 0 + if (allocated(x%v)) res = size(x%v) + end function d_cuda_get_nrows + + function d_cuda_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'dGPU' + end function d_cuda_get_fmt + + subroutine d_cuda_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_d_vect_cuda), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n,x%v,info) + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'d_cuda_all',& + & i_err=(/n,n,n,n,n/)) + end if + end subroutine d_cuda_all + + subroutine d_cuda_zero(x) + use psi_serial_mod + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + ! Since we are overwriting, make sure to do it + ! on the GPU side + call x%set_dev() + call x%set_scal(dzero) + end subroutine d_cuda_zero + + subroutine d_cuda_asb_m(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + if (x%is_dev()) then + nd = getMultiVecDeviceSize(x%deviceVect) + if (nd < n) then + call x%sync() + call x%psb_d_base_vect_type%asb(n,info) + if (info == psb_success_) call x%sync_space(info) + call x%set_host() + end if + else ! + if (x%get_nrows() size(x%v)).or.(n > x%get_nrows())) then +!!$ write(0,*) 'Incoherent situation : sizes',n,size(x%v),x%get_nrows() + call psb_realloc(n,x%v,info) + end if + info = readMultiVecDevice(x%deviceVect,x%v) + end if + if (info == 0) call x%set_sync() + if (info /= 0) then + info=psb_err_internal_error_ + call psb_errpush(info,'d_cuda_sync') + end if + + end subroutine d_cuda_sync + + subroutine d_cuda_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) deallocate(x%v, stat=info) + if (c_associated(x%deviceVect)) then +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' + call freeMultiVecDevice(x%deviceVect) + x%deviceVect=c_null_ptr + end if + call x%free_buffer(info) + call x%set_sync() + end subroutine d_cuda_free + + subroutine d_cuda_set_scal(x,val,first,last) + class(psb_d_vect_cuda), intent(inout) :: x + real(psb_dpk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: info, first_, last_ + + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1,first) + if (present(last)) last_ = min(last,last_) + + info = setScalDevice(val,first_,last_,1,x%deviceVect) + call x%set_dev() + + end subroutine d_cuda_set_scal +!!$ +!!$ subroutine d_cuda_set_vect(x,val) +!!$ class(psb_d_vect_cuda), intent(inout) :: x +!!$ real(psb_dpk_), intent(in) :: val(:) +!!$ integer(psb_ipk_) :: nr +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_d_base_vect_type%set_vect(val) +!!$ call x%set_host() +!!$ +!!$ end subroutine d_cuda_set_vect + + + + function d_cuda_dot_v(n,x,y) result(res) + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + real(psb_dpk_), external :: ddot + integer(psb_ipk_) :: info + + res = dzero + ! + ! Note: this is the gpu implementation. + ! When we get here, we are sure that X is of + ! TYPE psb_d_vect + ! + select type(yy => y) + type is (psb_d_base_vect_type) + if (x%is_dev()) call x%sync() + res = ddot(n,x%v,1,yy%v,1) + type is (psb_d_vect_cuda) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) + if (info /= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,'d_cuda_dot_v') + end if + + class default + ! y%sync is done in dot_a + call x%sync() + res = y%dot(n,x%v) + end select + + end function d_cuda_dot_v + + function d_cuda_dot_a(n,x,y) result(res) + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + real(psb_dpk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + real(psb_dpk_), external :: ddot + + if (x%is_dev()) call x%sync() + res = ddot(n,y,1,x%v,1) + + end function d_cuda_dot_a + + subroutine d_cuda_axpby_v(m,alpha, x, beta, y, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_vect_cuda), intent(inout) :: y + real(psb_dpk_), intent (in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny + + info = psb_success_ + + select type(xx => x) + type is (psb_d_vect_cuda) + ! Do something different here + if ((beta /= dzero).and.y%is_host())& + & call y%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(y%deviceVect) + if ((nx x) + class is (psb_d_vect_cuda) + select type(yy => y) + class is (psb_d_vect_cuda) + select type(zz => z) + class is (psb_d_vect_cuda) + ! Do something different here + if ((beta /= dzero).and.yy%is_host())& + & call yy%sync() + if ((delta /= dzero).and.zz%is_host())& + & call zz%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + if ((nx x) + class is (psb_d_vect_cuda) + select type(yy => y) + class is (psb_d_vect_cuda) + select type(zz => z) + class is (psb_d_vect_cuda) + ! Do something different here + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if (zz%is_host()) call zz%sync() + if (w%is_host()) call w%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + nw = getMultiVecDeviceSize(w%deviceVect) + if ((nx x) + type is (psb_d_base_vect_type) + if (y%is_dev()) call y%sync() + do i=1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + type is (psb_d_vect_cuda) + ! Do something different here + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + call y%mlt(xx%v,info) + call y%set_host() + end select + + end subroutine d_cuda_mlt_v + + subroutine d_cuda_mlt_a(x, y, info) + use psi_serial_mod + implicit none + real(psb_dpk_), intent(in) :: x(:) + class(psb_d_vect_cuda), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync() + call y%psb_d_base_vect_type%mlt(x,info) + ! set_host() is invoked in the base method + end subroutine d_cuda_mlt_a + + subroutine d_cuda_mlt_a_2(alpha,x,y,beta,z,info) + use psi_serial_mod + implicit none + real(psb_dpk_), intent(in) :: alpha,beta + real(psb_dpk_), intent(in) :: x(:) + real(psb_dpk_), intent(in) :: y(:) + class(psb_d_vect_cuda), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync() + call z%psb_d_base_vect_type%mlt(alpha,x,y,beta,info) + ! set_host() is invoked in the base method + end subroutine d_cuda_mlt_a_2 + + subroutine d_cuda_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + use psi_serial_mod + use psb_string_mod + implicit none + real(psb_dpk_), intent(in) :: alpha,beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_vect_cuda), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ + + if (.false.) then + ! These are present just for coherence with the + ! complex versions; they do nothing here. + conjgx_=.false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') + conjgy_=.false. + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') + end if + + n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) + + ! + ! Need to reconsider BETA in the GPU side + ! of things. + ! + info = 0 + select type(xx => x) + type is (psb_d_vect_cuda) + select type (yy => y) + type is (psb_d_vect_cuda) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= dzero).and.(z%is_host())) call z%sync() + info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& + & yy%deviceVect,beta,z%deviceVect) + call z%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + if ((beta /= dzero).and.(z%is_dev())) call z%sync() + call z%psb_d_base_vect_type%mlt(alpha,xx,yy,beta,info) + call z%set_host() + end select + + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= dzero).and.(z%is_dev())) call z%sync() + call z%psb_d_base_vect_type%mlt(alpha,x,y,beta,info) + call z%set_host() + end select + end subroutine d_cuda_mlt_v_2 + + subroutine d_cuda_scal(alpha, x) + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + real(psb_dpk_), intent (in) :: alpha + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = scalMultiVecDevice(alpha,x%deviceVect) + call x%set_dev() + end subroutine d_cuda_scal + + + function d_cuda_nrm2(n,x) result(res) + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + ! WARNING: this should be changed. + if (x%is_host()) call x%sync() + info = nrm2MultiVecDevice(res,n,x%deviceVect) + + end function d_cuda_nrm2 + + function d_cuda_amax(n,x) result(res) + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = amaxMultiVecDevice(res,n,x%deviceVect) + + end function d_cuda_amax + + function d_cuda_asum(n,x) result(res) + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = asumMultiVecDevice(res,n,x%deviceVect) + + end function d_cuda_asum + + subroutine d_cuda_absval1(x) + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + n=x%get_nrows() + info = absMultiVecDevice(n,done,x%deviceVect) + + end subroutine d_cuda_absval1 + + subroutine d_cuda_absval2(x,y) + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + n=min(x%get_nrows(),y%get_nrows()) + select type (yy=> y) + class is (psb_d_vect_cuda) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = absMultiVecDevice(n,done,x%deviceVect,yy%deviceVect) + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_d_base_vect_type%absval(y) + end select + end subroutine d_cuda_absval2 + + + subroutine d_cuda_vect_finalize(x) + use psi_serial_mod + use psb_realloc_mod + implicit none + type(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + info = 0 + call x%free(info) + end subroutine d_cuda_vect_finalize + + subroutine d_cuda_ins_v(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_d_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_cuda + + info = 0 + if (psb_errstatus_fatal()) return + + done_cuda = .false. + select type(virl => irl) + class is (psb_i_vect_cuda) + select type(vval => val) + class is (psb_d_vect_cuda) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + info = geinsMultiVecDeviceDouble(n,virl%deviceVect,& + & vval%deviceVect,dupl,1,x%deviceVect) + call x%set_dev() + done_cuda=.true. + end select + end select + + if (.not.done_cuda) then + if (irl%is_dev()) call irl%sync() + if (val%is_dev()) call val%sync() + call x%ins(n,irl%v,val%v,dupl,info) + end if + + if (info /= 0) then + call psb_errpush(info,'cuda_vect_ins') + return + end if + + end subroutine d_cuda_ins_v + + subroutine d_cuda_ins_a(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_d_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_d_base_vect_type%ins(n,irl,val,dupl,info) + call x%set_host() + + end subroutine d_cuda_ins_a + +end module psb_d_cuda_vect_mod + + +! +! Multivectors +! + + + +module psb_d_cuda_multivect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_d_multivect_mod + use psb_d_base_multivect_mod + use psb_cuda_env_mod + use psb_i_multivect_mod + use psb_i_cuda_multivect_mod + use psb_d_vectordev_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_base_multivect_type) :: psb_d_multivect_cuda + integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 + type(c_ptr) :: deviceVect = c_null_ptr + real(c_double), allocatable :: buffer(:,:) + type(c_ptr) :: dt_buf = c_null_ptr + contains + procedure, pass(x) :: get_nrows => d_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => d_cuda_multi_get_ncols + procedure, nopass :: get_fmt => d_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => d_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => d_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => d_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => d_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => d_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => d_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => d_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => d_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => d_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => d_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => d_cuda_multi_amax +!!$ procedure, pass(x) :: asum => d_cuda_multi_asum + procedure, pass(x) :: all => d_cuda_multi_all + procedure, pass(x) :: zero => d_cuda_multi_zero + procedure, pass(x) :: asb => d_cuda_multi_asb + procedure, pass(x) :: sync => d_cuda_multi_sync + procedure, pass(x) :: sync_space => d_cuda_multi_sync_space + procedure, pass(x) :: bld_x => d_cuda_multi_bld_x + procedure, pass(x) :: bld_n => d_cuda_multi_bld_n + procedure, pass(x) :: free => d_cuda_multi_free + procedure, pass(x) :: ins => d_cuda_multi_ins + procedure, pass(x) :: is_host => d_cuda_multi_is_host + procedure, pass(x) :: is_dev => d_cuda_multi_is_dev + procedure, pass(x) :: is_sync => d_cuda_multi_is_sync + procedure, pass(x) :: set_host => d_cuda_multi_set_host + procedure, pass(x) :: set_dev => d_cuda_multi_set_dev + procedure, pass(x) :: set_sync => d_cuda_multi_set_sync + procedure, pass(x) :: set_scal => d_cuda_multi_set_scal + procedure, pass(x) :: set_vect => d_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => d_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => d_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => d_cuda_multi_sctb_x + final :: d_cuda_multi_vect_finalize + end type psb_d_multivect_cuda + + public :: psb_d_multivect_cuda + private :: constructor + interface psb_d_multivect_cuda + module procedure constructor + end interface + +contains + + function constructor(x) result(this) + real(psb_dpk_) :: x(:,:) + type(psb_d_multivect_cuda) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x,1),size(x,2),info) + + end function constructor + + +!!$ subroutine d_cuda_multi_gthzv_x(i,n,idx,x,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i,n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ real(psb_dpk_) :: y(:) +!!$ class(psb_d_multivect_cuda) :: x +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_cuda) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (x%is_host()) call x%sync() +!!$ +!!$ if (allocated(x%buffer)) then +!!$ if (size(x%buffer) < n) then +!!$ call inner_unregister(x%buffer) +!!$ deallocate(x%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(x%buffer)) then +!!$ allocate(x%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(x%buffer,x%dt_buf) +!!$ endif +!!$ info = igathMultiVecDeviceDouble(x%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, x%dt_buf, 1) +!!$ call psb_cudaSync() +!!$ y(1:n) = x%buffer(1:n) +!!$ +!!$ class default +!!$ call x%gth(n,ii%v(i:),y) +!!$ end select +!!$ +!!$ +!!$ end subroutine d_cuda_multi_gthzv_x +!!$ +!!$ +!!$ +!!$ subroutine d_cuda_multi_sctb(n,idx,x,beta,y) +!!$ implicit none +!!$ !use psb_const_mod +!!$ integer(psb_ipk_) :: n, idx(:) +!!$ real(psb_dpk_) :: beta, x(:) +!!$ class(psb_d_multivect_cuda) :: y +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (n == 0) return +!!$ +!!$ if (y%is_dev()) call y%sync() +!!$ +!!$ call y%psb_d_base_multivect_type%sctb(n,idx,x,beta) +!!$ call y%set_host() +!!$ +!!$ end subroutine d_cuda_multi_sctb +!!$ +!!$ subroutine d_cuda_multi_sctb_x(i,n,idx,x,beta,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ real(psb_dpk_) :: beta, x(:) +!!$ class(psb_d_multivect_cuda) :: y +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_cuda) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (y%is_host()) call y%sync() +!!$ +!!$ if (allocated(y%buffer)) then +!!$ if (size(y%buffer) < n) then +!!$ call inner_unregister(y%buffer) +!!$ deallocate(y%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(y%buffer)) then +!!$ allocate(y%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(y%buffer,y%dt_buf) +!!$ endif +!!$ y%buffer(1:n) = x(1:n) +!!$ info = iscatMultiVecDeviceDouble(y%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, y%dt_buf, 1,beta) +!!$ +!!$ call y%set_dev() +!!$ call psb_cudaSync() +!!$ +!!$ class default +!!$ call y%sct(n,ii%v(i:),x,beta) +!!$ end select +!!$ +!!$ end subroutine d_cuda_multi_sctb_x + + + subroutine d_cuda_multi_bld_x(x,this) + use psb_base_mod + real(psb_dpk_), intent(in) :: this(:,:) + class(psb_d_multivect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info, m, n + + m=size(this,1) + n=size(this,2) + x%m_nrows = m + x%m_ncols = n + call psb_realloc(m,n,x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'d_cuda_multi_bld_x',& + & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) + end if + x%v(1:m,1:n) = this(1:m,1:n) + call x%set_host() + call x%sync() + + end subroutine d_cuda_multi_bld_x + + subroutine d_cuda_multi_bld_n(x,m,n) + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_multivect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(m,n,info) + if (info /= 0) then + call psb_errpush(info,'d_cuda_multi_bld_n',i_err=(/m,n,n,n,n/)) + end if + + end subroutine d_cuda_multi_bld_n + + + subroutine d_cuda_multi_set_host(x) + implicit none + class(psb_d_multivect_cuda), intent(inout) :: x + + x%state = is_host + end subroutine d_cuda_multi_set_host + + subroutine d_cuda_multi_set_dev(x) + implicit none + class(psb_d_multivect_cuda), intent(inout) :: x + + x%state = is_dev + end subroutine d_cuda_multi_set_dev + + subroutine d_cuda_multi_set_sync(x) + implicit none + class(psb_d_multivect_cuda), intent(inout) :: x + + x%state = is_sync + end subroutine d_cuda_multi_set_sync + + function d_cuda_multi_is_dev(x) result(res) + implicit none + class(psb_d_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function d_cuda_multi_is_dev + + function d_cuda_multi_is_host(x) result(res) + implicit none + class(psb_d_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function d_cuda_multi_is_host + + function d_cuda_multi_is_sync(x) result(res) + implicit none + class(psb_d_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function d_cuda_multi_is_sync + + + function d_cuda_multi_get_nrows(x) result(res) + implicit none + class(psb_d_multivect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_nrows + + end function d_cuda_multi_get_nrows + + function d_cuda_multi_get_ncols(x) result(res) + implicit none + class(psb_d_multivect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_ncols + + end function d_cuda_multi_get_ncols + + function d_cuda_multi_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'dGPU' + end function d_cuda_multi_get_fmt + +!!$ function d_cuda_multi_dot_v(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_d_multivect_cuda), intent(inout) :: x +!!$ class(psb_d_base_multivect_type), intent(inout) :: y +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ real(psb_dpk_), external :: ddot +!!$ integer(psb_ipk_) :: info +!!$ +!!$ res = dzero +!!$ ! +!!$ ! Note: this is the gpu implementation. +!!$ ! When we get here, we are sure that X is of +!!$ ! TYPE psb_d_vect +!!$ ! +!!$ select type(yy => y) +!!$ type is (psb_d_base_multivect_type) +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,x%v,1,yy%v,1) +!!$ type is (psb_d_multivect_cuda) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) +!!$ if (info /= 0) then +!!$ info = psb_err_internal_error_ +!!$ call psb_errpush(info,'d_cuda_multi_dot_v') +!!$ end if +!!$ +!!$ class default +!!$ ! y%sync is done in dot_a +!!$ call x%sync() +!!$ res = y%dot(n,x%v) +!!$ end select +!!$ +!!$ end function d_cuda_multi_dot_v +!!$ +!!$ function d_cuda_multi_dot_a(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_d_multivect_cuda), intent(inout) :: x +!!$ real(psb_dpk_), intent(in) :: y(:) +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ real(psb_dpk_), external :: ddot +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,y,1,x%v,1) +!!$ +!!$ end function d_cuda_multi_dot_a +!!$ +!!$ subroutine d_cuda_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_d_base_multivect_type), intent(inout) :: x +!!$ class(psb_d_multivect_cuda), intent(inout) :: y +!!$ real(psb_dpk_), intent (in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny +!!$ +!!$ info = psb_success_ +!!$ +!!$ select type(xx => x) +!!$ type is (psb_d_base_multivect_type) +!!$ if ((beta /= dzero).and.(y%is_dev()))& +!!$ & call y%sync() +!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) +!!$ call y%set_host() +!!$ type is (psb_d_multivect_cuda) +!!$ ! Do something different here +!!$ if ((beta /= dzero).and.y%is_host())& +!!$ & call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ nx = getMultiVecDeviceSize(xx%deviceVect) +!!$ ny = getMultiVecDeviceSize(y%deviceVect) +!!$ if ((nx x) +!!$ type is (psb_d_base_multivect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ do i=1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ type is (psb_d_multivect_cuda) +!!$ ! Do something different here +!!$ if (y%is_host()) call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) +!!$ call y%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call y%mlt(xx%v,info) +!!$ call y%set_host() +!!$ end select +!!$ +!!$ end subroutine d_cuda_multi_mlt_v +!!$ +!!$ subroutine d_cuda_multi_mlt_a(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ real(psb_dpk_), intent(in) :: x(:) +!!$ class(psb_d_multivect_cuda), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ call y%sync() +!!$ call y%psb_d_base_multivect_type%mlt(x,info) +!!$ call y%set_host() +!!$ end subroutine d_cuda_multi_mlt_a +!!$ +!!$ subroutine d_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ real(psb_dpk_), intent(in) :: alpha,beta +!!$ real(psb_dpk_), intent(in) :: x(:) +!!$ real(psb_dpk_), intent(in) :: y(:) +!!$ class(psb_d_multivect_cuda), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (z%is_dev()) call z%sync() +!!$ call z%psb_d_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end subroutine d_cuda_multi_mlt_a_2 +!!$ +!!$ subroutine d_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ use psi_serial_mod +!!$ use psb_string_mod +!!$ implicit none +!!$ real(psb_dpk_), intent(in) :: alpha,beta +!!$ class(psb_d_base_multivect_type), intent(inout) :: x +!!$ class(psb_d_base_multivect_type), intent(inout) :: y +!!$ class(psb_d_multivect_cuda), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ if (.false.) then +!!$ ! These are present just for coherence with the +!!$ ! complex versions; they do nothing here. +!!$ conjgx_=.false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') +!!$ conjgy_=.false. +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') +!!$ end if +!!$ +!!$ n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) +!!$ +!!$ ! +!!$ ! Need to reconsider BETA in the GPU side +!!$ ! of things. +!!$ ! +!!$ info = 0 +!!$ select type(xx => x) +!!$ type is (psb_d_multivect_cuda) +!!$ select type (yy => y) +!!$ type is (psb_d_multivect_cuda) +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ ! Z state is irrelevant: it will be done on the GPU. +!!$ info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& +!!$ & yy%deviceVect,beta,z%deviceVect) +!!$ call z%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call yy%sync() +!!$ call z%psb_d_base_multivect_type%mlt(alpha,xx,yy,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ +!!$ class default +!!$ call x%sync() +!!$ call y%sync() +!!$ call z%psb_d_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ end subroutine d_cuda_multi_mlt_v_2 + + + subroutine d_cuda_multi_set_scal(x,val) + class(psb_d_multivect_cuda), intent(inout) :: x + real(psb_dpk_), intent(in) :: val + + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_d_base_multivect_type%set_scal(val) + call x%set_host() + end subroutine d_cuda_multi_set_scal + + subroutine d_cuda_multi_set_vect(x,val) + class(psb_d_multivect_cuda), intent(inout) :: x + real(psb_dpk_), intent(in) :: val(:,:) + integer(psb_ipk_) :: nr + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_d_base_multivect_type%set_vect(val) + call x%set_host() + + end subroutine d_cuda_multi_set_vect + + + +!!$ subroutine d_cuda_multi_scal(alpha, x) +!!$ implicit none +!!$ class(psb_d_multivect_cuda), intent(inout) :: x +!!$ real(psb_dpk_), intent (in) :: alpha +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_d_base_multivect_type%scal(alpha) +!!$ call x%set_host() +!!$ end subroutine d_cuda_multi_scal +!!$ +!!$ +!!$ function d_cuda_multi_nrm2(n,x) result(res) +!!$ implicit none +!!$ class(psb_d_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ ! WARNING: this should be changed. +!!$ if (x%is_host()) call x%sync() +!!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) +!!$ +!!$ end function d_cuda_multi_nrm2 +!!$ +!!$ function d_cuda_multi_amax(n,x) result(res) +!!$ implicit none +!!$ class(psb_d_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = maxval(abs(x%v(1:n))) +!!$ +!!$ end function d_cuda_multi_amax +!!$ +!!$ function d_cuda_multi_asum(n,x) result(res) +!!$ implicit none +!!$ class(psb_d_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = sum(abs(x%v(1:n))) +!!$ +!!$ end function d_cuda_multi_asum + + subroutine d_cuda_multi_all(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_multivect_cuda), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(m,n,x%v,info,pad=dzero) + x%m_nrows = m + x%m_ncols = n + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'d_cuda_multi_all',& + & i_err=(/m,n,n,n,n/)) + end if + end subroutine d_cuda_multi_all + + subroutine d_cuda_multi_zero(x) + use psi_serial_mod + implicit none + class(psb_d_multivect_cuda), intent(inout) :: x + + if (allocated(x%v)) x%v=dzero + call x%set_host() + end subroutine d_cuda_multi_zero + + subroutine d_cuda_multi_asb(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_multivect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nd, nc + + + x%m_nrows = m + x%m_ncols = n + if (x%is_host()) then + call x%psb_d_base_multivect_type%asb(m,n,info) + if (info == psb_success_) call x%sync_space(info) + else if (x%is_dev()) then + nd = getMultiVecDevicePitch(x%deviceVect) + nc = getMultiVecDeviceCount(x%deviceVect) + if ((nd < m).or.(nc i_cuda_csrg_get_fmt + procedure, pass(a) :: sizeof => i_cuda_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_i_cuda_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_i_cuda_csrg_csmm + procedure, pass(a) :: csmv => psb_i_cuda_csrg_csmv + procedure, pass(a) :: scals => psb_i_cuda_csrg_scals + procedure, pass(a) :: scalv => psb_i_cuda_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_i_cuda_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_cuda_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_csrg_from_fmt + procedure, pass(a) :: free => i_cuda_csrg_free + procedure, pass(a) :: mold => psb_i_cuda_csrg_mold + procedure, pass(a) :: is_host => i_cuda_csrg_is_host + procedure, pass(a) :: is_dev => i_cuda_csrg_is_dev + procedure, pass(a) :: is_sync => i_cuda_csrg_is_sync + procedure, pass(a) :: set_host => i_cuda_csrg_set_host + procedure, pass(a) :: set_dev => i_cuda_csrg_set_dev + procedure, pass(a) :: set_sync => i_cuda_csrg_set_sync + procedure, pass(a) :: sync => i_cuda_csrg_sync + procedure, pass(a) :: to_gpu => psb_i_cuda_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_i_cuda_csrg_from_gpu + final :: i_cuda_csrg_finalize + end type psb_i_cuda_csrg_sparse_mat + + private :: i_cuda_csrg_get_nzeros, i_cuda_csrg_free, i_cuda_csrg_get_fmt, & + & i_cuda_csrg_get_size, i_cuda_csrg_sizeof, i_cuda_csrg_get_nz_row + + + interface + subroutine psb_i_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_i_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_csrg_vect_mv + end interface + + interface + subroutine psb_i_cuda_csrg_reallocate_nz(nz,a) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_csrg_reallocate_nz + end interface + + interface + subroutine psb_i_cuda_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_i_cuda_csrg_allocate_mnnz + end interface + + interface + subroutine psb_i_cuda_csrg_mold(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_csrg_mold + end interface + + interface + subroutine psb_i_cuda_csrg_to_gpu(a,info, nzrm) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_i_cuda_csrg_to_gpu + end interface + + interface + subroutine psb_i_cuda_csrg_from_gpu(a,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_csrg_from_gpu + end interface + + interface + subroutine psb_i_cuda_cp_csrg_from_coo(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_cp_csrg_from_coo + end interface + + interface + subroutine psb_i_cuda_cp_csrg_from_fmt(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_cp_csrg_from_fmt + end interface + + interface + subroutine psb_i_cuda_mv_csrg_from_coo(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_mv_csrg_from_coo + end interface + + interface + subroutine psb_i_cuda_mv_csrg_from_fmt(a,b,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_mv_csrg_from_fmt + end interface + + interface + subroutine psb_i_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:) + integer(psb_ipk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_csrg_csmv + end interface + interface + subroutine psb_i_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) + integer(psb_ipk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_csrg_csmm + end interface + + interface + subroutine psb_i_cuda_csrg_scal(d,a,info,side) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_i_cuda_csrg_scal + end interface + + interface + subroutine psb_i_cuda_csrg_scals(d,a,info) + import :: psb_i_cuda_csrg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function i_cuda_csrg_sizeof(a) result(res) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_ip * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function i_cuda_csrg_sizeof + + function i_cuda_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function i_cuda_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine i_cuda_csrg_set_host(a) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine i_cuda_csrg_set_host + + subroutine i_cuda_csrg_set_dev(a) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine i_cuda_csrg_set_dev + + subroutine i_cuda_csrg_set_sync(a) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine i_cuda_csrg_set_sync + + function i_cuda_csrg_is_dev(a) result(res) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function i_cuda_csrg_is_dev + + function i_cuda_csrg_is_host(a) result(res) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function i_cuda_csrg_is_host + + function i_cuda_csrg_is_sync(a) result(res) + implicit none + class(psb_i_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function i_cuda_csrg_is_sync + + + subroutine i_cuda_csrg_sync(a) + implicit none + class(psb_i_cuda_csrg_sparse_mat), target, intent(in) :: a + class(psb_i_cuda_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine i_cuda_csrg_sync + + subroutine i_cuda_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_i_csr_sparse_mat%free() + + return + + end subroutine i_cuda_csrg_free + + subroutine i_cuda_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_i_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine i_cuda_csrg_finalize + +end module psb_i_cuda_csrg_mat_mod diff --git a/cuda/psb_i_cuda_diag_mat_mod.F90 b/cuda/psb_i_cuda_diag_mat_mod.F90 new file mode 100644 index 00000000..98090f06 --- /dev/null +++ b/cuda/psb_i_cuda_diag_mat_mod.F90 @@ -0,0 +1,289 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_i_cuda_diag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_i_dia_mat_mod + + type, extends(psb_i_dia_sparse_mat) :: psb_i_cuda_diag_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => i_cuda_diag_get_fmt + procedure, pass(a) :: sizeof => i_cuda_diag_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_diag_vect_mv +! procedure, pass(a) :: csmm => psb_i_cuda_diag_csmm + procedure, pass(a) :: csmv => psb_i_cuda_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_i_cuda_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_i_cuda_diag_scals +! procedure, pass(a) :: scalv => psb_i_cuda_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_i_cuda_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_i_cuda_diag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_diag_from_fmt + procedure, pass(a) :: free => i_cuda_diag_free + procedure, pass(a) :: mold => psb_i_cuda_diag_mold + procedure, pass(a) :: to_gpu => psb_i_cuda_diag_to_gpu + final :: i_cuda_diag_finalize + end type psb_i_cuda_diag_sparse_mat + + private :: i_cuda_diag_get_nzeros, i_cuda_diag_free, i_cuda_diag_get_fmt, & + & i_cuda_diag_get_size, i_cuda_diag_sizeof, i_cuda_diag_get_nz_row + + + interface + subroutine psb_i_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_diag_vect_mv + end interface + + interface + subroutine psb_i_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_i_base_vect_type + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_diag_inner_vect_sv + end interface + + interface + subroutine psb_i_cuda_diag_reallocate_nz(nz,a) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_diag_reallocate_nz + end interface + + interface + subroutine psb_i_cuda_diag_allocate_mnnz(m,n,a,nz) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_i_cuda_diag_allocate_mnnz + end interface + + interface + subroutine psb_i_cuda_diag_mold(a,b,info) + import :: psb_i_cuda_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_diag_mold + end interface + + interface + subroutine psb_i_cuda_diag_to_gpu(a,info, nzrm) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_i_cuda_diag_to_gpu + end interface + + interface + subroutine psb_i_cuda_cp_diag_from_coo(a,b,info) + import :: psb_i_cuda_diag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_cp_diag_from_coo + end interface + + interface + subroutine psb_i_cuda_cp_diag_from_fmt(a,b,info) + import :: psb_i_cuda_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_cp_diag_from_fmt + end interface + + interface + subroutine psb_i_cuda_mv_diag_from_coo(a,b,info) + import :: psb_i_cuda_diag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_mv_diag_from_coo + end interface + + + interface + subroutine psb_i_cuda_mv_diag_from_fmt(a,b,info) + import :: psb_i_cuda_diag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_mv_diag_from_fmt + end interface + + interface + subroutine psb_i_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:) + integer(psb_ipk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_diag_csmv + end interface + interface + subroutine psb_i_cuda_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) + integer(psb_ipk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_diag_csmm + end interface + + interface + subroutine psb_i_cuda_diag_scal(d,a,info, side) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_i_cuda_diag_scal + end interface + + interface + subroutine psb_i_cuda_diag_scals(d,a,info) + import :: psb_i_cuda_diag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_diag_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function i_cuda_diag_sizeof(a) result(res) + implicit none + class(psb_i_cuda_diag_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = 8 + res = res + psb_sizeof_ip * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function i_cuda_diag_sizeof + + function i_cuda_diag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIAG' + end function i_cuda_diag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine i_cuda_diag_free(a) + use diagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_i_cuda_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_i_dia_sparse_mat%free() + + return + + end subroutine i_cuda_diag_free + + subroutine i_cuda_diag_finalize(a) + use diagdev_mod + implicit none + type(psb_i_cuda_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine i_cuda_diag_finalize + +end module psb_i_cuda_diag_mat_mod diff --git a/cuda/psb_i_cuda_dnsg_mat_mod.F90 b/cuda/psb_i_cuda_dnsg_mat_mod.F90 new file mode 100644 index 00000000..6586d115 --- /dev/null +++ b/cuda/psb_i_cuda_dnsg_mat_mod.F90 @@ -0,0 +1,275 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_i_cuda_dnsg_mat_mod + + use iso_c_binding + use psb_i_mat_mod + use psb_i_dns_mat_mod + use dnsdev_mod + + type, extends(psb_i_dns_sparse_mat) :: psb_i_cuda_dnsg_sparse_mat + ! + ! ITPACK/DNS format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of DNS, indistinguishable. + ! + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => i_cuda_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => i_cuda_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_i_cuda_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_i_cuda_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_i_cuda_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_i_cuda_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_i_cuda_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_i_cuda_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_i_cuda_dnsg_allocate_mnnz + ! Note: we *do* need the TO methods, because of the need to invoke SYNC + ! + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_dnsg_from_fmt + procedure, pass(a) :: free => i_cuda_dnsg_free + procedure, pass(a) :: mold => psb_i_cuda_dnsg_mold + procedure, pass(a) :: to_gpu => psb_i_cuda_dnsg_to_gpu + final :: i_cuda_dnsg_finalize + end type psb_i_cuda_dnsg_sparse_mat + + private :: i_cuda_dnsg_get_nzeros, i_cuda_dnsg_free, i_cuda_dnsg_get_fmt, & + & i_cuda_dnsg_get_size, i_cuda_dnsg_get_nz_row + + + interface + subroutine psb_i_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_dnsg_vect_mv + end interface +!!$ +!!$ interface +!!$ subroutine psb_i_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_i_base_vect_type +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: alpha, beta +!!$ class(psb_i_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_i_cuda_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_i_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_i_cuda_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_i_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_i_cuda_dnsg_allocate_mnnz +!!$ end interface + + interface + subroutine psb_i_cuda_dnsg_mold(a,b,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_dnsg_mold + end interface + + interface + subroutine psb_i_cuda_dnsg_to_gpu(a,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_dnsg_to_gpu + end interface + + interface + subroutine psb_i_cuda_cp_dnsg_from_coo(a,b,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_cp_dnsg_from_coo + end interface + + interface + subroutine psb_i_cuda_cp_dnsg_from_fmt(a,b,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_cp_dnsg_from_fmt + end interface + + interface + subroutine psb_i_cuda_mv_dnsg_from_coo(a,b,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_mv_dnsg_from_coo + end interface + + + interface + subroutine psb_i_cuda_mv_dnsg_from_fmt(a,b,info) + import :: psb_i_cuda_dnsg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_mv_dnsg_from_fmt + end interface + +!!$ interface +!!$ subroutine psb_i_cuda_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: alpha, beta, x(:) +!!$ integer(psb_ipk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_i_cuda_dnsg_csmv +!!$ end interface +!!$ interface +!!$ subroutine psb_i_cuda_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) +!!$ integer(psb_ipk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_i_cuda_dnsg_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_i_cuda_dnsg_scal(d,a,info, side) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_i_cuda_dnsg_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_i_cuda_dnsg_scals(d,a,info) +!!$ import :: psb_i_cuda_dnsg_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_i_cuda_dnsg_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + + function i_cuda_dnsg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNSG' + end function i_cuda_dnsg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine i_cuda_dnsg_free(a) + use dnsdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_i_dns_sparse_mat%free() + + return + + end subroutine i_cuda_dnsg_free + + subroutine i_cuda_dnsg_finalize(a) + use dnsdev_mod + implicit none + type(psb_i_cuda_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine i_cuda_dnsg_finalize + +end module psb_i_cuda_dnsg_mat_mod diff --git a/cuda/psb_i_cuda_elg_mat_mod.F90 b/cuda/psb_i_cuda_elg_mat_mod.F90 new file mode 100644 index 00000000..48311eea --- /dev/null +++ b/cuda/psb_i_cuda_elg_mat_mod.F90 @@ -0,0 +1,456 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_i_cuda_elg_mat_mod + + use iso_c_binding + use psb_i_mat_mod + use psb_i_ell_mat_mod + use psb_i_cuda_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_i_ell_sparse_mat) :: psb_i_cuda_elg_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of ELL, indistinguishable. + ! + type(c_ptr) :: deviceMat = c_null_ptr + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => i_cuda_elg_get_fmt + procedure, pass(a) :: sizeof => i_cuda_elg_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_elg_vect_mv + procedure, pass(a) :: csmm => psb_i_cuda_elg_csmm + procedure, pass(a) :: csmv => psb_i_cuda_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_i_cuda_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_i_cuda_elg_scals + procedure, pass(a) :: scalv => psb_i_cuda_elg_scal + procedure, pass(a) :: reallocate_nz => psb_i_cuda_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_cuda_elg_allocate_mnnz + procedure, pass(a) :: reinit => i_cuda_elg_reinit + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_elg_from_fmt + procedure, pass(a) :: free => i_cuda_elg_free + procedure, pass(a) :: mold => psb_i_cuda_elg_mold + procedure, pass(a) :: csput_a => psb_i_cuda_elg_csput_a + procedure, pass(a) :: csput_v => psb_i_cuda_elg_csput_v + procedure, pass(a) :: is_host => i_cuda_elg_is_host + procedure, pass(a) :: is_dev => i_cuda_elg_is_dev + procedure, pass(a) :: is_sync => i_cuda_elg_is_sync + procedure, pass(a) :: set_host => i_cuda_elg_set_host + procedure, pass(a) :: set_dev => i_cuda_elg_set_dev + procedure, pass(a) :: set_sync => i_cuda_elg_set_sync + procedure, pass(a) :: sync => i_cuda_elg_sync + procedure, pass(a) :: from_gpu => psb_i_cuda_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_i_cuda_elg_to_gpu + procedure, pass(a) :: asb => psb_i_cuda_elg_asb + final :: i_cuda_elg_finalize + end type psb_i_cuda_elg_sparse_mat + + private :: i_cuda_elg_get_nzeros, i_cuda_elg_free, i_cuda_elg_get_fmt, & + & i_cuda_elg_get_size, i_cuda_elg_sizeof, i_cuda_elg_get_nz_row, i_cuda_elg_sync + + + interface + subroutine psb_i_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_elg_vect_mv + end interface + + interface + subroutine psb_i_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_i_base_vect_type + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_elg_inner_vect_sv + end interface + + interface + subroutine psb_i_cuda_elg_reallocate_nz(nz,a) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_elg_reallocate_nz + end interface + + interface + subroutine psb_i_cuda_elg_allocate_mnnz(m,n,a,nz) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_i_cuda_elg_allocate_mnnz + end interface + + interface + subroutine psb_i_cuda_elg_mold(a,b,info) + import :: psb_i_cuda_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_elg_mold + end interface + + interface + subroutine psb_i_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_elg_csput_a + end interface + + interface + subroutine psb_i_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_i_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_i_base_vect_type,& + & psb_i_base_vect_type + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_i_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_elg_csput_v + end interface + + interface + subroutine psb_i_cuda_elg_from_gpu(a,info) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_elg_from_gpu + end interface + + interface + subroutine psb_i_cuda_elg_to_gpu(a,info, nzrm) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_i_cuda_elg_to_gpu + end interface + + interface + subroutine psb_i_cuda_cp_elg_from_coo(a,b,info) + import :: psb_i_cuda_elg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_cp_elg_from_coo + end interface + + interface + subroutine psb_i_cuda_cp_elg_from_fmt(a,b,info) + import :: psb_i_cuda_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_cp_elg_from_fmt + end interface + + interface + subroutine psb_i_cuda_mv_elg_from_coo(a,b,info) + import :: psb_i_cuda_elg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_mv_elg_from_coo + end interface + + + interface + subroutine psb_i_cuda_mv_elg_from_fmt(a,b,info) + import :: psb_i_cuda_elg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_mv_elg_from_fmt + end interface + + interface + subroutine psb_i_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:) + integer(psb_ipk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_elg_csmv + end interface + interface + subroutine psb_i_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) + integer(psb_ipk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_elg_csmm + end interface + + interface + subroutine psb_i_cuda_elg_scal(d,a,info, side) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_i_cuda_elg_scal + end interface + + interface + subroutine psb_i_cuda_elg_scals(d,a,info) + import :: psb_i_cuda_elg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_elg_scals + end interface + + interface + subroutine psb_i_cuda_elg_asb(a) + import :: psb_i_cuda_elg_sparse_mat + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_elg_asb + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function i_cuda_elg_sizeof(a) result(res) + implicit none + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_ip * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function i_cuda_elg_sizeof + + function i_cuda_elg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELG' + end function i_cuda_elg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + subroutine i_cuda_elg_reinit(a,clear) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + integer(psb_ipk_) :: isz, err_act + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev().or.a%is_sync()) then + if (clear_) call zeroEllDevice(a%deviceMat) + call a%set_dev() + else if (a%is_host()) then + a%val(:,:) = izero + end if + call a%set_upd() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine i_cuda_elg_reinit + + subroutine i_cuda_elg_free(a) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_i_ell_sparse_mat%free() + call a%set_sync() + + return + + end subroutine i_cuda_elg_free + + subroutine i_cuda_elg_sync(a) + implicit none + class(psb_i_cuda_elg_sparse_mat), target, intent(in) :: a + class(psb_i_cuda_elg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine i_cuda_elg_sync + + subroutine i_cuda_elg_set_host(a) + implicit none + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine i_cuda_elg_set_host + + subroutine i_cuda_elg_set_dev(a) + implicit none + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine i_cuda_elg_set_dev + + subroutine i_cuda_elg_set_sync(a) + implicit none + class(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine i_cuda_elg_set_sync + + function i_cuda_elg_is_dev(a) result(res) + implicit none + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function i_cuda_elg_is_dev + + function i_cuda_elg_is_host(a) result(res) + implicit none + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function i_cuda_elg_is_host + + function i_cuda_elg_is_sync(a) result(res) + implicit none + class(psb_i_cuda_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function i_cuda_elg_is_sync + + subroutine i_cuda_elg_finalize(a) + use elldev_mod + implicit none + type(psb_i_cuda_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + return + + end subroutine i_cuda_elg_finalize + +end module psb_i_cuda_elg_mat_mod diff --git a/cuda/psb_i_cuda_hdiag_mat_mod.F90 b/cuda/psb_i_cuda_hdiag_mat_mod.F90 new file mode 100644 index 00000000..22277f3a --- /dev/null +++ b/cuda/psb_i_cuda_hdiag_mat_mod.F90 @@ -0,0 +1,268 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_i_cuda_hdiag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_i_hdia_mat_mod + + type, extends(psb_i_hdia_sparse_mat) :: psb_i_cuda_hdiag_sparse_mat + ! + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => i_cuda_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => i_cuda_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_i_cuda_hdiag_csmm + procedure, pass(a) :: csmv => psb_i_cuda_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_i_cuda_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_i_cuda_hdiag_scals + ! procedure, pass(a) :: scalv => psb_i_cuda_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_i_cuda_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_i_cuda_hdiag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_hdiag_from_fmt + procedure, pass(a) :: free => i_cuda_hdiag_free + procedure, pass(a) :: mold => psb_i_cuda_hdiag_mold + procedure, pass(a) :: to_gpu => psb_i_cuda_hdiag_to_gpu + final :: i_cuda_hdiag_finalize + end type psb_i_cuda_hdiag_sparse_mat + + private :: i_cuda_hdiag_get_nzeros, i_cuda_hdiag_free, i_cuda_hdiag_get_fmt, & + & i_cuda_hdiag_get_size, i_cuda_hdiag_sizeof, i_cuda_hdiag_get_nz_row + + + interface + subroutine psb_i_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_hdiag_vect_mv + end interface + +!!$ interface +!!$ subroutine psb_i_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_i_base_vect_type +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: alpha, beta +!!$ class(psb_i_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_i_cuda_hdiag_inner_vect_sv +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_i_cuda_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_i_cuda_hdiag_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_i_cuda_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_i_cuda_hdiag_allocate_mnnz +!!$ end interface + + interface + subroutine psb_i_cuda_hdiag_mold(a,b,info) + import :: psb_i_cuda_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_hdiag_mold + end interface + + interface + subroutine psb_i_cuda_hdiag_to_gpu(a,info) + import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_hdiag_to_gpu + end interface + + interface + subroutine psb_i_cuda_cp_hdiag_from_coo(a,b,info) + import :: psb_i_cuda_hdiag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_cp_hdiag_from_coo + end interface + +!!$ interface +!!$ subroutine psb_i_cuda_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_i_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_i_cuda_cp_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_i_cuda_mv_hdiag_from_coo(a,b,info) + import :: psb_i_cuda_hdiag_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_mv_hdiag_from_coo + end interface + +!!$ +!!$ interface +!!$ subroutine psb_i_cuda_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_i_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_i_cuda_mv_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_i_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:) + integer(psb_ipk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_hdiag_csmv + end interface + +!!$ interface +!!$ subroutine psb_i_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) +!!$ integer(psb_ipk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_i_cuda_hdiag_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_i_cuda_hdiag_scal(d,a,info, side) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_i_cuda_hdiag_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_i_cuda_hdiag_scals(d,a,info) +!!$ import :: psb_i_cuda_hdiag_sparse_mat, psb_ipk_, psb_ipk_ +!!$ class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_i_cuda_hdiag_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + function i_cuda_hdiag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIAG' + end function i_cuda_hdiag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine i_cuda_hdiag_free(a) + use hdiagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_i_hdia_sparse_mat%free() + + return + + end subroutine i_cuda_hdiag_free + + subroutine i_cuda_hdiag_finalize(a) + use hdiagdev_mod + implicit none + type(psb_i_cuda_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_i_hdia_sparse_mat%free() + + return + end subroutine i_cuda_hdiag_finalize + +end module psb_i_cuda_hdiag_mat_mod diff --git a/cuda/psb_i_cuda_hlg_mat_mod.F90 b/cuda/psb_i_cuda_hlg_mat_mod.F90 new file mode 100644 index 00000000..b4e3046e --- /dev/null +++ b/cuda/psb_i_cuda_hlg_mat_mod.F90 @@ -0,0 +1,379 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_i_cuda_hlg_mat_mod + + use iso_c_binding + use psb_i_mat_mod + use psb_i_hll_mat_mod + + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_i_hll_sparse_mat) :: psb_i_cuda_hlg_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! + type(c_ptr) :: deviceMat = c_null_ptr + integer :: devstate = is_host + + contains + procedure, nopass :: get_fmt => i_cuda_hlg_get_fmt + procedure, pass(a) :: sizeof => i_cuda_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_hlg_vect_mv + procedure, pass(a) :: csmm => psb_i_cuda_hlg_csmm + procedure, pass(a) :: csmv => psb_i_cuda_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_i_cuda_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_i_cuda_hlg_scals + procedure, pass(a) :: scalv => psb_i_cuda_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_i_cuda_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_cuda_hlg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_hlg_from_fmt + procedure, pass(a) :: free => i_cuda_hlg_free + procedure, pass(a) :: mold => psb_i_cuda_hlg_mold + procedure, pass(a) :: is_host => i_cuda_hlg_is_host + procedure, pass(a) :: is_dev => i_cuda_hlg_is_dev + procedure, pass(a) :: is_sync => i_cuda_hlg_is_sync + procedure, pass(a) :: set_host => i_cuda_hlg_set_host + procedure, pass(a) :: set_dev => i_cuda_hlg_set_dev + procedure, pass(a) :: set_sync => i_cuda_hlg_set_sync + procedure, pass(a) :: sync => i_cuda_hlg_sync + procedure, pass(a) :: from_gpu => psb_i_cuda_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_i_cuda_hlg_to_gpu + final :: i_cuda_hlg_finalize + end type psb_i_cuda_hlg_sparse_mat + + private :: i_cuda_hlg_get_nzeros, i_cuda_hlg_free, i_cuda_hlg_get_fmt, & + & i_cuda_hlg_get_size, i_cuda_hlg_sizeof, i_cuda_hlg_get_nz_row + + + interface + subroutine psb_i_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_hlg_vect_mv + end interface + + interface + subroutine psb_i_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_i_base_vect_type + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_hlg_inner_vect_sv + end interface + + interface + subroutine psb_i_cuda_hlg_reallocate_nz(nz,a) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_hlg_reallocate_nz + end interface + + interface + subroutine psb_i_cuda_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_i_cuda_hlg_allocate_mnnz + end interface + + interface + subroutine psb_i_cuda_hlg_mold(a,b,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_hlg_mold + end interface + + interface + subroutine psb_i_cuda_hlg_from_gpu(a,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_hlg_from_gpu + end interface + + interface + subroutine psb_i_cuda_hlg_to_gpu(a,info, nzrm) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_i_cuda_hlg_to_gpu + end interface + + interface + subroutine psb_i_cuda_cp_hlg_from_coo(a,b,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_cp_hlg_from_coo + end interface + + interface + subroutine psb_i_cuda_cp_hlg_from_fmt(a,b,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_cp_hlg_from_fmt + end interface + + interface + subroutine psb_i_cuda_mv_hlg_from_coo(a,b,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_mv_hlg_from_coo + end interface + + + interface + subroutine psb_i_cuda_mv_hlg_from_fmt(a,b,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_mv_hlg_from_fmt + end interface + + interface + subroutine psb_i_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:) + integer(psb_ipk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_hlg_csmv + end interface + interface + subroutine psb_i_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) + integer(psb_ipk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_hlg_csmm + end interface + + interface + subroutine psb_i_cuda_hlg_scal(d,a,info, side) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_i_cuda_hlg_scal + end interface + + interface + subroutine psb_i_cuda_hlg_scals(d,a,info) + import :: psb_i_cuda_hlg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_hlg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function i_cuda_hlg_sizeof(a) result(res) + implicit none + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_ip * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function i_cuda_hlg_sizeof + + function i_cuda_hlg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLG' + end function i_cuda_hlg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine i_cuda_hlg_free(a) + use hlldev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_i_hll_sparse_mat%free() + + return + + end subroutine i_cuda_hlg_free + + + subroutine i_cuda_hlg_sync(a) + implicit none + class(psb_i_cuda_hlg_sparse_mat), target, intent(in) :: a + class(psb_i_cuda_hlg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine i_cuda_hlg_sync + + subroutine i_cuda_hlg_set_host(a) + implicit none + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine i_cuda_hlg_set_host + + subroutine i_cuda_hlg_set_dev(a) + implicit none + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine i_cuda_hlg_set_dev + + subroutine i_cuda_hlg_set_sync(a) + implicit none + class(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine i_cuda_hlg_set_sync + + function i_cuda_hlg_is_dev(a) result(res) + implicit none + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function i_cuda_hlg_is_dev + + function i_cuda_hlg_is_host(a) result(res) + implicit none + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function i_cuda_hlg_is_host + + function i_cuda_hlg_is_sync(a) result(res) + implicit none + class(psb_i_cuda_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function i_cuda_hlg_is_sync + + + subroutine i_cuda_hlg_finalize(a) + use hlldev_mod + implicit none + type(psb_i_cuda_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine i_cuda_hlg_finalize + +end module psb_i_cuda_hlg_mat_mod diff --git a/cuda/psb_i_cuda_hybg_mat_mod.F90 b/cuda/psb_i_cuda_hybg_mat_mod.F90 new file mode 100644 index 00000000..f6fc3110 --- /dev/null +++ b/cuda/psb_i_cuda_hybg_mat_mod.F90 @@ -0,0 +1,287 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +#if CUDA_SHORT_VERSION <= 10 + +module psb_i_cuda_hybg_mat_mod + + use iso_c_binding + use psb_i_mat_mod + use cusparse_mod + + type, extends(psb_i_csr_sparse_mat) :: psb_i_cuda_hybg_sparse_mat + ! + ! HYBG. An interface to the cuSPARSE HYB + ! On the CPU side we keep a CSR storage. + ! + ! + ! + ! + type(i_Hmat) :: deviceMat + + contains + procedure, nopass :: get_fmt => i_cuda_hybg_get_fmt + procedure, pass(a) :: sizeof => i_cuda_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_i_cuda_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_i_cuda_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_i_cuda_hybg_csmm + procedure, pass(a) :: csmv => psb_i_cuda_hybg_csmv + procedure, pass(a) :: scals => psb_i_cuda_hybg_scals + procedure, pass(a) :: scalv => psb_i_cuda_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_i_cuda_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_i_cuda_hybg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_i_cuda_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_i_cuda_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_i_cuda_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_i_cuda_mv_hybg_from_fmt + procedure, pass(a) :: free => i_cuda_hybg_free + procedure, pass(a) :: mold => psb_i_cuda_hybg_mold + procedure, pass(a) :: to_gpu => psb_i_cuda_hybg_to_gpu + final :: i_cuda_hybg_finalize + end type psb_i_cuda_hybg_sparse_mat + + private :: i_cuda_hybg_get_nzeros, i_cuda_hybg_free, i_cuda_hybg_get_fmt, & + & i_cuda_hybg_get_size, i_cuda_hybg_sizeof, i_cuda_hybg_get_nz_row + + + interface + subroutine psb_i_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_hybg_inner_vect_sv + end interface + + interface + subroutine psb_i_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_i_base_vect_type, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta + class(psb_i_base_vect_type), intent(inout) :: x + class(psb_i_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_hybg_vect_mv + end interface + + interface + subroutine psb_i_cuda_hybg_reallocate_nz(nz,a) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_i_cuda_hybg_reallocate_nz + end interface + + interface + subroutine psb_i_cuda_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_i_cuda_hybg_allocate_mnnz + end interface + + interface + subroutine psb_i_cuda_hybg_mold(a,b,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a + class(psb_i_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_hybg_mold + end interface + + interface + subroutine psb_i_cuda_hybg_to_gpu(a,info, nzrm) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_i_cuda_hybg_to_gpu + end interface + + interface + subroutine psb_i_cuda_cp_hybg_from_coo(a,b,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_cp_hybg_from_coo + end interface + + interface + subroutine psb_i_cuda_cp_hybg_from_fmt(a,b,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_cp_hybg_from_fmt + end interface + + interface + subroutine psb_i_cuda_mv_hybg_from_coo(a,b,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_i_coo_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_i_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_mv_hybg_from_coo + end interface + + interface + subroutine psb_i_cuda_mv_hybg_from_fmt(a,b,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_i_base_sparse_mat, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_i_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_mv_hybg_from_fmt + end interface + + interface + subroutine psb_i_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:) + integer(psb_ipk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_hybg_csmv + end interface + interface + subroutine psb_i_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: alpha, beta, x(:,:) + integer(psb_ipk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_i_cuda_hybg_csmm + end interface + + interface + subroutine psb_i_cuda_hybg_scal(d,a,info,side) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_i_cuda_hybg_scal + end interface + + interface + subroutine psb_i_cuda_hybg_scals(d,a,info) + import :: psb_i_cuda_hybg_sparse_mat, psb_ipk_, psb_ipk_ + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_i_cuda_hybg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function i_cuda_hybg_sizeof(a) result(res) + implicit none + class(psb_i_cuda_hybg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + res = 8 + res = res + psb_sizeof_ip * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function i_cuda_hybg_sizeof + + function i_cuda_hybg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HYBG' + end function i_cuda_hybg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine i_cuda_hybg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + class(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + call a%psb_i_csr_sparse_mat%free() + + return + + end subroutine i_cuda_hybg_free + + subroutine i_cuda_hybg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + type(psb_i_cuda_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + + return + end subroutine i_cuda_hybg_finalize + +end module psb_i_cuda_hybg_mat_mod +#endif diff --git a/cuda/psb_i_cuda_vect_mod.F90 b/cuda/psb_i_cuda_vect_mod.F90 new file mode 100644 index 00000000..461d84d1 --- /dev/null +++ b/cuda/psb_i_cuda_vect_mod.F90 @@ -0,0 +1,1655 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_i_cuda_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_i_vect_mod + use psb_cuda_env_mod + use psb_i_vectordev_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_i_base_vect_type) :: psb_i_vect_cuda + integer :: state = is_host + type(c_ptr) :: deviceVect = c_null_ptr + integer(c_int), allocatable :: pinned_buffer(:) + type(c_ptr) :: dt_p_buf = c_null_ptr + integer(c_int), allocatable :: buffer(:) + type(c_ptr) :: dt_buf = c_null_ptr + integer :: dt_buf_sz = 0 + type(c_ptr) :: i_buf = c_null_ptr + integer :: i_buf_sz = 0 + contains + procedure, pass(x) :: get_nrows => i_cuda_get_nrows + procedure, nopass :: get_fmt => i_cuda_get_fmt + + procedure, pass(x) :: all => i_cuda_all + procedure, pass(x) :: zero => i_cuda_zero + procedure, pass(x) :: asb_m => i_cuda_asb_m + procedure, pass(x) :: sync => i_cuda_sync + procedure, pass(x) :: sync_space => i_cuda_sync_space + procedure, pass(x) :: bld_x => i_cuda_bld_x + procedure, pass(x) :: bld_mn => i_cuda_bld_mn + procedure, pass(x) :: free => i_cuda_free + procedure, pass(x) :: ins_a => i_cuda_ins_a + procedure, pass(x) :: ins_v => i_cuda_ins_v + procedure, pass(x) :: is_host => i_cuda_is_host + procedure, pass(x) :: is_dev => i_cuda_is_dev + procedure, pass(x) :: is_sync => i_cuda_is_sync + procedure, pass(x) :: set_host => i_cuda_set_host + procedure, pass(x) :: set_dev => i_cuda_set_dev + procedure, pass(x) :: set_sync => i_cuda_set_sync + procedure, pass(x) :: set_scal => i_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => i_cuda_set_vect + procedure, pass(x) :: gthzv_x => i_cuda_gthzv_x + procedure, pass(y) :: sctb => i_cuda_sctb + procedure, pass(y) :: sctb_x => i_cuda_sctb_x + procedure, pass(x) :: gthzbuf => i_cuda_gthzbuf + procedure, pass(y) :: sctb_buf => i_cuda_sctb_buf + procedure, pass(x) :: new_buffer => i_cuda_new_buffer + procedure, nopass :: device_wait => i_cuda_device_wait + procedure, pass(x) :: free_buffer => i_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => i_cuda_maybe_free_buffer + + final :: i_cuda_vect_finalize + end type psb_i_vect_cuda + + public :: psb_i_vect_cuda_ + private :: constructor + interface psb_i_vect_cuda_ + module procedure constructor + end interface psb_i_vect_cuda_ + +contains + + function constructor(x) result(this) + integer(psb_ipk_) :: x(:) + type(psb_i_vect_cuda) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x),info) + + end function constructor + + subroutine i_cuda_device_wait() + call psb_cudaSync() + end subroutine i_cuda_device_wait + + subroutine i_cuda_new_buffer(n,x,info) + use psb_realloc_mod + use psb_cuda_env_mod + implicit none + class(psb_i_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + + if (psb_cuda_DeviceHasUVA()) then + if (allocated(x%combuf)) then + if (size(x%combuf) idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_cuda_DeviceHasUVA()) then + ! + ! Only need a sync in this branch; in the others + ! cudamemCpy acts as a sync point. + ! + if (allocated(x%pinned_buffer)) then + if (size(x%pinned_buffer) < n) then + call inner_unregister(x%pinned_buffer) + deallocate(x%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(x%pinned_buffer)) then + allocate(x%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(x%pinned_buffer,x%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + info = igathMultiVecDeviceIntVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_p_buf, 1) + call psb_cudaSync() + y(1:n) = x%pinned_buffer(1:n) + + else + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeInt(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateInt(x%dt_buf,n) + x%dt_buf_sz=n + end if + if (info == 0) & + & info = igathMultiVecDeviceIntVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_buf, 1) + if (info == 0) & + & info = readInt(x%dt_buf,y,n) + + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + + if (x%i_buf_sz < ni) then + if (c_associated(x%i_buf)) then + call freeInt(x%i_buf) + x%i_buf = c_null_ptr + end if + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeInt(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateInt(x%dt_buf,n) + x%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(x%i_buf,ii%v,ni) + if (info == 0) & + & info = igathMultiVecDeviceInt(x%deviceVect,& + & 0, n, i, x%i_buf, 1, x%dt_buf, 1) + if (info == 0) & + & info = readInt(x%dt_buf,y,n) + + end select + + end subroutine i_cuda_gthzv_x + + subroutine i_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i,n + class(psb_i_base_vect_type) :: idx + class(psb_i_vect_cuda) :: x + integer :: info, ni + + info = 0 +!!$ write(0,*) 'Starting gth_zbuf' + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'gthzbuf') + return + end if + + select type(ii=> idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_cuda_DeviceHasUVA()) then + info = igathMultiVecDeviceIntVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) + + else + info = igathMultiVecDeviceIntVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_buf, 1) + if (info == 0) & + & info = readInt(i,x%dt_buf,x%combuf(i:),n,1) + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + info = 0 + if (.not.c_associated(x%i_buf)) then + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,x%i_buf,ii%v(i:),n,1) + + if (info == 0) & + & info = igathMultiVecDeviceInt(x%deviceVect,& + & 0, n, i, x%i_buf, i,x%dt_buf, 1) + + if (info == 0) & + & info = readInt(i,x%dt_buf,x%combuf(i:),n,1) + + end select + + end subroutine i_cuda_gthzbuf + + subroutine i_cuda_sctb(n,idx,x,beta,y) + implicit none + !use psb_const_mod + integer(psb_ipk_) :: n, idx(:) + integer(psb_ipk_) :: beta, x(:) + class(psb_i_vect_cuda) :: y + integer(psb_ipk_) :: info + + if (n == 0) return + + if (y%is_dev()) call y%sync() + + call y%psb_i_base_vect_type%sctb(n,idx,x,beta) + call y%set_host() + + end subroutine i_cuda_sctb + + subroutine i_cuda_sctb_x(i,n,idx,x,beta,y) + use psb_cuda_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + integer(psb_ipk_) :: beta, x(:) + class(psb_i_vect_cuda) :: y + integer :: info, ni + + select type(ii=> idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + + ! + if (psb_cuda_DeviceHasUVA()) then + if (allocated(y%pinned_buffer)) then + if (size(y%pinned_buffer) < n) then + call inner_unregister(y%pinned_buffer) + deallocate(y%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(y%pinned_buffer)) then + allocate(y%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(y%pinned_buffer,y%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + y%pinned_buffer(1:n) = x(1:n) + info = iscatMultiVecDeviceIntVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_p_buf, 1,beta) + else + + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeInt(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateInt(y%dt_buf,n) + y%dt_buf_sz=n + end if + info = writeInt(y%dt_buf,x,n) + info = iscatMultiVecDeviceIntVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_buf, 1,beta) + + end if + + class default + ni = size(ii%v) + + if (y%i_buf_sz < ni) then + if (c_associated(y%i_buf)) then + call freeInt(y%i_buf) + y%i_buf = c_null_ptr + end if + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeInt(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateInt(y%dt_buf,n) + y%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(y%i_buf,ii%v(i:i+n-1),n) + info = writeInt(y%dt_buf,x,n) + info = iscatMultiVecDeviceInt(y%deviceVect,& + & 0, n, 1, y%i_buf, 1, y%dt_buf, 1,beta) + + + end select + ! + ! Need a sync here to make sure we are not reallocating + ! the buffers before iscatMulti has finished. + ! + call psb_cudaSync() + call y%set_dev() + + end subroutine i_cuda_sctb_x + + subroutine i_cuda_sctb_buf(i,n,idx,beta,y) + use psi_serial_mod + use psb_cuda_env_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + integer(psb_ipk_) :: beta + class(psb_i_vect_cuda) :: y + integer(psb_ipk_) :: info, ni + +!!$ write(0,*) 'Starting sctb_buf' + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'sctb_buf') + return + end if + + + select type(ii=> idx) + class is (psb_i_vect_cuda) + + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + if (psb_cuda_DeviceHasUVA()) then + info = iscatMultiVecDeviceIntVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) + else + info = writeInt(i,y%dt_buf,y%combuf(i:),n,1) + info = iscatMultiVecDeviceIntVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_buf, 1,beta) + + end if + + class default + !call y%sct(n,ii%v(i:),x,beta) + ni = size(ii%v) + info = 0 + if (.not.c_associated(y%i_buf)) then + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,y%i_buf,ii%v(i:),n,1) + if (info == 0) & + & info = writeInt(i,y%dt_buf,y%combuf(i:),n,1) + if (info == 0) info = iscatMultiVecDeviceInt(y%deviceVect,& + & 0, n, i, y%i_buf, i, y%dt_buf, 1,beta) + end select +!!$ write(0,*) 'Done sctb_buf' + + end subroutine i_cuda_sctb_buf + + + subroutine i_cuda_bld_x(x,this) + use psb_base_mod + integer(psb_ipk_), intent(in) :: this(:) + class(psb_i_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this),x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'i_cuda_bld_x',& + & i_err=(/size(this),izero,izero,izero,izero/)) + end if + x%v(:) = this(:) + call x%set_host() + call x%sync() + + end subroutine i_cuda_bld_x + + subroutine i_cuda_bld_mn(x,n) + integer(psb_mpk_), intent(in) :: n + class(psb_i_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n,info) + if (info /= 0) then + call psb_errpush(info,'i_cuda_bld_n',i_err=(/n,n,n,n,n/)) + end if + + end subroutine i_cuda_bld_mn + + subroutine i_cuda_set_host(x) + implicit none + class(psb_i_vect_cuda), intent(inout) :: x + + x%state = is_host + end subroutine i_cuda_set_host + + subroutine i_cuda_set_dev(x) + implicit none + class(psb_i_vect_cuda), intent(inout) :: x + + x%state = is_dev + end subroutine i_cuda_set_dev + + subroutine i_cuda_set_sync(x) + implicit none + class(psb_i_vect_cuda), intent(inout) :: x + + x%state = is_sync + end subroutine i_cuda_set_sync + + function i_cuda_is_dev(x) result(res) + implicit none + class(psb_i_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function i_cuda_is_dev + + function i_cuda_is_host(x) result(res) + implicit none + class(psb_i_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function i_cuda_is_host + + function i_cuda_is_sync(x) result(res) + implicit none + class(psb_i_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function i_cuda_is_sync + + + function i_cuda_get_nrows(x) result(res) + implicit none + class(psb_i_vect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = 0 + if (allocated(x%v)) res = size(x%v) + end function i_cuda_get_nrows + + function i_cuda_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'iGPU' + end function i_cuda_get_fmt + + subroutine i_cuda_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_i_vect_cuda), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n,x%v,info) + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'i_cuda_all',& + & i_err=(/n,n,n,n,n/)) + end if + end subroutine i_cuda_all + + subroutine i_cuda_zero(x) + use psi_serial_mod + implicit none + class(psb_i_vect_cuda), intent(inout) :: x + ! Since we are overwriting, make sure to do it + ! on the GPU side + call x%set_dev() + call x%set_scal(izero) + end subroutine i_cuda_zero + + subroutine i_cuda_asb_m(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_i_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + if (x%is_dev()) then + nd = getMultiVecDeviceSize(x%deviceVect) + if (nd < n) then + call x%sync() + call x%psb_i_base_vect_type%asb(n,info) + if (info == psb_success_) call x%sync_space(info) + call x%set_host() + end if + else ! + if (x%get_nrows() size(x%v)).or.(n > x%get_nrows())) then +!!$ write(0,*) 'Incoherent situation : sizes',n,size(x%v),x%get_nrows() + call psb_realloc(n,x%v,info) + end if + info = readMultiVecDevice(x%deviceVect,x%v) + end if + if (info == 0) call x%set_sync() + if (info /= 0) then + info=psb_err_internal_error_ + call psb_errpush(info,'i_cuda_sync') + end if + + end subroutine i_cuda_sync + + subroutine i_cuda_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_i_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) deallocate(x%v, stat=info) + if (c_associated(x%deviceVect)) then +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' + call freeMultiVecDevice(x%deviceVect) + x%deviceVect=c_null_ptr + end if + call x%free_buffer(info) + call x%set_sync() + end subroutine i_cuda_free + + subroutine i_cuda_set_scal(x,val,first,last) + class(psb_i_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: info, first_, last_ + + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1,first) + if (present(last)) last_ = min(last,last_) + + info = setScalDevice(val,first_,last_,1,x%deviceVect) + call x%set_dev() + + end subroutine i_cuda_set_scal +!!$ +!!$ subroutine i_cuda_set_vect(x,val) +!!$ class(psb_i_vect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: val(:) +!!$ integer(psb_ipk_) :: nr +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_i_base_vect_type%set_vect(val) +!!$ call x%set_host() +!!$ +!!$ end subroutine i_cuda_set_vect + + + + subroutine i_cuda_vect_finalize(x) + use psi_serial_mod + use psb_realloc_mod + implicit none + type(psb_i_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + info = 0 + call x%free(info) + end subroutine i_cuda_vect_finalize + + subroutine i_cuda_ins_v(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_i_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_i_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_cuda + + info = 0 + if (psb_errstatus_fatal()) return + + done_cuda = .false. + select type(virl => irl) + class is (psb_i_vect_cuda) + select type(vval => val) + class is (psb_i_vect_cuda) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + info = geinsMultiVecDeviceInt(n,virl%deviceVect,& + & vval%deviceVect,dupl,1,x%deviceVect) + call x%set_dev() + done_cuda=.true. + end select + end select + + if (.not.done_cuda) then + if (irl%is_dev()) call irl%sync() + if (val%is_dev()) call val%sync() + call x%ins(n,irl%v,val%v,dupl,info) + end if + + if (info /= 0) then + call psb_errpush(info,'cuda_vect_ins') + return + end if + + end subroutine i_cuda_ins_v + + subroutine i_cuda_ins_a(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_i_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + integer(psb_ipk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_i_base_vect_type%ins(n,irl,val,dupl,info) + call x%set_host() + + end subroutine i_cuda_ins_a + +end module psb_i_cuda_vect_mod + + +! +! Multivectors +! + + + +module psb_i_cuda_multivect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_i_multivect_mod + use psb_i_base_multivect_mod + use psb_cuda_env_mod + use psb_i_vectordev_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_i_base_multivect_type) :: psb_i_multivect_cuda + integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 + type(c_ptr) :: deviceVect = c_null_ptr + real(c_double), allocatable :: buffer(:,:) + type(c_ptr) :: dt_buf = c_null_ptr + contains + procedure, pass(x) :: get_nrows => i_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => i_cuda_multi_get_ncols + procedure, nopass :: get_fmt => i_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => i_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => i_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => i_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => i_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => i_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => i_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => i_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => i_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => i_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => i_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => i_cuda_multi_amax +!!$ procedure, pass(x) :: asum => i_cuda_multi_asum + procedure, pass(x) :: all => i_cuda_multi_all + procedure, pass(x) :: zero => i_cuda_multi_zero + procedure, pass(x) :: asb => i_cuda_multi_asb + procedure, pass(x) :: sync => i_cuda_multi_sync + procedure, pass(x) :: sync_space => i_cuda_multi_sync_space + procedure, pass(x) :: bld_x => i_cuda_multi_bld_x + procedure, pass(x) :: bld_n => i_cuda_multi_bld_n + procedure, pass(x) :: free => i_cuda_multi_free + procedure, pass(x) :: ins => i_cuda_multi_ins + procedure, pass(x) :: is_host => i_cuda_multi_is_host + procedure, pass(x) :: is_dev => i_cuda_multi_is_dev + procedure, pass(x) :: is_sync => i_cuda_multi_is_sync + procedure, pass(x) :: set_host => i_cuda_multi_set_host + procedure, pass(x) :: set_dev => i_cuda_multi_set_dev + procedure, pass(x) :: set_sync => i_cuda_multi_set_sync + procedure, pass(x) :: set_scal => i_cuda_multi_set_scal + procedure, pass(x) :: set_vect => i_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => i_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => i_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => i_cuda_multi_sctb_x + final :: i_cuda_multi_vect_finalize + end type psb_i_multivect_cuda + + public :: psb_i_multivect_cuda + private :: constructor + interface psb_i_multivect_cuda + module procedure constructor + end interface + +contains + + function constructor(x) result(this) + integer(psb_ipk_) :: x(:,:) + type(psb_i_multivect_cuda) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x,1),size(x,2),info) + + end function constructor + + +!!$ subroutine i_cuda_multi_gthzv_x(i,n,idx,x,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i,n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ integer(psb_ipk_) :: y(:) +!!$ class(psb_i_multivect_cuda) :: x +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_cuda) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (x%is_host()) call x%sync() +!!$ +!!$ if (allocated(x%buffer)) then +!!$ if (size(x%buffer) < n) then +!!$ call inner_unregister(x%buffer) +!!$ deallocate(x%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(x%buffer)) then +!!$ allocate(x%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(x%buffer,x%dt_buf) +!!$ endif +!!$ info = igathMultiVecDeviceDouble(x%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, x%dt_buf, 1) +!!$ call psb_cudaSync() +!!$ y(1:n) = x%buffer(1:n) +!!$ +!!$ class default +!!$ call x%gth(n,ii%v(i:),y) +!!$ end select +!!$ +!!$ +!!$ end subroutine i_cuda_multi_gthzv_x +!!$ +!!$ +!!$ +!!$ subroutine i_cuda_multi_sctb(n,idx,x,beta,y) +!!$ implicit none +!!$ !use psb_const_mod +!!$ integer(psb_ipk_) :: n, idx(:) +!!$ integer(psb_ipk_) :: beta, x(:) +!!$ class(psb_i_multivect_cuda) :: y +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (n == 0) return +!!$ +!!$ if (y%is_dev()) call y%sync() +!!$ +!!$ call y%psb_i_base_multivect_type%sctb(n,idx,x,beta) +!!$ call y%set_host() +!!$ +!!$ end subroutine i_cuda_multi_sctb +!!$ +!!$ subroutine i_cuda_multi_sctb_x(i,n,idx,x,beta,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ integer(psb_ipk_) :: beta, x(:) +!!$ class(psb_i_multivect_cuda) :: y +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_cuda) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (y%is_host()) call y%sync() +!!$ +!!$ if (allocated(y%buffer)) then +!!$ if (size(y%buffer) < n) then +!!$ call inner_unregister(y%buffer) +!!$ deallocate(y%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(y%buffer)) then +!!$ allocate(y%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(y%buffer,y%dt_buf) +!!$ endif +!!$ y%buffer(1:n) = x(1:n) +!!$ info = iscatMultiVecDeviceDouble(y%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, y%dt_buf, 1,beta) +!!$ +!!$ call y%set_dev() +!!$ call psb_cudaSync() +!!$ +!!$ class default +!!$ call y%sct(n,ii%v(i:),x,beta) +!!$ end select +!!$ +!!$ end subroutine i_cuda_multi_sctb_x + + + subroutine i_cuda_multi_bld_x(x,this) + use psb_base_mod + integer(psb_ipk_), intent(in) :: this(:,:) + class(psb_i_multivect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info, m, n + + m=size(this,1) + n=size(this,2) + x%m_nrows = m + x%m_ncols = n + call psb_realloc(m,n,x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'i_cuda_multi_bld_x',& + & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) + end if + x%v(1:m,1:n) = this(1:m,1:n) + call x%set_host() + call x%sync() + + end subroutine i_cuda_multi_bld_x + + subroutine i_cuda_multi_bld_n(x,m,n) + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_multivect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(m,n,info) + if (info /= 0) then + call psb_errpush(info,'i_cuda_multi_bld_n',i_err=(/m,n,n,n,n/)) + end if + + end subroutine i_cuda_multi_bld_n + + + subroutine i_cuda_multi_set_host(x) + implicit none + class(psb_i_multivect_cuda), intent(inout) :: x + + x%state = is_host + end subroutine i_cuda_multi_set_host + + subroutine i_cuda_multi_set_dev(x) + implicit none + class(psb_i_multivect_cuda), intent(inout) :: x + + x%state = is_dev + end subroutine i_cuda_multi_set_dev + + subroutine i_cuda_multi_set_sync(x) + implicit none + class(psb_i_multivect_cuda), intent(inout) :: x + + x%state = is_sync + end subroutine i_cuda_multi_set_sync + + function i_cuda_multi_is_dev(x) result(res) + implicit none + class(psb_i_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function i_cuda_multi_is_dev + + function i_cuda_multi_is_host(x) result(res) + implicit none + class(psb_i_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function i_cuda_multi_is_host + + function i_cuda_multi_is_sync(x) result(res) + implicit none + class(psb_i_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function i_cuda_multi_is_sync + + + function i_cuda_multi_get_nrows(x) result(res) + implicit none + class(psb_i_multivect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_nrows + + end function i_cuda_multi_get_nrows + + function i_cuda_multi_get_ncols(x) result(res) + implicit none + class(psb_i_multivect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_ncols + + end function i_cuda_multi_get_ncols + + function i_cuda_multi_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'iGPU' + end function i_cuda_multi_get_fmt + +!!$ function i_cuda_multi_dot_v(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_i_multivect_cuda), intent(inout) :: x +!!$ class(psb_i_base_multivect_type), intent(inout) :: y +!!$ integer(psb_ipk_), intent(in) :: n +!!$ integer(psb_ipk_) :: res +!!$ integer(psb_ipk_), external :: ddot +!!$ integer(psb_ipk_) :: info +!!$ +!!$ res = izero +!!$ ! +!!$ ! Note: this is the gpu implementation. +!!$ ! When we get here, we are sure that X is of +!!$ ! TYPE psb_i_vect +!!$ ! +!!$ select type(yy => y) +!!$ type is (psb_i_base_multivect_type) +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,x%v,1,yy%v,1) +!!$ type is (psb_i_multivect_cuda) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) +!!$ if (info /= 0) then +!!$ info = psb_err_internal_error_ +!!$ call psb_errpush(info,'i_cuda_multi_dot_v') +!!$ end if +!!$ +!!$ class default +!!$ ! y%sync is done in dot_a +!!$ call x%sync() +!!$ res = y%dot(n,x%v) +!!$ end select +!!$ +!!$ end function i_cuda_multi_dot_v +!!$ +!!$ function i_cuda_multi_dot_a(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_i_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: y(:) +!!$ integer(psb_ipk_), intent(in) :: n +!!$ integer(psb_ipk_) :: res +!!$ integer(psb_ipk_), external :: ddot +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,y,1,x%v,1) +!!$ +!!$ end function i_cuda_multi_dot_a +!!$ +!!$ subroutine i_cuda_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_i_base_multivect_type), intent(inout) :: x +!!$ class(psb_i_multivect_cuda), intent(inout) :: y +!!$ integer(psb_ipk_), intent (in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny +!!$ +!!$ info = psb_success_ +!!$ +!!$ select type(xx => x) +!!$ type is (psb_i_base_multivect_type) +!!$ if ((beta /= izero).and.(y%is_dev()))& +!!$ & call y%sync() +!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) +!!$ call y%set_host() +!!$ type is (psb_i_multivect_cuda) +!!$ ! Do something different here +!!$ if ((beta /= izero).and.y%is_host())& +!!$ & call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ nx = getMultiVecDeviceSize(xx%deviceVect) +!!$ ny = getMultiVecDeviceSize(y%deviceVect) +!!$ if ((nx x) +!!$ type is (psb_i_base_multivect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ do i=1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ type is (psb_i_multivect_cuda) +!!$ ! Do something different here +!!$ if (y%is_host()) call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) +!!$ call y%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call y%mlt(xx%v,info) +!!$ call y%set_host() +!!$ end select +!!$ +!!$ end subroutine i_cuda_multi_mlt_v +!!$ +!!$ subroutine i_cuda_multi_mlt_a(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: x(:) +!!$ class(psb_i_multivect_cuda), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ call y%sync() +!!$ call y%psb_i_base_multivect_type%mlt(x,info) +!!$ call y%set_host() +!!$ end subroutine i_cuda_multi_mlt_a +!!$ +!!$ subroutine i_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: alpha,beta +!!$ integer(psb_ipk_), intent(in) :: x(:) +!!$ integer(psb_ipk_), intent(in) :: y(:) +!!$ class(psb_i_multivect_cuda), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (z%is_dev()) call z%sync() +!!$ call z%psb_i_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end subroutine i_cuda_multi_mlt_a_2 +!!$ +!!$ subroutine i_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ use psi_serial_mod +!!$ use psb_string_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: alpha,beta +!!$ class(psb_i_base_multivect_type), intent(inout) :: x +!!$ class(psb_i_base_multivect_type), intent(inout) :: y +!!$ class(psb_i_multivect_cuda), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ if (.false.) then +!!$ ! These are present just for coherence with the +!!$ ! complex versions; they do nothing here. +!!$ conjgx_=.false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') +!!$ conjgy_=.false. +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') +!!$ end if +!!$ +!!$ n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) +!!$ +!!$ ! +!!$ ! Need to reconsider BETA in the GPU side +!!$ ! of things. +!!$ ! +!!$ info = 0 +!!$ select type(xx => x) +!!$ type is (psb_i_multivect_cuda) +!!$ select type (yy => y) +!!$ type is (psb_i_multivect_cuda) +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ ! Z state is irrelevant: it will be done on the GPU. +!!$ info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& +!!$ & yy%deviceVect,beta,z%deviceVect) +!!$ call z%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call yy%sync() +!!$ call z%psb_i_base_multivect_type%mlt(alpha,xx,yy,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ +!!$ class default +!!$ call x%sync() +!!$ call y%sync() +!!$ call z%psb_i_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ end subroutine i_cuda_multi_mlt_v_2 + + + subroutine i_cuda_multi_set_scal(x,val) + class(psb_i_multivect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: val + + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_i_base_multivect_type%set_scal(val) + call x%set_host() + end subroutine i_cuda_multi_set_scal + + subroutine i_cuda_multi_set_vect(x,val) + class(psb_i_multivect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: val(:,:) + integer(psb_ipk_) :: nr + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_i_base_multivect_type%set_vect(val) + call x%set_host() + + end subroutine i_cuda_multi_set_vect + + + +!!$ subroutine i_cuda_multi_scal(alpha, x) +!!$ implicit none +!!$ class(psb_i_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent (in) :: alpha +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_i_base_multivect_type%scal(alpha) +!!$ call x%set_host() +!!$ end subroutine i_cuda_multi_scal +!!$ +!!$ +!!$ function i_cuda_multi_nrm2(n,x) result(res) +!!$ implicit none +!!$ class(psb_i_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ integer(psb_ipk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ ! WARNING: this should be changed. +!!$ if (x%is_host()) call x%sync() +!!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) +!!$ +!!$ end function i_cuda_multi_nrm2 +!!$ +!!$ function i_cuda_multi_amax(n,x) result(res) +!!$ implicit none +!!$ class(psb_i_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ integer(psb_ipk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = maxval(abs(x%v(1:n))) +!!$ +!!$ end function i_cuda_multi_amax +!!$ +!!$ function i_cuda_multi_asum(n,x) result(res) +!!$ implicit none +!!$ class(psb_i_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ integer(psb_ipk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = sum(abs(x%v(1:n))) +!!$ +!!$ end function i_cuda_multi_asum + + subroutine i_cuda_multi_all(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_multivect_cuda), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(m,n,x%v,info,pad=izero) + x%m_nrows = m + x%m_ncols = n + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'i_cuda_multi_all',& + & i_err=(/m,n,n,n,n/)) + end if + end subroutine i_cuda_multi_all + + subroutine i_cuda_multi_zero(x) + use psi_serial_mod + implicit none + class(psb_i_multivect_cuda), intent(inout) :: x + + if (allocated(x%v)) x%v=izero + call x%set_host() + end subroutine i_cuda_multi_zero + + subroutine i_cuda_multi_asb(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_i_multivect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nd, nc + + + x%m_nrows = m + x%m_ncols = n + if (x%is_host()) then + call x%psb_i_base_multivect_type%asb(m,n,info) + if (info == psb_success_) call x%sync_space(info) + else if (x%is_dev()) then + nd = getMultiVecDevicePitch(x%deviceVect) + nc = getMultiVecDeviceCount(x%deviceVect) + if ((nd < m).or.(nc s_cuda_csrg_get_fmt + procedure, pass(a) :: sizeof => s_cuda_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_s_cuda_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_s_cuda_csrg_csmm + procedure, pass(a) :: csmv => psb_s_cuda_csrg_csmv + procedure, pass(a) :: scals => psb_s_cuda_csrg_scals + procedure, pass(a) :: scalv => psb_s_cuda_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_s_cuda_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_cuda_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_csrg_from_fmt + procedure, pass(a) :: free => s_cuda_csrg_free + procedure, pass(a) :: mold => psb_s_cuda_csrg_mold + procedure, pass(a) :: is_host => s_cuda_csrg_is_host + procedure, pass(a) :: is_dev => s_cuda_csrg_is_dev + procedure, pass(a) :: is_sync => s_cuda_csrg_is_sync + procedure, pass(a) :: set_host => s_cuda_csrg_set_host + procedure, pass(a) :: set_dev => s_cuda_csrg_set_dev + procedure, pass(a) :: set_sync => s_cuda_csrg_set_sync + procedure, pass(a) :: sync => s_cuda_csrg_sync + procedure, pass(a) :: to_gpu => psb_s_cuda_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_s_cuda_csrg_from_gpu + final :: s_cuda_csrg_finalize + end type psb_s_cuda_csrg_sparse_mat + + private :: s_cuda_csrg_get_nzeros, s_cuda_csrg_free, s_cuda_csrg_get_fmt, & + & s_cuda_csrg_get_size, s_cuda_csrg_sizeof, s_cuda_csrg_get_nz_row + + + interface + subroutine psb_s_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_s_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_csrg_vect_mv + end interface + + interface + subroutine psb_s_cuda_csrg_reallocate_nz(nz,a) + import :: psb_s_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_csrg_reallocate_nz + end interface + + interface + subroutine psb_s_cuda_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_s_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_cuda_csrg_allocate_mnnz + end interface + + interface + subroutine psb_s_cuda_csrg_mold(a,b,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_csrg_mold + end interface + + interface + subroutine psb_s_cuda_csrg_to_gpu(a,info, nzrm) + import :: psb_s_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_s_cuda_csrg_to_gpu + end interface + + interface + subroutine psb_s_cuda_csrg_from_gpu(a,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_csrg_from_gpu + end interface + + interface + subroutine psb_s_cuda_cp_csrg_from_coo(a,b,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_cp_csrg_from_coo + end interface + + interface + subroutine psb_s_cuda_cp_csrg_from_fmt(a,b,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_cp_csrg_from_fmt + end interface + + interface + subroutine psb_s_cuda_mv_csrg_from_coo(a,b,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_mv_csrg_from_coo + end interface + + interface + subroutine psb_s_cuda_mv_csrg_from_fmt(a,b,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_mv_csrg_from_fmt + end interface + + interface + subroutine psb_s_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_csrg_csmv + end interface + interface + subroutine psb_s_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_csrg_csmm + end interface + + interface + subroutine psb_s_cuda_csrg_scal(d,a,info,side) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_cuda_csrg_scal + end interface + + interface + subroutine psb_s_cuda_csrg_scals(d,a,info) + import :: psb_s_cuda_csrg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_cuda_csrg_sizeof(a) result(res) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_sp * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function s_cuda_csrg_sizeof + + function s_cuda_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function s_cuda_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine s_cuda_csrg_set_host(a) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine s_cuda_csrg_set_host + + subroutine s_cuda_csrg_set_dev(a) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine s_cuda_csrg_set_dev + + subroutine s_cuda_csrg_set_sync(a) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine s_cuda_csrg_set_sync + + function s_cuda_csrg_is_dev(a) result(res) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function s_cuda_csrg_is_dev + + function s_cuda_csrg_is_host(a) result(res) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function s_cuda_csrg_is_host + + function s_cuda_csrg_is_sync(a) result(res) + implicit none + class(psb_s_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function s_cuda_csrg_is_sync + + + subroutine s_cuda_csrg_sync(a) + implicit none + class(psb_s_cuda_csrg_sparse_mat), target, intent(in) :: a + class(psb_s_cuda_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine s_cuda_csrg_sync + + subroutine s_cuda_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_s_csr_sparse_mat%free() + + return + + end subroutine s_cuda_csrg_free + + subroutine s_cuda_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_s_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine s_cuda_csrg_finalize + +end module psb_s_cuda_csrg_mat_mod diff --git a/cuda/psb_s_cuda_diag_mat_mod.F90 b/cuda/psb_s_cuda_diag_mat_mod.F90 new file mode 100644 index 00000000..b13a8502 --- /dev/null +++ b/cuda/psb_s_cuda_diag_mat_mod.F90 @@ -0,0 +1,289 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_cuda_diag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_s_dia_mat_mod + + type, extends(psb_s_dia_sparse_mat) :: psb_s_cuda_diag_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => s_cuda_diag_get_fmt + procedure, pass(a) :: sizeof => s_cuda_diag_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_diag_vect_mv +! procedure, pass(a) :: csmm => psb_s_cuda_diag_csmm + procedure, pass(a) :: csmv => psb_s_cuda_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_s_cuda_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_s_cuda_diag_scals +! procedure, pass(a) :: scalv => psb_s_cuda_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_s_cuda_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_s_cuda_diag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_diag_from_fmt + procedure, pass(a) :: free => s_cuda_diag_free + procedure, pass(a) :: mold => psb_s_cuda_diag_mold + procedure, pass(a) :: to_gpu => psb_s_cuda_diag_to_gpu + final :: s_cuda_diag_finalize + end type psb_s_cuda_diag_sparse_mat + + private :: s_cuda_diag_get_nzeros, s_cuda_diag_free, s_cuda_diag_get_fmt, & + & s_cuda_diag_get_size, s_cuda_diag_sizeof, s_cuda_diag_get_nz_row + + + interface + subroutine psb_s_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_diag_vect_mv + end interface + + interface + subroutine psb_s_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_s_cuda_diag_sparse_mat, psb_spk_, psb_s_base_vect_type + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_diag_inner_vect_sv + end interface + + interface + subroutine psb_s_cuda_diag_reallocate_nz(nz,a) + import :: psb_s_cuda_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_diag_reallocate_nz + end interface + + interface + subroutine psb_s_cuda_diag_allocate_mnnz(m,n,a,nz) + import :: psb_s_cuda_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_cuda_diag_allocate_mnnz + end interface + + interface + subroutine psb_s_cuda_diag_mold(a,b,info) + import :: psb_s_cuda_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_diag_mold + end interface + + interface + subroutine psb_s_cuda_diag_to_gpu(a,info, nzrm) + import :: psb_s_cuda_diag_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_s_cuda_diag_to_gpu + end interface + + interface + subroutine psb_s_cuda_cp_diag_from_coo(a,b,info) + import :: psb_s_cuda_diag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_cp_diag_from_coo + end interface + + interface + subroutine psb_s_cuda_cp_diag_from_fmt(a,b,info) + import :: psb_s_cuda_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_cp_diag_from_fmt + end interface + + interface + subroutine psb_s_cuda_mv_diag_from_coo(a,b,info) + import :: psb_s_cuda_diag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_mv_diag_from_coo + end interface + + + interface + subroutine psb_s_cuda_mv_diag_from_fmt(a,b,info) + import :: psb_s_cuda_diag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_mv_diag_from_fmt + end interface + + interface + subroutine psb_s_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_diag_csmv + end interface + interface + subroutine psb_s_cuda_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_diag_csmm + end interface + + interface + subroutine psb_s_cuda_diag_scal(d,a,info, side) + import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_cuda_diag_scal + end interface + + interface + subroutine psb_s_cuda_diag_scals(d,a,info) + import :: psb_s_cuda_diag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_diag_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_cuda_diag_sizeof(a) result(res) + implicit none + class(psb_s_cuda_diag_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = 8 + res = res + psb_sizeof_sp * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function s_cuda_diag_sizeof + + function s_cuda_diag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIAG' + end function s_cuda_diag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine s_cuda_diag_free(a) + use diagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_s_cuda_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_s_dia_sparse_mat%free() + + return + + end subroutine s_cuda_diag_free + + subroutine s_cuda_diag_finalize(a) + use diagdev_mod + implicit none + type(psb_s_cuda_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine s_cuda_diag_finalize + +end module psb_s_cuda_diag_mat_mod diff --git a/cuda/psb_s_cuda_dnsg_mat_mod.F90 b/cuda/psb_s_cuda_dnsg_mat_mod.F90 new file mode 100644 index 00000000..8193655e --- /dev/null +++ b/cuda/psb_s_cuda_dnsg_mat_mod.F90 @@ -0,0 +1,275 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_cuda_dnsg_mat_mod + + use iso_c_binding + use psb_s_mat_mod + use psb_s_dns_mat_mod + use dnsdev_mod + + type, extends(psb_s_dns_sparse_mat) :: psb_s_cuda_dnsg_sparse_mat + ! + ! ITPACK/DNS format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of DNS, indistinguishable. + ! + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => s_cuda_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => s_cuda_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_s_cuda_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_s_cuda_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_s_cuda_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_s_cuda_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_s_cuda_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_s_cuda_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_s_cuda_dnsg_allocate_mnnz + ! Note: we *do* need the TO methods, because of the need to invoke SYNC + ! + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_dnsg_from_fmt + procedure, pass(a) :: free => s_cuda_dnsg_free + procedure, pass(a) :: mold => psb_s_cuda_dnsg_mold + procedure, pass(a) :: to_gpu => psb_s_cuda_dnsg_to_gpu + final :: s_cuda_dnsg_finalize + end type psb_s_cuda_dnsg_sparse_mat + + private :: s_cuda_dnsg_get_nzeros, s_cuda_dnsg_free, s_cuda_dnsg_get_fmt, & + & s_cuda_dnsg_get_size, s_cuda_dnsg_get_nz_row + + + interface + subroutine psb_s_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_dnsg_vect_mv + end interface +!!$ +!!$ interface +!!$ subroutine psb_s_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_s_base_vect_type +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta +!!$ class(psb_s_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_cuda_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_s_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_cuda_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_s_cuda_dnsg_allocate_mnnz +!!$ end interface + + interface + subroutine psb_s_cuda_dnsg_mold(a,b,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_dnsg_mold + end interface + + interface + subroutine psb_s_cuda_dnsg_to_gpu(a,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_dnsg_to_gpu + end interface + + interface + subroutine psb_s_cuda_cp_dnsg_from_coo(a,b,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_cp_dnsg_from_coo + end interface + + interface + subroutine psb_s_cuda_cp_dnsg_from_fmt(a,b,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_cp_dnsg_from_fmt + end interface + + interface + subroutine psb_s_cuda_mv_dnsg_from_coo(a,b,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_mv_dnsg_from_coo + end interface + + + interface + subroutine psb_s_cuda_mv_dnsg_from_fmt(a,b,info) + import :: psb_s_cuda_dnsg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_mv_dnsg_from_fmt + end interface + +!!$ interface +!!$ subroutine psb_s_cuda_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta, x(:) +!!$ real(psb_spk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_cuda_dnsg_csmv +!!$ end interface +!!$ interface +!!$ subroutine psb_s_cuda_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_cuda_dnsg_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_cuda_dnsg_scal(d,a,info, side) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ real(psb_spk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_s_cuda_dnsg_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_cuda_dnsg_scals(d,a,info) +!!$ import :: psb_s_cuda_dnsg_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ real(psb_spk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_cuda_dnsg_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + + function s_cuda_dnsg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNSG' + end function s_cuda_dnsg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine s_cuda_dnsg_free(a) + use dnsdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_s_dns_sparse_mat%free() + + return + + end subroutine s_cuda_dnsg_free + + subroutine s_cuda_dnsg_finalize(a) + use dnsdev_mod + implicit none + type(psb_s_cuda_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine s_cuda_dnsg_finalize + +end module psb_s_cuda_dnsg_mat_mod diff --git a/cuda/psb_s_cuda_elg_mat_mod.F90 b/cuda/psb_s_cuda_elg_mat_mod.F90 new file mode 100644 index 00000000..1aa9d36d --- /dev/null +++ b/cuda/psb_s_cuda_elg_mat_mod.F90 @@ -0,0 +1,456 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_cuda_elg_mat_mod + + use iso_c_binding + use psb_s_mat_mod + use psb_s_ell_mat_mod + use psb_i_cuda_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_ell_sparse_mat) :: psb_s_cuda_elg_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of ELL, indistinguishable. + ! + type(c_ptr) :: deviceMat = c_null_ptr + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => s_cuda_elg_get_fmt + procedure, pass(a) :: sizeof => s_cuda_elg_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_elg_vect_mv + procedure, pass(a) :: csmm => psb_s_cuda_elg_csmm + procedure, pass(a) :: csmv => psb_s_cuda_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_s_cuda_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_s_cuda_elg_scals + procedure, pass(a) :: scalv => psb_s_cuda_elg_scal + procedure, pass(a) :: reallocate_nz => psb_s_cuda_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_cuda_elg_allocate_mnnz + procedure, pass(a) :: reinit => s_cuda_elg_reinit + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_elg_from_fmt + procedure, pass(a) :: free => s_cuda_elg_free + procedure, pass(a) :: mold => psb_s_cuda_elg_mold + procedure, pass(a) :: csput_a => psb_s_cuda_elg_csput_a + procedure, pass(a) :: csput_v => psb_s_cuda_elg_csput_v + procedure, pass(a) :: is_host => s_cuda_elg_is_host + procedure, pass(a) :: is_dev => s_cuda_elg_is_dev + procedure, pass(a) :: is_sync => s_cuda_elg_is_sync + procedure, pass(a) :: set_host => s_cuda_elg_set_host + procedure, pass(a) :: set_dev => s_cuda_elg_set_dev + procedure, pass(a) :: set_sync => s_cuda_elg_set_sync + procedure, pass(a) :: sync => s_cuda_elg_sync + procedure, pass(a) :: from_gpu => psb_s_cuda_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_s_cuda_elg_to_gpu + procedure, pass(a) :: asb => psb_s_cuda_elg_asb + final :: s_cuda_elg_finalize + end type psb_s_cuda_elg_sparse_mat + + private :: s_cuda_elg_get_nzeros, s_cuda_elg_free, s_cuda_elg_get_fmt, & + & s_cuda_elg_get_size, s_cuda_elg_sizeof, s_cuda_elg_get_nz_row, s_cuda_elg_sync + + + interface + subroutine psb_s_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_elg_vect_mv + end interface + + interface + subroutine psb_s_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_s_cuda_elg_sparse_mat, psb_spk_, psb_s_base_vect_type + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_elg_inner_vect_sv + end interface + + interface + subroutine psb_s_cuda_elg_reallocate_nz(nz,a) + import :: psb_s_cuda_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_elg_reallocate_nz + end interface + + interface + subroutine psb_s_cuda_elg_allocate_mnnz(m,n,a,nz) + import :: psb_s_cuda_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_cuda_elg_allocate_mnnz + end interface + + interface + subroutine psb_s_cuda_elg_mold(a,b,info) + import :: psb_s_cuda_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_elg_mold + end interface + + interface + subroutine psb_s_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_elg_csput_a + end interface + + interface + subroutine psb_s_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_s_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_s_base_vect_type,& + & psb_i_base_vect_type + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_s_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_elg_csput_v + end interface + + interface + subroutine psb_s_cuda_elg_from_gpu(a,info) + import :: psb_s_cuda_elg_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_elg_from_gpu + end interface + + interface + subroutine psb_s_cuda_elg_to_gpu(a,info, nzrm) + import :: psb_s_cuda_elg_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_s_cuda_elg_to_gpu + end interface + + interface + subroutine psb_s_cuda_cp_elg_from_coo(a,b,info) + import :: psb_s_cuda_elg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_cp_elg_from_coo + end interface + + interface + subroutine psb_s_cuda_cp_elg_from_fmt(a,b,info) + import :: psb_s_cuda_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_cp_elg_from_fmt + end interface + + interface + subroutine psb_s_cuda_mv_elg_from_coo(a,b,info) + import :: psb_s_cuda_elg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_mv_elg_from_coo + end interface + + + interface + subroutine psb_s_cuda_mv_elg_from_fmt(a,b,info) + import :: psb_s_cuda_elg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_mv_elg_from_fmt + end interface + + interface + subroutine psb_s_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_elg_csmv + end interface + interface + subroutine psb_s_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_elg_csmm + end interface + + interface + subroutine psb_s_cuda_elg_scal(d,a,info, side) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_cuda_elg_scal + end interface + + interface + subroutine psb_s_cuda_elg_scals(d,a,info) + import :: psb_s_cuda_elg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_elg_scals + end interface + + interface + subroutine psb_s_cuda_elg_asb(a) + import :: psb_s_cuda_elg_sparse_mat + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_elg_asb + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_cuda_elg_sizeof(a) result(res) + implicit none + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_sp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function s_cuda_elg_sizeof + + function s_cuda_elg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELG' + end function s_cuda_elg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + subroutine s_cuda_elg_reinit(a,clear) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + integer(psb_ipk_) :: isz, err_act + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev().or.a%is_sync()) then + if (clear_) call zeroEllDevice(a%deviceMat) + call a%set_dev() + else if (a%is_host()) then + a%val(:,:) = szero + end if + call a%set_upd() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine s_cuda_elg_reinit + + subroutine s_cuda_elg_free(a) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_s_ell_sparse_mat%free() + call a%set_sync() + + return + + end subroutine s_cuda_elg_free + + subroutine s_cuda_elg_sync(a) + implicit none + class(psb_s_cuda_elg_sparse_mat), target, intent(in) :: a + class(psb_s_cuda_elg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine s_cuda_elg_sync + + subroutine s_cuda_elg_set_host(a) + implicit none + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine s_cuda_elg_set_host + + subroutine s_cuda_elg_set_dev(a) + implicit none + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine s_cuda_elg_set_dev + + subroutine s_cuda_elg_set_sync(a) + implicit none + class(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine s_cuda_elg_set_sync + + function s_cuda_elg_is_dev(a) result(res) + implicit none + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function s_cuda_elg_is_dev + + function s_cuda_elg_is_host(a) result(res) + implicit none + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function s_cuda_elg_is_host + + function s_cuda_elg_is_sync(a) result(res) + implicit none + class(psb_s_cuda_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function s_cuda_elg_is_sync + + subroutine s_cuda_elg_finalize(a) + use elldev_mod + implicit none + type(psb_s_cuda_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + return + + end subroutine s_cuda_elg_finalize + +end module psb_s_cuda_elg_mat_mod diff --git a/cuda/psb_s_cuda_hdiag_mat_mod.F90 b/cuda/psb_s_cuda_hdiag_mat_mod.F90 new file mode 100644 index 00000000..cac72c86 --- /dev/null +++ b/cuda/psb_s_cuda_hdiag_mat_mod.F90 @@ -0,0 +1,268 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_cuda_hdiag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_s_hdia_mat_mod + + type, extends(psb_s_hdia_sparse_mat) :: psb_s_cuda_hdiag_sparse_mat + ! + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => s_cuda_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => s_cuda_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_s_cuda_hdiag_csmm + procedure, pass(a) :: csmv => psb_s_cuda_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_s_cuda_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_s_cuda_hdiag_scals + ! procedure, pass(a) :: scalv => psb_s_cuda_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_s_cuda_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_s_cuda_hdiag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_hdiag_from_fmt + procedure, pass(a) :: free => s_cuda_hdiag_free + procedure, pass(a) :: mold => psb_s_cuda_hdiag_mold + procedure, pass(a) :: to_gpu => psb_s_cuda_hdiag_to_gpu + final :: s_cuda_hdiag_finalize + end type psb_s_cuda_hdiag_sparse_mat + + private :: s_cuda_hdiag_get_nzeros, s_cuda_hdiag_free, s_cuda_hdiag_get_fmt, & + & s_cuda_hdiag_get_size, s_cuda_hdiag_sizeof, s_cuda_hdiag_get_nz_row + + + interface + subroutine psb_s_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_hdiag_vect_mv + end interface + +!!$ interface +!!$ subroutine psb_s_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_s_base_vect_type +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta +!!$ class(psb_s_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_cuda_hdiag_inner_vect_sv +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_cuda_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_cuda_hdiag_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_cuda_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_s_cuda_hdiag_allocate_mnnz +!!$ end interface + + interface + subroutine psb_s_cuda_hdiag_mold(a,b,info) + import :: psb_s_cuda_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_hdiag_mold + end interface + + interface + subroutine psb_s_cuda_hdiag_to_gpu(a,info) + import :: psb_s_cuda_hdiag_sparse_mat, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_hdiag_to_gpu + end interface + + interface + subroutine psb_s_cuda_cp_hdiag_from_coo(a,b,info) + import :: psb_s_cuda_hdiag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_cp_hdiag_from_coo + end interface + +!!$ interface +!!$ subroutine psb_s_cuda_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_s_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_cuda_cp_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_s_cuda_mv_hdiag_from_coo(a,b,info) + import :: psb_s_cuda_hdiag_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_mv_hdiag_from_coo + end interface + +!!$ +!!$ interface +!!$ subroutine psb_s_cuda_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_s_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_cuda_mv_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_s_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_hdiag_csmv + end interface + +!!$ interface +!!$ subroutine psb_s_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_cuda_hdiag_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_cuda_hdiag_scal(d,a,info, side) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ real(psb_spk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_s_cuda_hdiag_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_cuda_hdiag_scals(d,a,info) +!!$ import :: psb_s_cuda_hdiag_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ real(psb_spk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_cuda_hdiag_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + function s_cuda_hdiag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIAG' + end function s_cuda_hdiag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine s_cuda_hdiag_free(a) + use hdiagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_s_hdia_sparse_mat%free() + + return + + end subroutine s_cuda_hdiag_free + + subroutine s_cuda_hdiag_finalize(a) + use hdiagdev_mod + implicit none + type(psb_s_cuda_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_s_hdia_sparse_mat%free() + + return + end subroutine s_cuda_hdiag_finalize + +end module psb_s_cuda_hdiag_mat_mod diff --git a/cuda/psb_s_cuda_hlg_mat_mod.F90 b/cuda/psb_s_cuda_hlg_mat_mod.F90 new file mode 100644 index 00000000..4f7b4b6f --- /dev/null +++ b/cuda/psb_s_cuda_hlg_mat_mod.F90 @@ -0,0 +1,379 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_cuda_hlg_mat_mod + + use iso_c_binding + use psb_s_mat_mod + use psb_s_hll_mat_mod + + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_hll_sparse_mat) :: psb_s_cuda_hlg_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! + type(c_ptr) :: deviceMat = c_null_ptr + integer :: devstate = is_host + + contains + procedure, nopass :: get_fmt => s_cuda_hlg_get_fmt + procedure, pass(a) :: sizeof => s_cuda_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_hlg_vect_mv + procedure, pass(a) :: csmm => psb_s_cuda_hlg_csmm + procedure, pass(a) :: csmv => psb_s_cuda_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_s_cuda_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_s_cuda_hlg_scals + procedure, pass(a) :: scalv => psb_s_cuda_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_s_cuda_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_cuda_hlg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_hlg_from_fmt + procedure, pass(a) :: free => s_cuda_hlg_free + procedure, pass(a) :: mold => psb_s_cuda_hlg_mold + procedure, pass(a) :: is_host => s_cuda_hlg_is_host + procedure, pass(a) :: is_dev => s_cuda_hlg_is_dev + procedure, pass(a) :: is_sync => s_cuda_hlg_is_sync + procedure, pass(a) :: set_host => s_cuda_hlg_set_host + procedure, pass(a) :: set_dev => s_cuda_hlg_set_dev + procedure, pass(a) :: set_sync => s_cuda_hlg_set_sync + procedure, pass(a) :: sync => s_cuda_hlg_sync + procedure, pass(a) :: from_gpu => psb_s_cuda_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_s_cuda_hlg_to_gpu + final :: s_cuda_hlg_finalize + end type psb_s_cuda_hlg_sparse_mat + + private :: s_cuda_hlg_get_nzeros, s_cuda_hlg_free, s_cuda_hlg_get_fmt, & + & s_cuda_hlg_get_size, s_cuda_hlg_sizeof, s_cuda_hlg_get_nz_row + + + interface + subroutine psb_s_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_hlg_vect_mv + end interface + + interface + subroutine psb_s_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_s_base_vect_type + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_hlg_inner_vect_sv + end interface + + interface + subroutine psb_s_cuda_hlg_reallocate_nz(nz,a) + import :: psb_s_cuda_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_hlg_reallocate_nz + end interface + + interface + subroutine psb_s_cuda_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_s_cuda_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_cuda_hlg_allocate_mnnz + end interface + + interface + subroutine psb_s_cuda_hlg_mold(a,b,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_hlg_mold + end interface + + interface + subroutine psb_s_cuda_hlg_from_gpu(a,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_hlg_from_gpu + end interface + + interface + subroutine psb_s_cuda_hlg_to_gpu(a,info, nzrm) + import :: psb_s_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_s_cuda_hlg_to_gpu + end interface + + interface + subroutine psb_s_cuda_cp_hlg_from_coo(a,b,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_cp_hlg_from_coo + end interface + + interface + subroutine psb_s_cuda_cp_hlg_from_fmt(a,b,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_cp_hlg_from_fmt + end interface + + interface + subroutine psb_s_cuda_mv_hlg_from_coo(a,b,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_mv_hlg_from_coo + end interface + + + interface + subroutine psb_s_cuda_mv_hlg_from_fmt(a,b,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_mv_hlg_from_fmt + end interface + + interface + subroutine psb_s_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_hlg_csmv + end interface + interface + subroutine psb_s_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_hlg_csmm + end interface + + interface + subroutine psb_s_cuda_hlg_scal(d,a,info, side) + import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_cuda_hlg_scal + end interface + + interface + subroutine psb_s_cuda_hlg_scals(d,a,info) + import :: psb_s_cuda_hlg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_hlg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_cuda_hlg_sizeof(a) result(res) + implicit none + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_sp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function s_cuda_hlg_sizeof + + function s_cuda_hlg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLG' + end function s_cuda_hlg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine s_cuda_hlg_free(a) + use hlldev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_s_hll_sparse_mat%free() + + return + + end subroutine s_cuda_hlg_free + + + subroutine s_cuda_hlg_sync(a) + implicit none + class(psb_s_cuda_hlg_sparse_mat), target, intent(in) :: a + class(psb_s_cuda_hlg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine s_cuda_hlg_sync + + subroutine s_cuda_hlg_set_host(a) + implicit none + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine s_cuda_hlg_set_host + + subroutine s_cuda_hlg_set_dev(a) + implicit none + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine s_cuda_hlg_set_dev + + subroutine s_cuda_hlg_set_sync(a) + implicit none + class(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine s_cuda_hlg_set_sync + + function s_cuda_hlg_is_dev(a) result(res) + implicit none + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function s_cuda_hlg_is_dev + + function s_cuda_hlg_is_host(a) result(res) + implicit none + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function s_cuda_hlg_is_host + + function s_cuda_hlg_is_sync(a) result(res) + implicit none + class(psb_s_cuda_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function s_cuda_hlg_is_sync + + + subroutine s_cuda_hlg_finalize(a) + use hlldev_mod + implicit none + type(psb_s_cuda_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine s_cuda_hlg_finalize + +end module psb_s_cuda_hlg_mat_mod diff --git a/cuda/psb_s_cuda_hybg_mat_mod.F90 b/cuda/psb_s_cuda_hybg_mat_mod.F90 new file mode 100644 index 00000000..f1446b15 --- /dev/null +++ b/cuda/psb_s_cuda_hybg_mat_mod.F90 @@ -0,0 +1,287 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +#if CUDA_SHORT_VERSION <= 10 + +module psb_s_cuda_hybg_mat_mod + + use iso_c_binding + use psb_s_mat_mod + use cusparse_mod + + type, extends(psb_s_csr_sparse_mat) :: psb_s_cuda_hybg_sparse_mat + ! + ! HYBG. An interface to the cuSPARSE HYB + ! On the CPU side we keep a CSR storage. + ! + ! + ! + ! + type(s_Hmat) :: deviceMat + + contains + procedure, nopass :: get_fmt => s_cuda_hybg_get_fmt + procedure, pass(a) :: sizeof => s_cuda_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_s_cuda_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_s_cuda_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_s_cuda_hybg_csmm + procedure, pass(a) :: csmv => psb_s_cuda_hybg_csmv + procedure, pass(a) :: scals => psb_s_cuda_hybg_scals + procedure, pass(a) :: scalv => psb_s_cuda_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_s_cuda_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_cuda_hybg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_s_cuda_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_cuda_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_cuda_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_cuda_mv_hybg_from_fmt + procedure, pass(a) :: free => s_cuda_hybg_free + procedure, pass(a) :: mold => psb_s_cuda_hybg_mold + procedure, pass(a) :: to_gpu => psb_s_cuda_hybg_to_gpu + final :: s_cuda_hybg_finalize + end type psb_s_cuda_hybg_sparse_mat + + private :: s_cuda_hybg_get_nzeros, s_cuda_hybg_free, s_cuda_hybg_get_fmt, & + & s_cuda_hybg_get_size, s_cuda_hybg_sizeof, s_cuda_hybg_get_nz_row + + + interface + subroutine psb_s_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_hybg_inner_vect_sv + end interface + + interface + subroutine psb_s_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_s_base_vect_type, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_hybg_vect_mv + end interface + + interface + subroutine psb_s_cuda_hybg_reallocate_nz(nz,a) + import :: psb_s_cuda_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_s_cuda_hybg_reallocate_nz + end interface + + interface + subroutine psb_s_cuda_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_s_cuda_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_cuda_hybg_allocate_mnnz + end interface + + interface + subroutine psb_s_cuda_hybg_mold(a,b,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_hybg_mold + end interface + + interface + subroutine psb_s_cuda_hybg_to_gpu(a,info, nzrm) + import :: psb_s_cuda_hybg_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_s_cuda_hybg_to_gpu + end interface + + interface + subroutine psb_s_cuda_cp_hybg_from_coo(a,b,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_cp_hybg_from_coo + end interface + + interface + subroutine psb_s_cuda_cp_hybg_from_fmt(a,b,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_cp_hybg_from_fmt + end interface + + interface + subroutine psb_s_cuda_mv_hybg_from_coo(a,b,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_mv_hybg_from_coo + end interface + + interface + subroutine psb_s_cuda_mv_hybg_from_fmt(a,b,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_mv_hybg_from_fmt + end interface + + interface + subroutine psb_s_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_hybg_csmv + end interface + interface + subroutine psb_s_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_cuda_hybg_csmm + end interface + + interface + subroutine psb_s_cuda_hybg_scal(d,a,info,side) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_cuda_hybg_scal + end interface + + interface + subroutine psb_s_cuda_hybg_scals(d,a,info) + import :: psb_s_cuda_hybg_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cuda_hybg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_cuda_hybg_sizeof(a) result(res) + implicit none + class(psb_s_cuda_hybg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + res = 8 + res = res + psb_sizeof_sp * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function s_cuda_hybg_sizeof + + function s_cuda_hybg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HYBG' + end function s_cuda_hybg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine s_cuda_hybg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + class(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + call a%psb_s_csr_sparse_mat%free() + + return + + end subroutine s_cuda_hybg_free + + subroutine s_cuda_hybg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + type(psb_s_cuda_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + + return + end subroutine s_cuda_hybg_finalize + +end module psb_s_cuda_hybg_mat_mod +#endif diff --git a/cuda/psb_s_cuda_vect_mod.F90 b/cuda/psb_s_cuda_vect_mod.F90 new file mode 100644 index 00000000..2fab51ec --- /dev/null +++ b/cuda/psb_s_cuda_vect_mod.F90 @@ -0,0 +1,2099 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_cuda_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_s_vect_mod + use psb_cuda_env_mod + use psb_i_vect_mod + use psb_i_cuda_vect_mod + use psb_i_vectordev_mod + use psb_s_vectordev_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_base_vect_type) :: psb_s_vect_cuda + integer :: state = is_host + type(c_ptr) :: deviceVect = c_null_ptr + real(c_float), allocatable :: pinned_buffer(:) + type(c_ptr) :: dt_p_buf = c_null_ptr + real(c_float), allocatable :: buffer(:) + type(c_ptr) :: dt_buf = c_null_ptr + integer :: dt_buf_sz = 0 + type(c_ptr) :: i_buf = c_null_ptr + integer :: i_buf_sz = 0 + contains + procedure, pass(x) :: get_nrows => s_cuda_get_nrows + procedure, nopass :: get_fmt => s_cuda_get_fmt + + procedure, pass(x) :: all => s_cuda_all + procedure, pass(x) :: zero => s_cuda_zero + procedure, pass(x) :: asb_m => s_cuda_asb_m + procedure, pass(x) :: sync => s_cuda_sync + procedure, pass(x) :: sync_space => s_cuda_sync_space + procedure, pass(x) :: bld_x => s_cuda_bld_x + procedure, pass(x) :: bld_mn => s_cuda_bld_mn + procedure, pass(x) :: free => s_cuda_free + procedure, pass(x) :: ins_a => s_cuda_ins_a + procedure, pass(x) :: ins_v => s_cuda_ins_v + procedure, pass(x) :: is_host => s_cuda_is_host + procedure, pass(x) :: is_dev => s_cuda_is_dev + procedure, pass(x) :: is_sync => s_cuda_is_sync + procedure, pass(x) :: set_host => s_cuda_set_host + procedure, pass(x) :: set_dev => s_cuda_set_dev + procedure, pass(x) :: set_sync => s_cuda_set_sync + procedure, pass(x) :: set_scal => s_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => s_cuda_set_vect + procedure, pass(x) :: gthzv_x => s_cuda_gthzv_x + procedure, pass(y) :: sctb => s_cuda_sctb + procedure, pass(y) :: sctb_x => s_cuda_sctb_x + procedure, pass(x) :: gthzbuf => s_cuda_gthzbuf + procedure, pass(y) :: sctb_buf => s_cuda_sctb_buf + procedure, pass(x) :: new_buffer => s_cuda_new_buffer + procedure, nopass :: device_wait => s_cuda_device_wait + procedure, pass(x) :: free_buffer => s_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => s_cuda_maybe_free_buffer + procedure, pass(x) :: dot_v => s_cuda_dot_v + procedure, pass(x) :: dot_a => s_cuda_dot_a + procedure, pass(y) :: axpby_v => s_cuda_axpby_v + procedure, pass(y) :: axpby_a => s_cuda_axpby_a + procedure, pass(z) :: abgdxyz => s_cuda_abgdxyz + procedure, pass(y) :: mlt_v => s_cuda_mlt_v + procedure, pass(y) :: mlt_a => s_cuda_mlt_a + procedure, pass(z) :: mlt_a_2 => s_cuda_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => s_cuda_mlt_v_2 + procedure, pass(x) :: scal => s_cuda_scal + procedure, pass(x) :: nrm2 => s_cuda_nrm2 + procedure, pass(x) :: amax => s_cuda_amax + procedure, pass(x) :: asum => s_cuda_asum + procedure, pass(x) :: absval1 => s_cuda_absval1 + procedure, pass(x) :: absval2 => s_cuda_absval2 + + final :: s_cuda_vect_finalize + end type psb_s_vect_cuda + + public :: psb_s_vect_cuda_ + private :: constructor + interface psb_s_vect_cuda_ + module procedure constructor + end interface psb_s_vect_cuda_ + +contains + + function constructor(x) result(this) + real(psb_spk_) :: x(:) + type(psb_s_vect_cuda) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x),info) + + end function constructor + + subroutine s_cuda_device_wait() + call psb_cudaSync() + end subroutine s_cuda_device_wait + + subroutine s_cuda_new_buffer(n,x,info) + use psb_realloc_mod + use psb_cuda_env_mod + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + + if (psb_cuda_DeviceHasUVA()) then + if (allocated(x%combuf)) then + if (size(x%combuf) idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_cuda_DeviceHasUVA()) then + ! + ! Only need a sync in this branch; in the others + ! cudamemCpy acts as a sync point. + ! + if (allocated(x%pinned_buffer)) then + if (size(x%pinned_buffer) < n) then + call inner_unregister(x%pinned_buffer) + deallocate(x%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(x%pinned_buffer)) then + allocate(x%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(x%pinned_buffer,x%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + info = igathMultiVecDeviceFloatVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_p_buf, 1) + call psb_cudaSync() + y(1:n) = x%pinned_buffer(1:n) + + else + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeFloat(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateFloat(x%dt_buf,n) + x%dt_buf_sz=n + end if + if (info == 0) & + & info = igathMultiVecDeviceFloatVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_buf, 1) + if (info == 0) & + & info = readFloat(x%dt_buf,y,n) + + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + + if (x%i_buf_sz < ni) then + if (c_associated(x%i_buf)) then + call freeInt(x%i_buf) + x%i_buf = c_null_ptr + end if + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeFloat(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateFloat(x%dt_buf,n) + x%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(x%i_buf,ii%v,ni) + if (info == 0) & + & info = igathMultiVecDeviceFloat(x%deviceVect,& + & 0, n, i, x%i_buf, 1, x%dt_buf, 1) + if (info == 0) & + & info = readFloat(x%dt_buf,y,n) + + end select + + end subroutine s_cuda_gthzv_x + + subroutine s_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i,n + class(psb_i_base_vect_type) :: idx + class(psb_s_vect_cuda) :: x + integer :: info, ni + + info = 0 +!!$ write(0,*) 'Starting gth_zbuf' + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'gthzbuf') + return + end if + + select type(ii=> idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_cuda_DeviceHasUVA()) then + info = igathMultiVecDeviceFloatVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) + + else + info = igathMultiVecDeviceFloatVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_buf, 1) + if (info == 0) & + & info = readFloat(i,x%dt_buf,x%combuf(i:),n,1) + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + info = 0 + if (.not.c_associated(x%i_buf)) then + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,x%i_buf,ii%v(i:),n,1) + + if (info == 0) & + & info = igathMultiVecDeviceFloat(x%deviceVect,& + & 0, n, i, x%i_buf, i,x%dt_buf, 1) + + if (info == 0) & + & info = readFloat(i,x%dt_buf,x%combuf(i:),n,1) + + end select + + end subroutine s_cuda_gthzbuf + + subroutine s_cuda_sctb(n,idx,x,beta,y) + implicit none + !use psb_const_mod + integer(psb_ipk_) :: n, idx(:) + real(psb_spk_) :: beta, x(:) + class(psb_s_vect_cuda) :: y + integer(psb_ipk_) :: info + + if (n == 0) return + + if (y%is_dev()) call y%sync() + + call y%psb_s_base_vect_type%sctb(n,idx,x,beta) + call y%set_host() + + end subroutine s_cuda_sctb + + subroutine s_cuda_sctb_x(i,n,idx,x,beta,y) + use psb_cuda_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + real(psb_spk_) :: beta, x(:) + class(psb_s_vect_cuda) :: y + integer :: info, ni + + select type(ii=> idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + + ! + if (psb_cuda_DeviceHasUVA()) then + if (allocated(y%pinned_buffer)) then + if (size(y%pinned_buffer) < n) then + call inner_unregister(y%pinned_buffer) + deallocate(y%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(y%pinned_buffer)) then + allocate(y%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(y%pinned_buffer,y%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + y%pinned_buffer(1:n) = x(1:n) + info = iscatMultiVecDeviceFloatVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_p_buf, 1,beta) + else + + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeFloat(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateFloat(y%dt_buf,n) + y%dt_buf_sz=n + end if + info = writeFloat(y%dt_buf,x,n) + info = iscatMultiVecDeviceFloatVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_buf, 1,beta) + + end if + + class default + ni = size(ii%v) + + if (y%i_buf_sz < ni) then + if (c_associated(y%i_buf)) then + call freeInt(y%i_buf) + y%i_buf = c_null_ptr + end if + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeFloat(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateFloat(y%dt_buf,n) + y%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(y%i_buf,ii%v(i:i+n-1),n) + info = writeFloat(y%dt_buf,x,n) + info = iscatMultiVecDeviceFloat(y%deviceVect,& + & 0, n, 1, y%i_buf, 1, y%dt_buf, 1,beta) + + + end select + ! + ! Need a sync here to make sure we are not reallocating + ! the buffers before iscatMulti has finished. + ! + call psb_cudaSync() + call y%set_dev() + + end subroutine s_cuda_sctb_x + + subroutine s_cuda_sctb_buf(i,n,idx,beta,y) + use psi_serial_mod + use psb_cuda_env_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + real(psb_spk_) :: beta + class(psb_s_vect_cuda) :: y + integer(psb_ipk_) :: info, ni + +!!$ write(0,*) 'Starting sctb_buf' + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'sctb_buf') + return + end if + + + select type(ii=> idx) + class is (psb_i_vect_cuda) + + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + if (psb_cuda_DeviceHasUVA()) then + info = iscatMultiVecDeviceFloatVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) + else + info = writeFloat(i,y%dt_buf,y%combuf(i:),n,1) + info = iscatMultiVecDeviceFloatVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_buf, 1,beta) + + end if + + class default + !call y%sct(n,ii%v(i:),x,beta) + ni = size(ii%v) + info = 0 + if (.not.c_associated(y%i_buf)) then + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,y%i_buf,ii%v(i:),n,1) + if (info == 0) & + & info = writeFloat(i,y%dt_buf,y%combuf(i:),n,1) + if (info == 0) info = iscatMultiVecDeviceFloat(y%deviceVect,& + & 0, n, i, y%i_buf, i, y%dt_buf, 1,beta) + end select +!!$ write(0,*) 'Done sctb_buf' + + end subroutine s_cuda_sctb_buf + + + subroutine s_cuda_bld_x(x,this) + use psb_base_mod + real(psb_spk_), intent(in) :: this(:) + class(psb_s_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this),x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'s_cuda_bld_x',& + & i_err=(/size(this),izero,izero,izero,izero/)) + end if + x%v(:) = this(:) + call x%set_host() + call x%sync() + + end subroutine s_cuda_bld_x + + subroutine s_cuda_bld_mn(x,n) + integer(psb_mpk_), intent(in) :: n + class(psb_s_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n,info) + if (info /= 0) then + call psb_errpush(info,'s_cuda_bld_n',i_err=(/n,n,n,n,n/)) + end if + + end subroutine s_cuda_bld_mn + + subroutine s_cuda_set_host(x) + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + + x%state = is_host + end subroutine s_cuda_set_host + + subroutine s_cuda_set_dev(x) + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + + x%state = is_dev + end subroutine s_cuda_set_dev + + subroutine s_cuda_set_sync(x) + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + + x%state = is_sync + end subroutine s_cuda_set_sync + + function s_cuda_is_dev(x) result(res) + implicit none + class(psb_s_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function s_cuda_is_dev + + function s_cuda_is_host(x) result(res) + implicit none + class(psb_s_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function s_cuda_is_host + + function s_cuda_is_sync(x) result(res) + implicit none + class(psb_s_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function s_cuda_is_sync + + + function s_cuda_get_nrows(x) result(res) + implicit none + class(psb_s_vect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = 0 + if (allocated(x%v)) res = size(x%v) + end function s_cuda_get_nrows + + function s_cuda_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'sGPU' + end function s_cuda_get_fmt + + subroutine s_cuda_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_s_vect_cuda), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n,x%v,info) + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'s_cuda_all',& + & i_err=(/n,n,n,n,n/)) + end if + end subroutine s_cuda_all + + subroutine s_cuda_zero(x) + use psi_serial_mod + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + ! Since we are overwriting, make sure to do it + ! on the GPU side + call x%set_dev() + call x%set_scal(szero) + end subroutine s_cuda_zero + + subroutine s_cuda_asb_m(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_s_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + if (x%is_dev()) then + nd = getMultiVecDeviceSize(x%deviceVect) + if (nd < n) then + call x%sync() + call x%psb_s_base_vect_type%asb(n,info) + if (info == psb_success_) call x%sync_space(info) + call x%set_host() + end if + else ! + if (x%get_nrows() size(x%v)).or.(n > x%get_nrows())) then +!!$ write(0,*) 'Incoherent situation : sizes',n,size(x%v),x%get_nrows() + call psb_realloc(n,x%v,info) + end if + info = readMultiVecDevice(x%deviceVect,x%v) + end if + if (info == 0) call x%set_sync() + if (info /= 0) then + info=psb_err_internal_error_ + call psb_errpush(info,'s_cuda_sync') + end if + + end subroutine s_cuda_sync + + subroutine s_cuda_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) deallocate(x%v, stat=info) + if (c_associated(x%deviceVect)) then +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' + call freeMultiVecDevice(x%deviceVect) + x%deviceVect=c_null_ptr + end if + call x%free_buffer(info) + call x%set_sync() + end subroutine s_cuda_free + + subroutine s_cuda_set_scal(x,val,first,last) + class(psb_s_vect_cuda), intent(inout) :: x + real(psb_spk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: info, first_, last_ + + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1,first) + if (present(last)) last_ = min(last,last_) + + info = setScalDevice(val,first_,last_,1,x%deviceVect) + call x%set_dev() + + end subroutine s_cuda_set_scal +!!$ +!!$ subroutine s_cuda_set_vect(x,val) +!!$ class(psb_s_vect_cuda), intent(inout) :: x +!!$ real(psb_spk_), intent(in) :: val(:) +!!$ integer(psb_ipk_) :: nr +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_s_base_vect_type%set_vect(val) +!!$ call x%set_host() +!!$ +!!$ end subroutine s_cuda_set_vect + + + + function s_cuda_dot_v(n,x,y) result(res) + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + real(psb_spk_), external :: ddot + integer(psb_ipk_) :: info + + res = szero + ! + ! Note: this is the gpu implementation. + ! When we get here, we are sure that X is of + ! TYPE psb_s_vect + ! + select type(yy => y) + type is (psb_s_base_vect_type) + if (x%is_dev()) call x%sync() + res = ddot(n,x%v,1,yy%v,1) + type is (psb_s_vect_cuda) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) + if (info /= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,'s_cuda_dot_v') + end if + + class default + ! y%sync is done in dot_a + call x%sync() + res = y%dot(n,x%v) + end select + + end function s_cuda_dot_v + + function s_cuda_dot_a(n,x,y) result(res) + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + real(psb_spk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + real(psb_spk_), external :: ddot + + if (x%is_dev()) call x%sync() + res = ddot(n,y,1,x%v,1) + + end function s_cuda_dot_a + + subroutine s_cuda_axpby_v(m,alpha, x, beta, y, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_vect_cuda), intent(inout) :: y + real(psb_spk_), intent (in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny + + info = psb_success_ + + select type(xx => x) + type is (psb_s_vect_cuda) + ! Do something different here + if ((beta /= szero).and.y%is_host())& + & call y%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(y%deviceVect) + if ((nx x) + class is (psb_s_vect_cuda) + select type(yy => y) + class is (psb_s_vect_cuda) + select type(zz => z) + class is (psb_s_vect_cuda) + ! Do something different here + if ((beta /= szero).and.yy%is_host())& + & call yy%sync() + if ((delta /= szero).and.zz%is_host())& + & call zz%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + if ((nx x) + class is (psb_s_vect_cuda) + select type(yy => y) + class is (psb_s_vect_cuda) + select type(zz => z) + class is (psb_s_vect_cuda) + ! Do something different here + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if (zz%is_host()) call zz%sync() + if (w%is_host()) call w%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + nw = getMultiVecDeviceSize(w%deviceVect) + if ((nx x) + type is (psb_s_base_vect_type) + if (y%is_dev()) call y%sync() + do i=1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + type is (psb_s_vect_cuda) + ! Do something different here + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + info = axyMultiVecDevice(n,sone,xx%deviceVect,y%deviceVect) + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + call y%mlt(xx%v,info) + call y%set_host() + end select + + end subroutine s_cuda_mlt_v + + subroutine s_cuda_mlt_a(x, y, info) + use psi_serial_mod + implicit none + real(psb_spk_), intent(in) :: x(:) + class(psb_s_vect_cuda), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync() + call y%psb_s_base_vect_type%mlt(x,info) + ! set_host() is invoked in the base method + end subroutine s_cuda_mlt_a + + subroutine s_cuda_mlt_a_2(alpha,x,y,beta,z,info) + use psi_serial_mod + implicit none + real(psb_spk_), intent(in) :: alpha,beta + real(psb_spk_), intent(in) :: x(:) + real(psb_spk_), intent(in) :: y(:) + class(psb_s_vect_cuda), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync() + call z%psb_s_base_vect_type%mlt(alpha,x,y,beta,info) + ! set_host() is invoked in the base method + end subroutine s_cuda_mlt_a_2 + + subroutine s_cuda_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + use psi_serial_mod + use psb_string_mod + implicit none + real(psb_spk_), intent(in) :: alpha,beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + class(psb_s_vect_cuda), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ + + if (.false.) then + ! These are present just for coherence with the + ! complex versions; they do nothing here. + conjgx_=.false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') + conjgy_=.false. + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') + end if + + n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) + + ! + ! Need to reconsider BETA in the GPU side + ! of things. + ! + info = 0 + select type(xx => x) + type is (psb_s_vect_cuda) + select type (yy => y) + type is (psb_s_vect_cuda) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= szero).and.(z%is_host())) call z%sync() + info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& + & yy%deviceVect,beta,z%deviceVect) + call z%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + if ((beta /= szero).and.(z%is_dev())) call z%sync() + call z%psb_s_base_vect_type%mlt(alpha,xx,yy,beta,info) + call z%set_host() + end select + + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= szero).and.(z%is_dev())) call z%sync() + call z%psb_s_base_vect_type%mlt(alpha,x,y,beta,info) + call z%set_host() + end select + end subroutine s_cuda_mlt_v_2 + + subroutine s_cuda_scal(alpha, x) + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + real(psb_spk_), intent (in) :: alpha + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = scalMultiVecDevice(alpha,x%deviceVect) + call x%set_dev() + end subroutine s_cuda_scal + + + function s_cuda_nrm2(n,x) result(res) + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + ! WARNING: this should be changed. + if (x%is_host()) call x%sync() + info = nrm2MultiVecDevice(res,n,x%deviceVect) + + end function s_cuda_nrm2 + + function s_cuda_amax(n,x) result(res) + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = amaxMultiVecDevice(res,n,x%deviceVect) + + end function s_cuda_amax + + function s_cuda_asum(n,x) result(res) + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = asumMultiVecDevice(res,n,x%deviceVect) + + end function s_cuda_asum + + subroutine s_cuda_absval1(x) + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + n=x%get_nrows() + info = absMultiVecDevice(n,sone,x%deviceVect) + + end subroutine s_cuda_absval1 + + subroutine s_cuda_absval2(x,y) + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + n=min(x%get_nrows(),y%get_nrows()) + select type (yy=> y) + class is (psb_s_vect_cuda) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = absMultiVecDevice(n,sone,x%deviceVect,yy%deviceVect) + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_s_base_vect_type%absval(y) + end select + end subroutine s_cuda_absval2 + + + subroutine s_cuda_vect_finalize(x) + use psi_serial_mod + use psb_realloc_mod + implicit none + type(psb_s_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + info = 0 + call x%free(info) + end subroutine s_cuda_vect_finalize + + subroutine s_cuda_ins_v(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_s_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_cuda + + info = 0 + if (psb_errstatus_fatal()) return + + done_cuda = .false. + select type(virl => irl) + class is (psb_i_vect_cuda) + select type(vval => val) + class is (psb_s_vect_cuda) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + info = geinsMultiVecDeviceFloat(n,virl%deviceVect,& + & vval%deviceVect,dupl,1,x%deviceVect) + call x%set_dev() + done_cuda=.true. + end select + end select + + if (.not.done_cuda) then + if (irl%is_dev()) call irl%sync() + if (val%is_dev()) call val%sync() + call x%ins(n,irl%v,val%v,dupl,info) + end if + + if (info /= 0) then + call psb_errpush(info,'cuda_vect_ins') + return + end if + + end subroutine s_cuda_ins_v + + subroutine s_cuda_ins_a(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_s_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_s_base_vect_type%ins(n,irl,val,dupl,info) + call x%set_host() + + end subroutine s_cuda_ins_a + +end module psb_s_cuda_vect_mod + + +! +! Multivectors +! + + + +module psb_s_cuda_multivect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_s_multivect_mod + use psb_s_base_multivect_mod + use psb_cuda_env_mod + use psb_i_multivect_mod + use psb_i_cuda_multivect_mod + use psb_s_vectordev_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_base_multivect_type) :: psb_s_multivect_cuda + integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 + type(c_ptr) :: deviceVect = c_null_ptr + real(c_double), allocatable :: buffer(:,:) + type(c_ptr) :: dt_buf = c_null_ptr + contains + procedure, pass(x) :: get_nrows => s_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => s_cuda_multi_get_ncols + procedure, nopass :: get_fmt => s_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => s_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => s_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => s_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => s_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => s_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => s_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => s_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => s_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => s_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => s_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => s_cuda_multi_amax +!!$ procedure, pass(x) :: asum => s_cuda_multi_asum + procedure, pass(x) :: all => s_cuda_multi_all + procedure, pass(x) :: zero => s_cuda_multi_zero + procedure, pass(x) :: asb => s_cuda_multi_asb + procedure, pass(x) :: sync => s_cuda_multi_sync + procedure, pass(x) :: sync_space => s_cuda_multi_sync_space + procedure, pass(x) :: bld_x => s_cuda_multi_bld_x + procedure, pass(x) :: bld_n => s_cuda_multi_bld_n + procedure, pass(x) :: free => s_cuda_multi_free + procedure, pass(x) :: ins => s_cuda_multi_ins + procedure, pass(x) :: is_host => s_cuda_multi_is_host + procedure, pass(x) :: is_dev => s_cuda_multi_is_dev + procedure, pass(x) :: is_sync => s_cuda_multi_is_sync + procedure, pass(x) :: set_host => s_cuda_multi_set_host + procedure, pass(x) :: set_dev => s_cuda_multi_set_dev + procedure, pass(x) :: set_sync => s_cuda_multi_set_sync + procedure, pass(x) :: set_scal => s_cuda_multi_set_scal + procedure, pass(x) :: set_vect => s_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => s_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => s_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => s_cuda_multi_sctb_x + final :: s_cuda_multi_vect_finalize + end type psb_s_multivect_cuda + + public :: psb_s_multivect_cuda + private :: constructor + interface psb_s_multivect_cuda + module procedure constructor + end interface + +contains + + function constructor(x) result(this) + real(psb_spk_) :: x(:,:) + type(psb_s_multivect_cuda) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x,1),size(x,2),info) + + end function constructor + + +!!$ subroutine s_cuda_multi_gthzv_x(i,n,idx,x,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i,n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ real(psb_spk_) :: y(:) +!!$ class(psb_s_multivect_cuda) :: x +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_cuda) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (x%is_host()) call x%sync() +!!$ +!!$ if (allocated(x%buffer)) then +!!$ if (size(x%buffer) < n) then +!!$ call inner_unregister(x%buffer) +!!$ deallocate(x%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(x%buffer)) then +!!$ allocate(x%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(x%buffer,x%dt_buf) +!!$ endif +!!$ info = igathMultiVecDeviceDouble(x%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, x%dt_buf, 1) +!!$ call psb_cudaSync() +!!$ y(1:n) = x%buffer(1:n) +!!$ +!!$ class default +!!$ call x%gth(n,ii%v(i:),y) +!!$ end select +!!$ +!!$ +!!$ end subroutine s_cuda_multi_gthzv_x +!!$ +!!$ +!!$ +!!$ subroutine s_cuda_multi_sctb(n,idx,x,beta,y) +!!$ implicit none +!!$ !use psb_const_mod +!!$ integer(psb_ipk_) :: n, idx(:) +!!$ real(psb_spk_) :: beta, x(:) +!!$ class(psb_s_multivect_cuda) :: y +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (n == 0) return +!!$ +!!$ if (y%is_dev()) call y%sync() +!!$ +!!$ call y%psb_s_base_multivect_type%sctb(n,idx,x,beta) +!!$ call y%set_host() +!!$ +!!$ end subroutine s_cuda_multi_sctb +!!$ +!!$ subroutine s_cuda_multi_sctb_x(i,n,idx,x,beta,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ real(psb_spk_) :: beta, x(:) +!!$ class(psb_s_multivect_cuda) :: y +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_cuda) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (y%is_host()) call y%sync() +!!$ +!!$ if (allocated(y%buffer)) then +!!$ if (size(y%buffer) < n) then +!!$ call inner_unregister(y%buffer) +!!$ deallocate(y%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(y%buffer)) then +!!$ allocate(y%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(y%buffer,y%dt_buf) +!!$ endif +!!$ y%buffer(1:n) = x(1:n) +!!$ info = iscatMultiVecDeviceDouble(y%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, y%dt_buf, 1,beta) +!!$ +!!$ call y%set_dev() +!!$ call psb_cudaSync() +!!$ +!!$ class default +!!$ call y%sct(n,ii%v(i:),x,beta) +!!$ end select +!!$ +!!$ end subroutine s_cuda_multi_sctb_x + + + subroutine s_cuda_multi_bld_x(x,this) + use psb_base_mod + real(psb_spk_), intent(in) :: this(:,:) + class(psb_s_multivect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info, m, n + + m=size(this,1) + n=size(this,2) + x%m_nrows = m + x%m_ncols = n + call psb_realloc(m,n,x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'s_cuda_multi_bld_x',& + & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) + end if + x%v(1:m,1:n) = this(1:m,1:n) + call x%set_host() + call x%sync() + + end subroutine s_cuda_multi_bld_x + + subroutine s_cuda_multi_bld_n(x,m,n) + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_multivect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(m,n,info) + if (info /= 0) then + call psb_errpush(info,'s_cuda_multi_bld_n',i_err=(/m,n,n,n,n/)) + end if + + end subroutine s_cuda_multi_bld_n + + + subroutine s_cuda_multi_set_host(x) + implicit none + class(psb_s_multivect_cuda), intent(inout) :: x + + x%state = is_host + end subroutine s_cuda_multi_set_host + + subroutine s_cuda_multi_set_dev(x) + implicit none + class(psb_s_multivect_cuda), intent(inout) :: x + + x%state = is_dev + end subroutine s_cuda_multi_set_dev + + subroutine s_cuda_multi_set_sync(x) + implicit none + class(psb_s_multivect_cuda), intent(inout) :: x + + x%state = is_sync + end subroutine s_cuda_multi_set_sync + + function s_cuda_multi_is_dev(x) result(res) + implicit none + class(psb_s_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function s_cuda_multi_is_dev + + function s_cuda_multi_is_host(x) result(res) + implicit none + class(psb_s_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function s_cuda_multi_is_host + + function s_cuda_multi_is_sync(x) result(res) + implicit none + class(psb_s_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function s_cuda_multi_is_sync + + + function s_cuda_multi_get_nrows(x) result(res) + implicit none + class(psb_s_multivect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_nrows + + end function s_cuda_multi_get_nrows + + function s_cuda_multi_get_ncols(x) result(res) + implicit none + class(psb_s_multivect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_ncols + + end function s_cuda_multi_get_ncols + + function s_cuda_multi_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'sGPU' + end function s_cuda_multi_get_fmt + +!!$ function s_cuda_multi_dot_v(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_s_multivect_cuda), intent(inout) :: x +!!$ class(psb_s_base_multivect_type), intent(inout) :: y +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ real(psb_spk_), external :: ddot +!!$ integer(psb_ipk_) :: info +!!$ +!!$ res = szero +!!$ ! +!!$ ! Note: this is the gpu implementation. +!!$ ! When we get here, we are sure that X is of +!!$ ! TYPE psb_s_vect +!!$ ! +!!$ select type(yy => y) +!!$ type is (psb_s_base_multivect_type) +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,x%v,1,yy%v,1) +!!$ type is (psb_s_multivect_cuda) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) +!!$ if (info /= 0) then +!!$ info = psb_err_internal_error_ +!!$ call psb_errpush(info,'s_cuda_multi_dot_v') +!!$ end if +!!$ +!!$ class default +!!$ ! y%sync is done in dot_a +!!$ call x%sync() +!!$ res = y%dot(n,x%v) +!!$ end select +!!$ +!!$ end function s_cuda_multi_dot_v +!!$ +!!$ function s_cuda_multi_dot_a(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_s_multivect_cuda), intent(inout) :: x +!!$ real(psb_spk_), intent(in) :: y(:) +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ real(psb_spk_), external :: ddot +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,y,1,x%v,1) +!!$ +!!$ end function s_cuda_multi_dot_a +!!$ +!!$ subroutine s_cuda_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_s_base_multivect_type), intent(inout) :: x +!!$ class(psb_s_multivect_cuda), intent(inout) :: y +!!$ real(psb_spk_), intent (in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny +!!$ +!!$ info = psb_success_ +!!$ +!!$ select type(xx => x) +!!$ type is (psb_s_base_multivect_type) +!!$ if ((beta /= szero).and.(y%is_dev()))& +!!$ & call y%sync() +!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) +!!$ call y%set_host() +!!$ type is (psb_s_multivect_cuda) +!!$ ! Do something different here +!!$ if ((beta /= szero).and.y%is_host())& +!!$ & call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ nx = getMultiVecDeviceSize(xx%deviceVect) +!!$ ny = getMultiVecDeviceSize(y%deviceVect) +!!$ if ((nx x) +!!$ type is (psb_s_base_multivect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ do i=1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ type is (psb_s_multivect_cuda) +!!$ ! Do something different here +!!$ if (y%is_host()) call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) +!!$ call y%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call y%mlt(xx%v,info) +!!$ call y%set_host() +!!$ end select +!!$ +!!$ end subroutine s_cuda_multi_mlt_v +!!$ +!!$ subroutine s_cuda_multi_mlt_a(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ real(psb_spk_), intent(in) :: x(:) +!!$ class(psb_s_multivect_cuda), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ call y%sync() +!!$ call y%psb_s_base_multivect_type%mlt(x,info) +!!$ call y%set_host() +!!$ end subroutine s_cuda_multi_mlt_a +!!$ +!!$ subroutine s_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ real(psb_spk_), intent(in) :: alpha,beta +!!$ real(psb_spk_), intent(in) :: x(:) +!!$ real(psb_spk_), intent(in) :: y(:) +!!$ class(psb_s_multivect_cuda), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (z%is_dev()) call z%sync() +!!$ call z%psb_s_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end subroutine s_cuda_multi_mlt_a_2 +!!$ +!!$ subroutine s_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ use psi_serial_mod +!!$ use psb_string_mod +!!$ implicit none +!!$ real(psb_spk_), intent(in) :: alpha,beta +!!$ class(psb_s_base_multivect_type), intent(inout) :: x +!!$ class(psb_s_base_multivect_type), intent(inout) :: y +!!$ class(psb_s_multivect_cuda), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ if (.false.) then +!!$ ! These are present just for coherence with the +!!$ ! complex versions; they do nothing here. +!!$ conjgx_=.false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') +!!$ conjgy_=.false. +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') +!!$ end if +!!$ +!!$ n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) +!!$ +!!$ ! +!!$ ! Need to reconsider BETA in the GPU side +!!$ ! of things. +!!$ ! +!!$ info = 0 +!!$ select type(xx => x) +!!$ type is (psb_s_multivect_cuda) +!!$ select type (yy => y) +!!$ type is (psb_s_multivect_cuda) +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ ! Z state is irrelevant: it will be done on the GPU. +!!$ info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& +!!$ & yy%deviceVect,beta,z%deviceVect) +!!$ call z%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call yy%sync() +!!$ call z%psb_s_base_multivect_type%mlt(alpha,xx,yy,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ +!!$ class default +!!$ call x%sync() +!!$ call y%sync() +!!$ call z%psb_s_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ end subroutine s_cuda_multi_mlt_v_2 + + + subroutine s_cuda_multi_set_scal(x,val) + class(psb_s_multivect_cuda), intent(inout) :: x + real(psb_spk_), intent(in) :: val + + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_s_base_multivect_type%set_scal(val) + call x%set_host() + end subroutine s_cuda_multi_set_scal + + subroutine s_cuda_multi_set_vect(x,val) + class(psb_s_multivect_cuda), intent(inout) :: x + real(psb_spk_), intent(in) :: val(:,:) + integer(psb_ipk_) :: nr + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_s_base_multivect_type%set_vect(val) + call x%set_host() + + end subroutine s_cuda_multi_set_vect + + + +!!$ subroutine s_cuda_multi_scal(alpha, x) +!!$ implicit none +!!$ class(psb_s_multivect_cuda), intent(inout) :: x +!!$ real(psb_spk_), intent (in) :: alpha +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_s_base_multivect_type%scal(alpha) +!!$ call x%set_host() +!!$ end subroutine s_cuda_multi_scal +!!$ +!!$ +!!$ function s_cuda_multi_nrm2(n,x) result(res) +!!$ implicit none +!!$ class(psb_s_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ ! WARNING: this should be changed. +!!$ if (x%is_host()) call x%sync() +!!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) +!!$ +!!$ end function s_cuda_multi_nrm2 +!!$ +!!$ function s_cuda_multi_amax(n,x) result(res) +!!$ implicit none +!!$ class(psb_s_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = maxval(abs(x%v(1:n))) +!!$ +!!$ end function s_cuda_multi_amax +!!$ +!!$ function s_cuda_multi_asum(n,x) result(res) +!!$ implicit none +!!$ class(psb_s_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_spk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = sum(abs(x%v(1:n))) +!!$ +!!$ end function s_cuda_multi_asum + + subroutine s_cuda_multi_all(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_multivect_cuda), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(m,n,x%v,info,pad=szero) + x%m_nrows = m + x%m_ncols = n + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'s_cuda_multi_all',& + & i_err=(/m,n,n,n,n/)) + end if + end subroutine s_cuda_multi_all + + subroutine s_cuda_multi_zero(x) + use psi_serial_mod + implicit none + class(psb_s_multivect_cuda), intent(inout) :: x + + if (allocated(x%v)) x%v=szero + call x%set_host() + end subroutine s_cuda_multi_zero + + subroutine s_cuda_multi_asb(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_multivect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nd, nc + + + x%m_nrows = m + x%m_ncols = n + if (x%is_host()) then + call x%psb_s_base_multivect_type%asb(m,n,info) + if (info == psb_success_) call x%sync_space(info) + else if (x%is_dev()) then + nd = getMultiVecDevicePitch(x%deviceVect) + nc = getMultiVecDeviceCount(x%deviceVect) + if ((nd < m).or.(nc z_cuda_csrg_get_fmt + procedure, pass(a) :: sizeof => z_cuda_csrg_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_csrg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_cuda_csrg_inner_vect_sv + procedure, pass(a) :: csmm => psb_z_cuda_csrg_csmm + procedure, pass(a) :: csmv => psb_z_cuda_csrg_csmv + procedure, pass(a) :: scals => psb_z_cuda_csrg_scals + procedure, pass(a) :: scalv => psb_z_cuda_csrg_scal + procedure, pass(a) :: reallocate_nz => psb_z_cuda_csrg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_cuda_csrg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_csrg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_csrg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_csrg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_csrg_from_fmt + procedure, pass(a) :: free => z_cuda_csrg_free + procedure, pass(a) :: mold => psb_z_cuda_csrg_mold + procedure, pass(a) :: is_host => z_cuda_csrg_is_host + procedure, pass(a) :: is_dev => z_cuda_csrg_is_dev + procedure, pass(a) :: is_sync => z_cuda_csrg_is_sync + procedure, pass(a) :: set_host => z_cuda_csrg_set_host + procedure, pass(a) :: set_dev => z_cuda_csrg_set_dev + procedure, pass(a) :: set_sync => z_cuda_csrg_set_sync + procedure, pass(a) :: sync => z_cuda_csrg_sync + procedure, pass(a) :: to_gpu => psb_z_cuda_csrg_to_gpu + procedure, pass(a) :: from_gpu => psb_z_cuda_csrg_from_gpu + final :: z_cuda_csrg_finalize + end type psb_z_cuda_csrg_sparse_mat + + private :: z_cuda_csrg_get_nzeros, z_cuda_csrg_free, z_cuda_csrg_get_fmt, & + & z_cuda_csrg_get_size, z_cuda_csrg_sizeof, z_cuda_csrg_get_nz_row + + + interface + subroutine psb_z_cuda_csrg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_csrg_inner_vect_sv + end interface + + + interface + subroutine psb_z_cuda_csrg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_csrg_vect_mv + end interface + + interface + subroutine psb_z_cuda_csrg_reallocate_nz(nz,a) + import :: psb_z_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_csrg_reallocate_nz + end interface + + interface + subroutine psb_z_cuda_csrg_allocate_mnnz(m,n,a,nz) + import :: psb_z_cuda_csrg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_cuda_csrg_allocate_mnnz + end interface + + interface + subroutine psb_z_cuda_csrg_mold(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_csrg_mold + end interface + + interface + subroutine psb_z_cuda_csrg_to_gpu(a,info, nzrm) + import :: psb_z_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_z_cuda_csrg_to_gpu + end interface + + interface + subroutine psb_z_cuda_csrg_from_gpu(a,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_csrg_from_gpu + end interface + + interface + subroutine psb_z_cuda_cp_csrg_from_coo(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_csrg_from_coo + end interface + + interface + subroutine psb_z_cuda_cp_csrg_from_fmt(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_csrg_from_fmt + end interface + + interface + subroutine psb_z_cuda_mv_csrg_from_coo(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_csrg_from_coo + end interface + + interface + subroutine psb_z_cuda_mv_csrg_from_fmt(a,b,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_csrg_from_fmt + end interface + + interface + subroutine psb_z_cuda_csrg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_csrg_csmv + end interface + interface + subroutine psb_z_cuda_csrg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_csrg_csmm + end interface + + interface + subroutine psb_z_cuda_csrg_scal(d,a,info,side) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_cuda_csrg_scal + end interface + + interface + subroutine psb_z_cuda_csrg_scals(d,a,info) + import :: psb_z_cuda_csrg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_csrg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_cuda_csrg_sizeof(a) result(res) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function z_cuda_csrg_sizeof + + function z_cuda_csrg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSRG' + end function z_cuda_csrg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + + subroutine z_cuda_csrg_set_host(a) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine z_cuda_csrg_set_host + + subroutine z_cuda_csrg_set_dev(a) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine z_cuda_csrg_set_dev + + subroutine z_cuda_csrg_set_sync(a) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine z_cuda_csrg_set_sync + + function z_cuda_csrg_is_dev(a) result(res) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function z_cuda_csrg_is_dev + + function z_cuda_csrg_is_host(a) result(res) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function z_cuda_csrg_is_host + + function z_cuda_csrg_is_sync(a) result(res) + implicit none + class(psb_z_cuda_csrg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function z_cuda_csrg_is_sync + + + subroutine z_cuda_csrg_sync(a) + implicit none + class(psb_z_cuda_csrg_sparse_mat), target, intent(in) :: a + class(psb_z_cuda_csrg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine z_cuda_csrg_sync + + subroutine z_cuda_csrg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + call a%psb_z_csr_sparse_mat%free() + + return + + end subroutine z_cuda_csrg_free + + subroutine z_cuda_csrg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + + type(psb_z_cuda_csrg_sparse_mat), intent(inout) :: a + + info = CSRGDeviceFree(a%deviceMat) + + return + + end subroutine z_cuda_csrg_finalize + +end module psb_z_cuda_csrg_mat_mod diff --git a/cuda/psb_z_cuda_diag_mat_mod.F90 b/cuda/psb_z_cuda_diag_mat_mod.F90 new file mode 100644 index 00000000..592d50b2 --- /dev/null +++ b/cuda/psb_z_cuda_diag_mat_mod.F90 @@ -0,0 +1,289 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_cuda_diag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_z_dia_mat_mod + + type, extends(psb_z_dia_sparse_mat) :: psb_z_cuda_diag_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => z_cuda_diag_get_fmt + procedure, pass(a) :: sizeof => z_cuda_diag_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_diag_vect_mv +! procedure, pass(a) :: csmm => psb_z_cuda_diag_csmm + procedure, pass(a) :: csmv => psb_z_cuda_diag_csmv +! procedure, pass(a) :: in_vect_sv => psb_z_cuda_diag_inner_vect_sv +! procedure, pass(a) :: scals => psb_z_cuda_diag_scals +! procedure, pass(a) :: scalv => psb_z_cuda_diag_scal +! procedure, pass(a) :: reallocate_nz => psb_z_cuda_diag_reallocate_nz +! procedure, pass(a) :: allocate_mnnz => psb_z_cuda_diag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_diag_from_coo +! procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_diag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_diag_from_coo +! procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_diag_from_fmt + procedure, pass(a) :: free => z_cuda_diag_free + procedure, pass(a) :: mold => psb_z_cuda_diag_mold + procedure, pass(a) :: to_gpu => psb_z_cuda_diag_to_gpu + final :: z_cuda_diag_finalize + end type psb_z_cuda_diag_sparse_mat + + private :: z_cuda_diag_get_nzeros, z_cuda_diag_free, z_cuda_diag_get_fmt, & + & z_cuda_diag_get_size, z_cuda_diag_sizeof, z_cuda_diag_get_nz_row + + + interface + subroutine psb_z_cuda_diag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_diag_vect_mv + end interface + + interface + subroutine psb_z_cuda_diag_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_z_base_vect_type + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_diag_inner_vect_sv + end interface + + interface + subroutine psb_z_cuda_diag_reallocate_nz(nz,a) + import :: psb_z_cuda_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_diag_reallocate_nz + end interface + + interface + subroutine psb_z_cuda_diag_allocate_mnnz(m,n,a,nz) + import :: psb_z_cuda_diag_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_cuda_diag_allocate_mnnz + end interface + + interface + subroutine psb_z_cuda_diag_mold(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_diag_mold + end interface + + interface + subroutine psb_z_cuda_diag_to_gpu(a,info, nzrm) + import :: psb_z_cuda_diag_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_z_cuda_diag_to_gpu + end interface + + interface + subroutine psb_z_cuda_cp_diag_from_coo(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_diag_from_coo + end interface + + interface + subroutine psb_z_cuda_cp_diag_from_fmt(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_diag_from_fmt + end interface + + interface + subroutine psb_z_cuda_mv_diag_from_coo(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_diag_from_coo + end interface + + + interface + subroutine psb_z_cuda_mv_diag_from_fmt(a,b,info) + import :: psb_z_cuda_diag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_diag_from_fmt + end interface + + interface + subroutine psb_z_cuda_diag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_diag_csmv + end interface + interface + subroutine psb_z_cuda_diag_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_diag_csmm + end interface + + interface + subroutine psb_z_cuda_diag_scal(d,a,info, side) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_cuda_diag_scal + end interface + + interface + subroutine psb_z_cuda_diag_scals(d,a,info) + import :: psb_z_cuda_diag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_diag_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_cuda_diag_sizeof(a) result(res) + implicit none + class(psb_z_cuda_diag_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function z_cuda_diag_sizeof + + function z_cuda_diag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIAG' + end function z_cuda_diag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine z_cuda_diag_free(a) + use diagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_z_dia_sparse_mat%free() + + return + + end subroutine z_cuda_diag_free + + subroutine z_cuda_diag_finalize(a) + use diagdev_mod + implicit none + type(psb_z_cuda_diag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine z_cuda_diag_finalize + +end module psb_z_cuda_diag_mat_mod diff --git a/cuda/psb_z_cuda_dnsg_mat_mod.F90 b/cuda/psb_z_cuda_dnsg_mat_mod.F90 new file mode 100644 index 00000000..3669d6ac --- /dev/null +++ b/cuda/psb_z_cuda_dnsg_mat_mod.F90 @@ -0,0 +1,275 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_cuda_dnsg_mat_mod + + use iso_c_binding + use psb_z_mat_mod + use psb_z_dns_mat_mod + use dnsdev_mod + + type, extends(psb_z_dns_sparse_mat) :: psb_z_cuda_dnsg_sparse_mat + ! + ! ITPACK/DNS format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of DNS, indistinguishable. + ! + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => z_cuda_dnsg_get_fmt + ! procedure, pass(a) :: sizeof => z_cuda_dnsg_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_dnsg_vect_mv +!!$ procedure, pass(a) :: csmm => psb_z_cuda_dnsg_csmm +!!$ procedure, pass(a) :: csmv => psb_z_cuda_dnsg_csmv +!!$ procedure, pass(a) :: in_vect_sv => psb_z_cuda_dnsg_inner_vect_sv +!!$ procedure, pass(a) :: scals => psb_z_cuda_dnsg_scals +!!$ procedure, pass(a) :: scalv => psb_z_cuda_dnsg_scal +!!$ procedure, pass(a) :: reallocate_nz => psb_z_cuda_dnsg_reallocate_nz +!!$ procedure, pass(a) :: allocate_mnnz => psb_z_cuda_dnsg_allocate_mnnz + ! Note: we *do* need the TO methods, because of the need to invoke SYNC + ! + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_dnsg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_dnsg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_dnsg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_dnsg_from_fmt + procedure, pass(a) :: free => z_cuda_dnsg_free + procedure, pass(a) :: mold => psb_z_cuda_dnsg_mold + procedure, pass(a) :: to_gpu => psb_z_cuda_dnsg_to_gpu + final :: z_cuda_dnsg_finalize + end type psb_z_cuda_dnsg_sparse_mat + + private :: z_cuda_dnsg_get_nzeros, z_cuda_dnsg_free, z_cuda_dnsg_get_fmt, & + & z_cuda_dnsg_get_size, z_cuda_dnsg_get_nz_row + + + interface + subroutine psb_z_cuda_dnsg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_dnsg_vect_mv + end interface +!!$ +!!$ interface +!!$ subroutine psb_z_cuda_dnsg_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_z_base_vect_type +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta +!!$ class(psb_z_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_cuda_dnsg_inner_vect_sv +!!$ end interface + +!!$ interface +!!$ subroutine psb_z_cuda_dnsg_reallocate_nz(nz,a) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_cuda_dnsg_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_cuda_dnsg_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_z_cuda_dnsg_allocate_mnnz +!!$ end interface + + interface + subroutine psb_z_cuda_dnsg_mold(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_dnsg_mold + end interface + + interface + subroutine psb_z_cuda_dnsg_to_gpu(a,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_dnsg_to_gpu + end interface + + interface + subroutine psb_z_cuda_cp_dnsg_from_coo(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_dnsg_from_coo + end interface + + interface + subroutine psb_z_cuda_cp_dnsg_from_fmt(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_dnsg_from_fmt + end interface + + interface + subroutine psb_z_cuda_mv_dnsg_from_coo(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_dnsg_from_coo + end interface + + + interface + subroutine psb_z_cuda_mv_dnsg_from_fmt(a,b,info) + import :: psb_z_cuda_dnsg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_dnsg_from_fmt + end interface + +!!$ interface +!!$ subroutine psb_z_cuda_dnsg_csmv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:) +!!$ complex(psb_dpk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_cuda_dnsg_csmv +!!$ end interface +!!$ interface +!!$ subroutine psb_z_cuda_dnsg_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_cuda_dnsg_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_cuda_dnsg_scal(d,a,info, side) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_z_cuda_dnsg_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_cuda_dnsg_scals(d,a,info) +!!$ import :: psb_z_cuda_dnsg_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_cuda_dnsg_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + + function z_cuda_dnsg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNSG' + end function z_cuda_dnsg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine z_cuda_dnsg_free(a) + use dnsdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_z_dns_sparse_mat%free() + + return + + end subroutine z_cuda_dnsg_free + + subroutine z_cuda_dnsg_finalize(a) + use dnsdev_mod + implicit none + type(psb_z_cuda_dnsg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeDnsDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine z_cuda_dnsg_finalize + +end module psb_z_cuda_dnsg_mat_mod diff --git a/cuda/psb_z_cuda_elg_mat_mod.F90 b/cuda/psb_z_cuda_elg_mat_mod.F90 new file mode 100644 index 00000000..bd36e4b6 --- /dev/null +++ b/cuda/psb_z_cuda_elg_mat_mod.F90 @@ -0,0 +1,456 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_cuda_elg_mat_mod + + use iso_c_binding + use psb_z_mat_mod + use psb_z_ell_mat_mod + use psb_i_cuda_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_ell_sparse_mat) :: psb_z_cuda_elg_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of ELL, indistinguishable. + ! + type(c_ptr) :: deviceMat = c_null_ptr + integer(psb_ipk_) :: devstate = is_host + + contains + procedure, nopass :: get_fmt => z_cuda_elg_get_fmt + procedure, pass(a) :: sizeof => z_cuda_elg_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_elg_vect_mv + procedure, pass(a) :: csmm => psb_z_cuda_elg_csmm + procedure, pass(a) :: csmv => psb_z_cuda_elg_csmv + procedure, pass(a) :: in_vect_sv => psb_z_cuda_elg_inner_vect_sv + procedure, pass(a) :: scals => psb_z_cuda_elg_scals + procedure, pass(a) :: scalv => psb_z_cuda_elg_scal + procedure, pass(a) :: reallocate_nz => psb_z_cuda_elg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_cuda_elg_allocate_mnnz + procedure, pass(a) :: reinit => z_cuda_elg_reinit + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_elg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_elg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_elg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_elg_from_fmt + procedure, pass(a) :: free => z_cuda_elg_free + procedure, pass(a) :: mold => psb_z_cuda_elg_mold + procedure, pass(a) :: csput_a => psb_z_cuda_elg_csput_a + procedure, pass(a) :: csput_v => psb_z_cuda_elg_csput_v + procedure, pass(a) :: is_host => z_cuda_elg_is_host + procedure, pass(a) :: is_dev => z_cuda_elg_is_dev + procedure, pass(a) :: is_sync => z_cuda_elg_is_sync + procedure, pass(a) :: set_host => z_cuda_elg_set_host + procedure, pass(a) :: set_dev => z_cuda_elg_set_dev + procedure, pass(a) :: set_sync => z_cuda_elg_set_sync + procedure, pass(a) :: sync => z_cuda_elg_sync + procedure, pass(a) :: from_gpu => psb_z_cuda_elg_from_gpu + procedure, pass(a) :: to_gpu => psb_z_cuda_elg_to_gpu + procedure, pass(a) :: asb => psb_z_cuda_elg_asb + final :: z_cuda_elg_finalize + end type psb_z_cuda_elg_sparse_mat + + private :: z_cuda_elg_get_nzeros, z_cuda_elg_free, z_cuda_elg_get_fmt, & + & z_cuda_elg_get_size, z_cuda_elg_sizeof, z_cuda_elg_get_nz_row, z_cuda_elg_sync + + + interface + subroutine psb_z_cuda_elg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_elg_vect_mv + end interface + + interface + subroutine psb_z_cuda_elg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_z_base_vect_type + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_elg_inner_vect_sv + end interface + + interface + subroutine psb_z_cuda_elg_reallocate_nz(nz,a) + import :: psb_z_cuda_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_elg_reallocate_nz + end interface + + interface + subroutine psb_z_cuda_elg_allocate_mnnz(m,n,a,nz) + import :: psb_z_cuda_elg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_cuda_elg_allocate_mnnz + end interface + + interface + subroutine psb_z_cuda_elg_mold(a,b,info) + import :: psb_z_cuda_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_elg_mold + end interface + + interface + subroutine psb_z_cuda_elg_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_elg_csput_a + end interface + + interface + subroutine psb_z_cuda_elg_csput_v(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_, psb_z_base_vect_type,& + & psb_i_base_vect_type + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_z_base_vect_type), intent(inout) :: val + class(psb_i_base_vect_type), intent(inout) :: ia, ja + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_elg_csput_v + end interface + + interface + subroutine psb_z_cuda_elg_from_gpu(a,info) + import :: psb_z_cuda_elg_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_elg_from_gpu + end interface + + interface + subroutine psb_z_cuda_elg_to_gpu(a,info, nzrm) + import :: psb_z_cuda_elg_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_z_cuda_elg_to_gpu + end interface + + interface + subroutine psb_z_cuda_cp_elg_from_coo(a,b,info) + import :: psb_z_cuda_elg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_elg_from_coo + end interface + + interface + subroutine psb_z_cuda_cp_elg_from_fmt(a,b,info) + import :: psb_z_cuda_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_elg_from_fmt + end interface + + interface + subroutine psb_z_cuda_mv_elg_from_coo(a,b,info) + import :: psb_z_cuda_elg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_elg_from_coo + end interface + + + interface + subroutine psb_z_cuda_mv_elg_from_fmt(a,b,info) + import :: psb_z_cuda_elg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_elg_from_fmt + end interface + + interface + subroutine psb_z_cuda_elg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_elg_csmv + end interface + interface + subroutine psb_z_cuda_elg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_elg_csmm + end interface + + interface + subroutine psb_z_cuda_elg_scal(d,a,info, side) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_cuda_elg_scal + end interface + + interface + subroutine psb_z_cuda_elg_scals(d,a,info) + import :: psb_z_cuda_elg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_elg_scals + end interface + + interface + subroutine psb_z_cuda_elg_asb(a) + import :: psb_z_cuda_elg_sparse_mat + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_elg_asb + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_cuda_elg_sizeof(a) result(res) + implicit none + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function z_cuda_elg_sizeof + + function z_cuda_elg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELG' + end function z_cuda_elg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + subroutine z_cuda_elg_reinit(a,clear) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + integer(psb_ipk_) :: isz, err_act + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev().or.a%is_sync()) then + if (clear_) call zeroEllDevice(a%deviceMat) + call a%set_dev() + else if (a%is_host()) then + a%val(:,:) = zzero + end if + call a%set_upd() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine z_cuda_elg_reinit + + subroutine z_cuda_elg_free(a) + use elldev_mod + implicit none + integer(psb_ipk_) :: info + + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_z_ell_sparse_mat%free() + call a%set_sync() + + return + + end subroutine z_cuda_elg_free + + subroutine z_cuda_elg_sync(a) + implicit none + class(psb_z_cuda_elg_sparse_mat), target, intent(in) :: a + class(psb_z_cuda_elg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine z_cuda_elg_sync + + subroutine z_cuda_elg_set_host(a) + implicit none + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine z_cuda_elg_set_host + + subroutine z_cuda_elg_set_dev(a) + implicit none + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine z_cuda_elg_set_dev + + subroutine z_cuda_elg_set_sync(a) + implicit none + class(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine z_cuda_elg_set_sync + + function z_cuda_elg_is_dev(a) result(res) + implicit none + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function z_cuda_elg_is_dev + + function z_cuda_elg_is_host(a) result(res) + implicit none + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function z_cuda_elg_is_host + + function z_cuda_elg_is_sync(a) result(res) + implicit none + class(psb_z_cuda_elg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function z_cuda_elg_is_sync + + subroutine z_cuda_elg_finalize(a) + use elldev_mod + implicit none + type(psb_z_cuda_elg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeEllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + return + + end subroutine z_cuda_elg_finalize + +end module psb_z_cuda_elg_mat_mod diff --git a/cuda/psb_z_cuda_hdiag_mat_mod.F90 b/cuda/psb_z_cuda_hdiag_mat_mod.F90 new file mode 100644 index 00000000..70897664 --- /dev/null +++ b/cuda/psb_z_cuda_hdiag_mat_mod.F90 @@ -0,0 +1,268 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_cuda_hdiag_mat_mod + + use iso_c_binding + use psb_base_mod + use psb_z_hdia_mat_mod + + type, extends(psb_z_hdia_sparse_mat) :: psb_z_cuda_hdiag_sparse_mat + ! + type(c_ptr) :: deviceMat = c_null_ptr + + contains + procedure, nopass :: get_fmt => z_cuda_hdiag_get_fmt + ! procedure, pass(a) :: sizeof => z_cuda_hdiag_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_hdiag_vect_mv + ! procedure, pass(a) :: csmm => psb_z_cuda_hdiag_csmm + procedure, pass(a) :: csmv => psb_z_cuda_hdiag_csmv + ! procedure, pass(a) :: in_vect_sv => psb_z_cuda_hdiag_inner_vect_sv + ! procedure, pass(a) :: scals => psb_z_cuda_hdiag_scals + ! procedure, pass(a) :: scalv => psb_z_cuda_hdiag_scal + ! procedure, pass(a) :: reallocate_nz => psb_z_cuda_hdiag_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_z_cuda_hdiag_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_hdiag_from_coo + ! procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_hdiag_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_hdiag_from_coo + ! procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_hdiag_from_fmt + procedure, pass(a) :: free => z_cuda_hdiag_free + procedure, pass(a) :: mold => psb_z_cuda_hdiag_mold + procedure, pass(a) :: to_gpu => psb_z_cuda_hdiag_to_gpu + final :: z_cuda_hdiag_finalize + end type psb_z_cuda_hdiag_sparse_mat + + private :: z_cuda_hdiag_get_nzeros, z_cuda_hdiag_free, z_cuda_hdiag_get_fmt, & + & z_cuda_hdiag_get_size, z_cuda_hdiag_sizeof, z_cuda_hdiag_get_nz_row + + + interface + subroutine psb_z_cuda_hdiag_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_hdiag_vect_mv + end interface + +!!$ interface +!!$ subroutine psb_z_cuda_hdiag_inner_vect_sv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_ipk_, psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_z_base_vect_type +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta +!!$ class(psb_z_base_vect_type), intent(inout) :: x, y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_cuda_hdiag_inner_vect_sv +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_cuda_hdiag_reallocate_nz(nz,a) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_cuda_hdiag_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_cuda_hdiag_allocate_mnnz(m,n,a,nz) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: m,n +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ integer(psb_ipk_), intent(in), optional :: nz +!!$ end subroutine psb_z_cuda_hdiag_allocate_mnnz +!!$ end interface + + interface + subroutine psb_z_cuda_hdiag_mold(a,b,info) + import :: psb_z_cuda_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_hdiag_mold + end interface + + interface + subroutine psb_z_cuda_hdiag_to_gpu(a,info) + import :: psb_z_cuda_hdiag_sparse_mat, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_hdiag_to_gpu + end interface + + interface + subroutine psb_z_cuda_cp_hdiag_from_coo(a,b,info) + import :: psb_z_cuda_hdiag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_hdiag_from_coo + end interface + +!!$ interface +!!$ subroutine psb_z_cuda_cp_hdiag_from_fmt(a,b,info) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_z_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_cuda_cp_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_z_cuda_mv_hdiag_from_coo(a,b,info) + import :: psb_z_cuda_hdiag_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_hdiag_from_coo + end interface + +!!$ +!!$ interface +!!$ subroutine psb_z_cuda_mv_hdiag_from_fmt(a,b,info) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ class(psb_z_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_cuda_mv_hdiag_from_fmt +!!$ end interface +!!$ + interface + subroutine psb_z_cuda_hdiag_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_hdiag_csmv + end interface + +!!$ interface +!!$ subroutine psb_z_cuda_hdiag_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_cuda_hdiag_csmm +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_cuda_hdiag_scal(d,a,info, side) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_z_cuda_hdiag_scal +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_cuda_hdiag_scals(d,a,info) +!!$ import :: psb_z_cuda_hdiag_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_cuda_hdiag_scals +!!$ end interface +!!$ + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + function z_cuda_hdiag_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIAG' + end function z_cuda_hdiag_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine z_cuda_hdiag_free(a) + use hdiagdev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_z_hdia_sparse_mat%free() + + return + + end subroutine z_cuda_hdiag_free + + subroutine z_cuda_hdiag_finalize(a) + use hdiagdev_mod + implicit none + type(psb_z_cuda_hdiag_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHdiagDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_z_hdia_sparse_mat%free() + + return + end subroutine z_cuda_hdiag_finalize + +end module psb_z_cuda_hdiag_mat_mod diff --git a/cuda/psb_z_cuda_hlg_mat_mod.F90 b/cuda/psb_z_cuda_hlg_mat_mod.F90 new file mode 100644 index 00000000..2acf43f1 --- /dev/null +++ b/cuda/psb_z_cuda_hlg_mat_mod.F90 @@ -0,0 +1,379 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_cuda_hlg_mat_mod + + use iso_c_binding + use psb_z_mat_mod + use psb_z_hll_mat_mod + + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_hll_sparse_mat) :: psb_z_cuda_hlg_sparse_mat + ! + ! ITPACK/HLL format, extended. + ! We are adding here the routines to create a copy of the data + ! into the GPU. + ! If HAVE_SPGPU is undefined this is just + ! a copy of HLL, indistinguishable. + ! + type(c_ptr) :: deviceMat = c_null_ptr + integer :: devstate = is_host + + contains + procedure, nopass :: get_fmt => z_cuda_hlg_get_fmt + procedure, pass(a) :: sizeof => z_cuda_hlg_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_hlg_vect_mv + procedure, pass(a) :: csmm => psb_z_cuda_hlg_csmm + procedure, pass(a) :: csmv => psb_z_cuda_hlg_csmv + procedure, pass(a) :: in_vect_sv => psb_z_cuda_hlg_inner_vect_sv + procedure, pass(a) :: scals => psb_z_cuda_hlg_scals + procedure, pass(a) :: scalv => psb_z_cuda_hlg_scal + procedure, pass(a) :: reallocate_nz => psb_z_cuda_hlg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_cuda_hlg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_hlg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_hlg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_hlg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_hlg_from_fmt + procedure, pass(a) :: free => z_cuda_hlg_free + procedure, pass(a) :: mold => psb_z_cuda_hlg_mold + procedure, pass(a) :: is_host => z_cuda_hlg_is_host + procedure, pass(a) :: is_dev => z_cuda_hlg_is_dev + procedure, pass(a) :: is_sync => z_cuda_hlg_is_sync + procedure, pass(a) :: set_host => z_cuda_hlg_set_host + procedure, pass(a) :: set_dev => z_cuda_hlg_set_dev + procedure, pass(a) :: set_sync => z_cuda_hlg_set_sync + procedure, pass(a) :: sync => z_cuda_hlg_sync + procedure, pass(a) :: from_gpu => psb_z_cuda_hlg_from_gpu + procedure, pass(a) :: to_gpu => psb_z_cuda_hlg_to_gpu + final :: z_cuda_hlg_finalize + end type psb_z_cuda_hlg_sparse_mat + + private :: z_cuda_hlg_get_nzeros, z_cuda_hlg_free, z_cuda_hlg_get_fmt, & + & z_cuda_hlg_get_size, z_cuda_hlg_sizeof, z_cuda_hlg_get_nz_row + + + interface + subroutine psb_z_cuda_hlg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_hlg_vect_mv + end interface + + interface + subroutine psb_z_cuda_hlg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_ipk_, psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_z_base_vect_type + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_hlg_inner_vect_sv + end interface + + interface + subroutine psb_z_cuda_hlg_reallocate_nz(nz,a) + import :: psb_z_cuda_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_hlg_reallocate_nz + end interface + + interface + subroutine psb_z_cuda_hlg_allocate_mnnz(m,n,a,nz) + import :: psb_z_cuda_hlg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_cuda_hlg_allocate_mnnz + end interface + + interface + subroutine psb_z_cuda_hlg_mold(a,b,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_hlg_mold + end interface + + interface + subroutine psb_z_cuda_hlg_from_gpu(a,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_hlg_from_gpu + end interface + + interface + subroutine psb_z_cuda_hlg_to_gpu(a,info, nzrm) + import :: psb_z_cuda_hlg_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_z_cuda_hlg_to_gpu + end interface + + interface + subroutine psb_z_cuda_cp_hlg_from_coo(a,b,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_hlg_from_coo + end interface + + interface + subroutine psb_z_cuda_cp_hlg_from_fmt(a,b,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_hlg_from_fmt + end interface + + interface + subroutine psb_z_cuda_mv_hlg_from_coo(a,b,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_hlg_from_coo + end interface + + + interface + subroutine psb_z_cuda_mv_hlg_from_fmt(a,b,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_hlg_from_fmt + end interface + + interface + subroutine psb_z_cuda_hlg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_hlg_csmv + end interface + interface + subroutine psb_z_cuda_hlg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_hlg_csmm + end interface + + interface + subroutine psb_z_cuda_hlg_scal(d,a,info, side) + import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_cuda_hlg_scal + end interface + + interface + subroutine psb_z_cuda_hlg_scals(d,a,info) + import :: psb_z_cuda_hlg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_hlg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_cuda_hlg_sizeof(a) result(res) + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function z_cuda_hlg_sizeof + + function z_cuda_hlg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLG' + end function z_cuda_hlg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine z_cuda_hlg_free(a) + use hlldev_mod + implicit none + integer(psb_ipk_) :: info + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + call a%psb_z_hll_sparse_mat%free() + + return + + end subroutine z_cuda_hlg_free + + + subroutine z_cuda_hlg_sync(a) + implicit none + class(psb_z_cuda_hlg_sparse_mat), target, intent(in) :: a + class(psb_z_cuda_hlg_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (tmpa%is_host()) then + call tmpa%to_gpu(info) + else if (tmpa%is_dev()) then + call tmpa%from_gpu(info) + end if + call tmpa%set_sync() + return + + end subroutine z_cuda_hlg_sync + + subroutine z_cuda_hlg_set_host(a) + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine z_cuda_hlg_set_host + + subroutine z_cuda_hlg_set_dev(a) + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine z_cuda_hlg_set_dev + + subroutine z_cuda_hlg_set_sync(a) + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine z_cuda_hlg_set_sync + + function z_cuda_hlg_is_dev(a) result(res) + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function z_cuda_hlg_is_dev + + function z_cuda_hlg_is_host(a) result(res) + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function z_cuda_hlg_is_host + + function z_cuda_hlg_is_sync(a) result(res) + implicit none + class(psb_z_cuda_hlg_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function z_cuda_hlg_is_sync + + + subroutine z_cuda_hlg_finalize(a) + use hlldev_mod + implicit none + type(psb_z_cuda_hlg_sparse_mat), intent(inout) :: a + + if (c_associated(a%deviceMat)) & + & call freeHllDevice(a%deviceMat) + a%deviceMat = c_null_ptr + + return + end subroutine z_cuda_hlg_finalize + +end module psb_z_cuda_hlg_mat_mod diff --git a/cuda/psb_z_cuda_hybg_mat_mod.F90 b/cuda/psb_z_cuda_hybg_mat_mod.F90 new file mode 100644 index 00000000..96620300 --- /dev/null +++ b/cuda/psb_z_cuda_hybg_mat_mod.F90 @@ -0,0 +1,287 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +#if CUDA_SHORT_VERSION <= 10 + +module psb_z_cuda_hybg_mat_mod + + use iso_c_binding + use psb_z_mat_mod + use cusparse_mod + + type, extends(psb_z_csr_sparse_mat) :: psb_z_cuda_hybg_sparse_mat + ! + ! HYBG. An interface to the cuSPARSE HYB + ! On the CPU side we keep a CSR storage. + ! + ! + ! + ! + type(z_Hmat) :: deviceMat + + contains + procedure, nopass :: get_fmt => z_cuda_hybg_get_fmt + procedure, pass(a) :: sizeof => z_cuda_hybg_sizeof + procedure, pass(a) :: vect_mv => psb_z_cuda_hybg_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_cuda_hybg_inner_vect_sv + procedure, pass(a) :: csmm => psb_z_cuda_hybg_csmm + procedure, pass(a) :: csmv => psb_z_cuda_hybg_csmv + procedure, pass(a) :: scals => psb_z_cuda_hybg_scals + procedure, pass(a) :: scalv => psb_z_cuda_hybg_scal + procedure, pass(a) :: reallocate_nz => psb_z_cuda_hybg_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_cuda_hybg_allocate_mnnz + ! Note: we do *not* need the TO methods, because the parent type + ! methods will work. + procedure, pass(a) :: cp_from_coo => psb_z_cuda_cp_hybg_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_cuda_cp_hybg_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_cuda_mv_hybg_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_cuda_mv_hybg_from_fmt + procedure, pass(a) :: free => z_cuda_hybg_free + procedure, pass(a) :: mold => psb_z_cuda_hybg_mold + procedure, pass(a) :: to_gpu => psb_z_cuda_hybg_to_gpu + final :: z_cuda_hybg_finalize + end type psb_z_cuda_hybg_sparse_mat + + private :: z_cuda_hybg_get_nzeros, z_cuda_hybg_free, z_cuda_hybg_get_fmt, & + & z_cuda_hybg_get_size, z_cuda_hybg_sizeof, z_cuda_hybg_get_nz_row + + + interface + subroutine psb_z_cuda_hybg_inner_vect_sv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_hybg_inner_vect_sv + end interface + + interface + subroutine psb_z_cuda_hybg_vect_mv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_z_base_vect_type, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_hybg_vect_mv + end interface + + interface + subroutine psb_z_cuda_hybg_reallocate_nz(nz,a) + import :: psb_z_cuda_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + end subroutine psb_z_cuda_hybg_reallocate_nz + end interface + + interface + subroutine psb_z_cuda_hybg_allocate_mnnz(m,n,a,nz) + import :: psb_z_cuda_hybg_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_cuda_hybg_allocate_mnnz + end interface + + interface + subroutine psb_z_cuda_hybg_mold(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_hybg_mold + end interface + + interface + subroutine psb_z_cuda_hybg_to_gpu(a,info, nzrm) + import :: psb_z_cuda_hybg_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: nzrm + end subroutine psb_z_cuda_hybg_to_gpu + end interface + + interface + subroutine psb_z_cuda_cp_hybg_from_coo(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_hybg_from_coo + end interface + + interface + subroutine psb_z_cuda_cp_hybg_from_fmt(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_cp_hybg_from_fmt + end interface + + interface + subroutine psb_z_cuda_mv_hybg_from_coo(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_hybg_from_coo + end interface + + interface + subroutine psb_z_cuda_mv_hybg_from_fmt(a,b,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_mv_hybg_from_fmt + end interface + + interface + subroutine psb_z_cuda_hybg_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_hybg_csmv + end interface + interface + subroutine psb_z_cuda_hybg_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_cuda_hybg_csmm + end interface + + interface + subroutine psb_z_cuda_hybg_scal(d,a,info,side) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_cuda_hybg_scal + end interface + + interface + subroutine psb_z_cuda_hybg_scals(d,a,info) + import :: psb_z_cuda_hybg_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cuda_hybg_scals + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_cuda_hybg_sizeof(a) result(res) + implicit none + class(psb_z_cuda_hybg_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + ! Should we account for the shadow data structure + ! on the GPU device side? + ! res = 2*res + + end function z_cuda_hybg_sizeof + + function z_cuda_hybg_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HYBG' + end function z_cuda_hybg_get_fmt + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine z_cuda_hybg_free(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + class(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + call a%psb_z_csr_sparse_mat%free() + + return + + end subroutine z_cuda_hybg_free + + subroutine z_cuda_hybg_finalize(a) + use cusparse_mod + implicit none + integer(psb_ipk_) :: info + type(psb_z_cuda_hybg_sparse_mat), intent(inout) :: a + + info = HYBGDeviceFree(a%deviceMat) + + return + end subroutine z_cuda_hybg_finalize + +end module psb_z_cuda_hybg_mat_mod +#endif diff --git a/cuda/psb_z_cuda_vect_mod.F90 b/cuda/psb_z_cuda_vect_mod.F90 new file mode 100644 index 00000000..769f9ea9 --- /dev/null +++ b/cuda/psb_z_cuda_vect_mod.F90 @@ -0,0 +1,2099 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_cuda_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_z_vect_mod + use psb_cuda_env_mod + use psb_i_vect_mod + use psb_i_cuda_vect_mod + use psb_i_vectordev_mod + use psb_z_vectordev_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_base_vect_type) :: psb_z_vect_cuda + integer :: state = is_host + type(c_ptr) :: deviceVect = c_null_ptr + complex(c_double_complex), allocatable :: pinned_buffer(:) + type(c_ptr) :: dt_p_buf = c_null_ptr + complex(c_double_complex), allocatable :: buffer(:) + type(c_ptr) :: dt_buf = c_null_ptr + integer :: dt_buf_sz = 0 + type(c_ptr) :: i_buf = c_null_ptr + integer :: i_buf_sz = 0 + contains + procedure, pass(x) :: get_nrows => z_cuda_get_nrows + procedure, nopass :: get_fmt => z_cuda_get_fmt + + procedure, pass(x) :: all => z_cuda_all + procedure, pass(x) :: zero => z_cuda_zero + procedure, pass(x) :: asb_m => z_cuda_asb_m + procedure, pass(x) :: sync => z_cuda_sync + procedure, pass(x) :: sync_space => z_cuda_sync_space + procedure, pass(x) :: bld_x => z_cuda_bld_x + procedure, pass(x) :: bld_mn => z_cuda_bld_mn + procedure, pass(x) :: free => z_cuda_free + procedure, pass(x) :: ins_a => z_cuda_ins_a + procedure, pass(x) :: ins_v => z_cuda_ins_v + procedure, pass(x) :: is_host => z_cuda_is_host + procedure, pass(x) :: is_dev => z_cuda_is_dev + procedure, pass(x) :: is_sync => z_cuda_is_sync + procedure, pass(x) :: set_host => z_cuda_set_host + procedure, pass(x) :: set_dev => z_cuda_set_dev + procedure, pass(x) :: set_sync => z_cuda_set_sync + procedure, pass(x) :: set_scal => z_cuda_set_scal +!!$ procedure, pass(x) :: set_vect => z_cuda_set_vect + procedure, pass(x) :: gthzv_x => z_cuda_gthzv_x + procedure, pass(y) :: sctb => z_cuda_sctb + procedure, pass(y) :: sctb_x => z_cuda_sctb_x + procedure, pass(x) :: gthzbuf => z_cuda_gthzbuf + procedure, pass(y) :: sctb_buf => z_cuda_sctb_buf + procedure, pass(x) :: new_buffer => z_cuda_new_buffer + procedure, nopass :: device_wait => z_cuda_device_wait + procedure, pass(x) :: free_buffer => z_cuda_free_buffer + procedure, pass(x) :: maybe_free_buffer => z_cuda_maybe_free_buffer + procedure, pass(x) :: dot_v => z_cuda_dot_v + procedure, pass(x) :: dot_a => z_cuda_dot_a + procedure, pass(y) :: axpby_v => z_cuda_axpby_v + procedure, pass(y) :: axpby_a => z_cuda_axpby_a + procedure, pass(z) :: abgdxyz => z_cuda_abgdxyz + procedure, pass(y) :: mlt_v => z_cuda_mlt_v + procedure, pass(y) :: mlt_a => z_cuda_mlt_a + procedure, pass(z) :: mlt_a_2 => z_cuda_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => z_cuda_mlt_v_2 + procedure, pass(x) :: scal => z_cuda_scal + procedure, pass(x) :: nrm2 => z_cuda_nrm2 + procedure, pass(x) :: amax => z_cuda_amax + procedure, pass(x) :: asum => z_cuda_asum + procedure, pass(x) :: absval1 => z_cuda_absval1 + procedure, pass(x) :: absval2 => z_cuda_absval2 + + final :: z_cuda_vect_finalize + end type psb_z_vect_cuda + + public :: psb_z_vect_cuda_ + private :: constructor + interface psb_z_vect_cuda_ + module procedure constructor + end interface psb_z_vect_cuda_ + +contains + + function constructor(x) result(this) + complex(psb_dpk_) :: x(:) + type(psb_z_vect_cuda) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x),info) + + end function constructor + + subroutine z_cuda_device_wait() + call psb_cudaSync() + end subroutine z_cuda_device_wait + + subroutine z_cuda_new_buffer(n,x,info) + use psb_realloc_mod + use psb_cuda_env_mod + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + + if (psb_cuda_DeviceHasUVA()) then + if (allocated(x%combuf)) then + if (size(x%combuf) idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_cuda_DeviceHasUVA()) then + ! + ! Only need a sync in this branch; in the others + ! cudamemCpy acts as a sync point. + ! + if (allocated(x%pinned_buffer)) then + if (size(x%pinned_buffer) < n) then + call inner_unregister(x%pinned_buffer) + deallocate(x%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(x%pinned_buffer)) then + allocate(x%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(x%pinned_buffer,x%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + info = igathMultiVecDeviceDoubleComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_p_buf, 1) + call psb_cudaSync() + y(1:n) = x%pinned_buffer(1:n) + + else + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeDoubleComplex(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateDoubleComplex(x%dt_buf,n) + x%dt_buf_sz=n + end if + if (info == 0) & + & info = igathMultiVecDeviceDoubleComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, 1, x%dt_buf, 1) + if (info == 0) & + & info = readDoubleComplex(x%dt_buf,y,n) + + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + + if (x%i_buf_sz < ni) then + if (c_associated(x%i_buf)) then + call freeInt(x%i_buf) + x%i_buf = c_null_ptr + end if + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (allocated(x%buffer)) then + if (size(x%buffer) < n) then + deallocate(x%buffer, stat=info) + end if + end if + + if (.not.allocated(x%buffer)) then + allocate(x%buffer(n),stat=info) + end if + + if (x%dt_buf_sz < n) then + if (c_associated(x%dt_buf)) then + call freeDoubleComplex(x%dt_buf) + x%dt_buf = c_null_ptr + end if + info = allocateDoubleComplex(x%dt_buf,n) + x%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(x%i_buf,ii%v,ni) + if (info == 0) & + & info = igathMultiVecDeviceDoubleComplex(x%deviceVect,& + & 0, n, i, x%i_buf, 1, x%dt_buf, 1) + if (info == 0) & + & info = readDoubleComplex(x%dt_buf,y,n) + + end select + + end subroutine z_cuda_gthzv_x + + subroutine z_cuda_gthzbuf(i,n,idx,x) + use psb_cuda_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i,n + class(psb_i_base_vect_type) :: idx + class(psb_z_vect_cuda) :: x + integer :: info, ni + + info = 0 +!!$ write(0,*) 'Starting gth_zbuf' + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'gthzbuf') + return + end if + + select type(ii=> idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (x%is_host()) call x%sync() + + if (psb_cuda_DeviceHasUVA()) then + info = igathMultiVecDeviceDoubleComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_p_buf, 1) + + else + info = igathMultiVecDeviceDoubleComplexVecIdx(x%deviceVect,& + & 0, n, i, ii%deviceVect, i,x%dt_buf, 1) + if (info == 0) & + & info = readDoubleComplex(i,x%dt_buf,x%combuf(i:),n,1) + endif + + class default + ! Do not go for brute force, but move the index vector + ni = size(ii%v) + info = 0 + if (.not.c_associated(x%i_buf)) then + info = allocateInt(x%i_buf,ni) + x%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,x%i_buf,ii%v(i:),n,1) + + if (info == 0) & + & info = igathMultiVecDeviceDoubleComplex(x%deviceVect,& + & 0, n, i, x%i_buf, i,x%dt_buf, 1) + + if (info == 0) & + & info = readDoubleComplex(i,x%dt_buf,x%combuf(i:),n,1) + + end select + + end subroutine z_cuda_gthzbuf + + subroutine z_cuda_sctb(n,idx,x,beta,y) + implicit none + !use psb_const_mod + integer(psb_ipk_) :: n, idx(:) + complex(psb_dpk_) :: beta, x(:) + class(psb_z_vect_cuda) :: y + integer(psb_ipk_) :: info + + if (n == 0) return + + if (y%is_dev()) call y%sync() + + call y%psb_z_base_vect_type%sctb(n,idx,x,beta) + call y%set_host() + + end subroutine z_cuda_sctb + + subroutine z_cuda_sctb_x(i,n,idx,x,beta,y) + use psb_cuda_env_mod + use psi_serial_mod + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + complex(psb_dpk_) :: beta, x(:) + class(psb_z_vect_cuda) :: y + integer :: info, ni + + select type(ii=> idx) + class is (psb_i_vect_cuda) + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + + ! + if (psb_cuda_DeviceHasUVA()) then + if (allocated(y%pinned_buffer)) then + if (size(y%pinned_buffer) < n) then + call inner_unregister(y%pinned_buffer) + deallocate(y%pinned_buffer, stat=info) + end if + end if + + if (.not.allocated(y%pinned_buffer)) then + allocate(y%pinned_buffer(n),stat=info) + if (info == 0) info = inner_register(y%pinned_buffer,y%dt_p_buf) + if (info /= 0) & + & write(0,*) 'Error from inner_register ',info + endif + y%pinned_buffer(1:n) = x(1:n) + info = iscatMultiVecDeviceDoubleComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_p_buf, 1,beta) + else + + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeDoubleComplex(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateDoubleComplex(y%dt_buf,n) + y%dt_buf_sz=n + end if + info = writeDoubleComplex(y%dt_buf,x,n) + info = iscatMultiVecDeviceDoubleComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, 1, y%dt_buf, 1,beta) + + end if + + class default + ni = size(ii%v) + + if (y%i_buf_sz < ni) then + if (c_associated(y%i_buf)) then + call freeInt(y%i_buf) + y%i_buf = c_null_ptr + end if + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (allocated(y%buffer)) then + if (size(y%buffer) < n) then + deallocate(y%buffer, stat=info) + end if + end if + + if (.not.allocated(y%buffer)) then + allocate(y%buffer(n),stat=info) + end if + + if (y%dt_buf_sz < n) then + if (c_associated(y%dt_buf)) then + call freeDoubleComplex(y%dt_buf) + y%dt_buf = c_null_ptr + end if + info = allocateDoubleComplex(y%dt_buf,n) + y%dt_buf_sz=n + end if + + if (info == 0) & + & info = writeInt(y%i_buf,ii%v(i:i+n-1),n) + info = writeDoubleComplex(y%dt_buf,x,n) + info = iscatMultiVecDeviceDoubleComplex(y%deviceVect,& + & 0, n, 1, y%i_buf, 1, y%dt_buf, 1,beta) + + + end select + ! + ! Need a sync here to make sure we are not reallocating + ! the buffers before iscatMulti has finished. + ! + call psb_cudaSync() + call y%set_dev() + + end subroutine z_cuda_sctb_x + + subroutine z_cuda_sctb_buf(i,n,idx,beta,y) + use psi_serial_mod + use psb_cuda_env_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + complex(psb_dpk_) :: beta + class(psb_z_vect_cuda) :: y + integer(psb_ipk_) :: info, ni + +!!$ write(0,*) 'Starting sctb_buf' + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_,'sctb_buf') + return + end if + + + select type(ii=> idx) + class is (psb_i_vect_cuda) + + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() + if (psb_cuda_DeviceHasUVA()) then + info = iscatMultiVecDeviceDoubleComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_p_buf, 1,beta) + else + info = writeDoubleComplex(i,y%dt_buf,y%combuf(i:),n,1) + info = iscatMultiVecDeviceDoubleComplexVecIdx(y%deviceVect,& + & 0, n, i, ii%deviceVect, i, y%dt_buf, 1,beta) + + end if + + class default + !call y%sct(n,ii%v(i:),x,beta) + ni = size(ii%v) + info = 0 + if (.not.c_associated(y%i_buf)) then + info = allocateInt(y%i_buf,ni) + y%i_buf_sz=ni + end if + if (info == 0) & + & info = writeInt(i,y%i_buf,ii%v(i:),n,1) + if (info == 0) & + & info = writeDoubleComplex(i,y%dt_buf,y%combuf(i:),n,1) + if (info == 0) info = iscatMultiVecDeviceDoubleComplex(y%deviceVect,& + & 0, n, i, y%i_buf, i, y%dt_buf, 1,beta) + end select +!!$ write(0,*) 'Done sctb_buf' + + end subroutine z_cuda_sctb_buf + + + subroutine z_cuda_bld_x(x,this) + use psb_base_mod + complex(psb_dpk_), intent(in) :: this(:) + class(psb_z_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this),x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'z_cuda_bld_x',& + & i_err=(/size(this),izero,izero,izero,izero/)) + end if + x%v(:) = this(:) + call x%set_host() + call x%sync() + + end subroutine z_cuda_bld_x + + subroutine z_cuda_bld_mn(x,n) + integer(psb_mpk_), intent(in) :: n + class(psb_z_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n,info) + if (info /= 0) then + call psb_errpush(info,'z_cuda_bld_n',i_err=(/n,n,n,n,n/)) + end if + + end subroutine z_cuda_bld_mn + + subroutine z_cuda_set_host(x) + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + + x%state = is_host + end subroutine z_cuda_set_host + + subroutine z_cuda_set_dev(x) + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + + x%state = is_dev + end subroutine z_cuda_set_dev + + subroutine z_cuda_set_sync(x) + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + + x%state = is_sync + end subroutine z_cuda_set_sync + + function z_cuda_is_dev(x) result(res) + implicit none + class(psb_z_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function z_cuda_is_dev + + function z_cuda_is_host(x) result(res) + implicit none + class(psb_z_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function z_cuda_is_host + + function z_cuda_is_sync(x) result(res) + implicit none + class(psb_z_vect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function z_cuda_is_sync + + + function z_cuda_get_nrows(x) result(res) + implicit none + class(psb_z_vect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = 0 + if (allocated(x%v)) res = size(x%v) + end function z_cuda_get_nrows + + function z_cuda_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'zGPU' + end function z_cuda_get_fmt + + subroutine z_cuda_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_z_vect_cuda), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n,x%v,info) + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'z_cuda_all',& + & i_err=(/n,n,n,n,n/)) + end if + end subroutine z_cuda_all + + subroutine z_cuda_zero(x) + use psi_serial_mod + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + ! Since we are overwriting, make sure to do it + ! on the GPU side + call x%set_dev() + call x%set_scal(zzero) + end subroutine z_cuda_zero + + subroutine z_cuda_asb_m(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_z_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + if (x%is_dev()) then + nd = getMultiVecDeviceSize(x%deviceVect) + if (nd < n) then + call x%sync() + call x%psb_z_base_vect_type%asb(n,info) + if (info == psb_success_) call x%sync_space(info) + call x%set_host() + end if + else ! + if (x%get_nrows() size(x%v)).or.(n > x%get_nrows())) then +!!$ write(0,*) 'Incoherent situation : sizes',n,size(x%v),x%get_nrows() + call psb_realloc(n,x%v,info) + end if + info = readMultiVecDevice(x%deviceVect,x%v) + end if + if (info == 0) call x%set_sync() + if (info /= 0) then + info=psb_err_internal_error_ + call psb_errpush(info,'z_cuda_sync') + end if + + end subroutine z_cuda_sync + + subroutine z_cuda_free(x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) deallocate(x%v, stat=info) + if (c_associated(x%deviceVect)) then +!!$ write(0,*)'d_cuda_free Calling freeMultiVecDevice' + call freeMultiVecDevice(x%deviceVect) + x%deviceVect=c_null_ptr + end if + call x%free_buffer(info) + call x%set_sync() + end subroutine z_cuda_free + + subroutine z_cuda_set_scal(x,val,first,last) + class(psb_z_vect_cuda), intent(inout) :: x + complex(psb_dpk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: info, first_, last_ + + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1,first) + if (present(last)) last_ = min(last,last_) + + info = setScalDevice(val,first_,last_,1,x%deviceVect) + call x%set_dev() + + end subroutine z_cuda_set_scal +!!$ +!!$ subroutine z_cuda_set_vect(x,val) +!!$ class(psb_z_vect_cuda), intent(inout) :: x +!!$ complex(psb_dpk_), intent(in) :: val(:) +!!$ integer(psb_ipk_) :: nr +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_z_base_vect_type%set_vect(val) +!!$ call x%set_host() +!!$ +!!$ end subroutine z_cuda_set_vect + + + + function z_cuda_dot_v(n,x,y) result(res) + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(in) :: n + complex(psb_dpk_) :: res + complex(psb_dpk_), external :: ddot + integer(psb_ipk_) :: info + + res = zzero + ! + ! Note: this is the gpu implementation. + ! When we get here, we are sure that X is of + ! TYPE psb_z_vect + ! + select type(yy => y) + type is (psb_z_base_vect_type) + if (x%is_dev()) call x%sync() + res = ddot(n,x%v,1,yy%v,1) + type is (psb_z_vect_cuda) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) + if (info /= 0) then + info = psb_err_internal_error_ + call psb_errpush(info,'z_cuda_dot_v') + end if + + class default + ! y%sync is done in dot_a + call x%sync() + res = y%dot(n,x%v) + end select + + end function z_cuda_dot_v + + function z_cuda_dot_a(n,x,y) result(res) + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + complex(psb_dpk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + complex(psb_dpk_) :: res + complex(psb_dpk_), external :: ddot + + if (x%is_dev()) call x%sync() + res = ddot(n,y,1,x%v,1) + + end function z_cuda_dot_a + + subroutine z_cuda_axpby_v(m,alpha, x, beta, y, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_vect_cuda), intent(inout) :: y + complex(psb_dpk_), intent (in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny + + info = psb_success_ + + select type(xx => x) + type is (psb_z_vect_cuda) + ! Do something different here + if ((beta /= zzero).and.y%is_host())& + & call y%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(y%deviceVect) + if ((nx x) + class is (psb_z_vect_cuda) + select type(yy => y) + class is (psb_z_vect_cuda) + select type(zz => z) + class is (psb_z_vect_cuda) + ! Do something different here + if ((beta /= zzero).and.yy%is_host())& + & call yy%sync() + if ((delta /= zzero).and.zz%is_host())& + & call zz%sync() + if (xx%is_host()) call xx%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + if ((nx x) + class is (psb_z_vect_cuda) + select type(yy => y) + class is (psb_z_vect_cuda) + select type(zz => z) + class is (psb_z_vect_cuda) + ! Do something different here + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if (zz%is_host()) call zz%sync() + if (w%is_host()) call w%sync() + nx = getMultiVecDeviceSize(xx%deviceVect) + ny = getMultiVecDeviceSize(yy%deviceVect) + nz = getMultiVecDeviceSize(zz%deviceVect) + nw = getMultiVecDeviceSize(w%deviceVect) + if ((nx x) + type is (psb_z_base_vect_type) + if (y%is_dev()) call y%sync() + do i=1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + type is (psb_z_vect_cuda) + ! Do something different here + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + info = axyMultiVecDevice(n,zone,xx%deviceVect,y%deviceVect) + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + call y%mlt(xx%v,info) + call y%set_host() + end select + + end subroutine z_cuda_mlt_v + + subroutine z_cuda_mlt_a(x, y, info) + use psi_serial_mod + implicit none + complex(psb_dpk_), intent(in) :: x(:) + class(psb_z_vect_cuda), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync() + call y%psb_z_base_vect_type%mlt(x,info) + ! set_host() is invoked in the base method + end subroutine z_cuda_mlt_a + + subroutine z_cuda_mlt_a_2(alpha,x,y,beta,z,info) + use psi_serial_mod + implicit none + complex(psb_dpk_), intent(in) :: alpha,beta + complex(psb_dpk_), intent(in) :: x(:) + complex(psb_dpk_), intent(in) :: y(:) + class(psb_z_vect_cuda), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync() + call z%psb_z_base_vect_type%mlt(alpha,x,y,beta,info) + ! set_host() is invoked in the base method + end subroutine z_cuda_mlt_a_2 + + subroutine z_cuda_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) + use psi_serial_mod + use psb_string_mod + implicit none + complex(psb_dpk_), intent(in) :: alpha,beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + class(psb_z_vect_cuda), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ + + if (.false.) then + ! These are present just for coherence with the + ! complex versions; they do nothing here. + conjgx_=.false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') + conjgy_=.false. + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') + end if + + n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) + + ! + ! Need to reconsider BETA in the GPU side + ! of things. + ! + info = 0 + select type(xx => x) + type is (psb_z_vect_cuda) + select type (yy => y) + type is (psb_z_vect_cuda) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= zzero).and.(z%is_host())) call z%sync() + info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& + & yy%deviceVect,beta,z%deviceVect) + call z%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + if ((beta /= zzero).and.(z%is_dev())) call z%sync() + call z%psb_z_base_vect_type%mlt(alpha,xx,yy,beta,info) + call z%set_host() + end select + + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= zzero).and.(z%is_dev())) call z%sync() + call z%psb_z_base_vect_type%mlt(alpha,x,y,beta,info) + call z%set_host() + end select + end subroutine z_cuda_mlt_v_2 + + subroutine z_cuda_scal(alpha, x) + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + complex(psb_dpk_), intent (in) :: alpha + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = scalMultiVecDevice(alpha,x%deviceVect) + call x%set_dev() + end subroutine z_cuda_scal + + + function z_cuda_nrm2(n,x) result(res) + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + ! WARNING: this should be changed. + if (x%is_host()) call x%sync() + info = nrm2MultiVecDeviceComplex(res,n,x%deviceVect) + + end function z_cuda_nrm2 + + function z_cuda_amax(n,x) result(res) + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = amaxMultiVecDeviceComplex(res,n,x%deviceVect) + + end function z_cuda_amax + + function z_cuda_asum(n,x) result(res) + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + info = asumMultiVecDeviceComplex(res,n,x%deviceVect) + + end function z_cuda_asum + + subroutine z_cuda_absval1(x) + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + if (x%is_host()) call x%sync() + n=x%get_nrows() + info = absMultiVecDevice(n,zone,x%deviceVect) + + end subroutine z_cuda_absval1 + + subroutine z_cuda_absval2(x,y) + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: info + + n=min(x%get_nrows(),y%get_nrows()) + select type (yy=> y) + class is (psb_z_vect_cuda) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + info = absMultiVecDevice(n,zone,x%deviceVect,yy%deviceVect) + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_z_base_vect_type%absval(y) + end select + end subroutine z_cuda_absval2 + + + subroutine z_cuda_vect_finalize(x) + use psi_serial_mod + use psb_realloc_mod + implicit none + type(psb_z_vect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + info = 0 + call x%free(info) + end subroutine z_cuda_vect_finalize + + subroutine z_cuda_ins_v(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_z_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_cuda + + info = 0 + if (psb_errstatus_fatal()) return + + done_cuda = .false. + select type(virl => irl) + class is (psb_i_vect_cuda) + select type(vval => val) + class is (psb_z_vect_cuda) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + info = geinsMultiVecDeviceDoubleComplex(n,virl%deviceVect,& + & vval%deviceVect,dupl,1,x%deviceVect) + call x%set_dev() + done_cuda=.true. + end select + end select + + if (.not.done_cuda) then + if (irl%is_dev()) call irl%sync() + if (val%is_dev()) call val%sync() + call x%ins(n,irl%v,val%v,dupl,info) + end if + + if (info /= 0) then + call psb_errpush(info,'cuda_vect_ins') + return + end if + + end subroutine z_cuda_ins_v + + subroutine z_cuda_ins_a(n,irl,val,dupl,x,info) + use psi_serial_mod + implicit none + class(psb_z_vect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_z_base_vect_type%ins(n,irl,val,dupl,info) + call x%set_host() + + end subroutine z_cuda_ins_a + +end module psb_z_cuda_vect_mod + + +! +! Multivectors +! + + + +module psb_z_cuda_multivect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_z_multivect_mod + use psb_z_base_multivect_mod + use psb_cuda_env_mod + use psb_i_multivect_mod + use psb_i_cuda_multivect_mod + use psb_z_vectordev_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_base_multivect_type) :: psb_z_multivect_cuda + integer(psb_ipk_) :: state = is_host, m_nrows=0, m_ncols=0 + type(c_ptr) :: deviceVect = c_null_ptr + real(c_double), allocatable :: buffer(:,:) + type(c_ptr) :: dt_buf = c_null_ptr + contains + procedure, pass(x) :: get_nrows => z_cuda_multi_get_nrows + procedure, pass(x) :: get_ncols => z_cuda_multi_get_ncols + procedure, nopass :: get_fmt => z_cuda_multi_get_fmt +!!$ procedure, pass(x) :: dot_v => z_cuda_multi_dot_v +!!$ procedure, pass(x) :: dot_a => z_cuda_multi_dot_a +!!$ procedure, pass(y) :: axpby_v => z_cuda_multi_axpby_v +!!$ procedure, pass(y) :: axpby_a => z_cuda_multi_axpby_a +!!$ procedure, pass(y) :: mlt_v => z_cuda_multi_mlt_v +!!$ procedure, pass(y) :: mlt_a => z_cuda_multi_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => z_cuda_multi_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => z_cuda_multi_mlt_v_2 +!!$ procedure, pass(x) :: scal => z_cuda_multi_scal +!!$ procedure, pass(x) :: nrm2 => z_cuda_multi_nrm2 +!!$ procedure, pass(x) :: amax => z_cuda_multi_amax +!!$ procedure, pass(x) :: asum => z_cuda_multi_asum + procedure, pass(x) :: all => z_cuda_multi_all + procedure, pass(x) :: zero => z_cuda_multi_zero + procedure, pass(x) :: asb => z_cuda_multi_asb + procedure, pass(x) :: sync => z_cuda_multi_sync + procedure, pass(x) :: sync_space => z_cuda_multi_sync_space + procedure, pass(x) :: bld_x => z_cuda_multi_bld_x + procedure, pass(x) :: bld_n => z_cuda_multi_bld_n + procedure, pass(x) :: free => z_cuda_multi_free + procedure, pass(x) :: ins => z_cuda_multi_ins + procedure, pass(x) :: is_host => z_cuda_multi_is_host + procedure, pass(x) :: is_dev => z_cuda_multi_is_dev + procedure, pass(x) :: is_sync => z_cuda_multi_is_sync + procedure, pass(x) :: set_host => z_cuda_multi_set_host + procedure, pass(x) :: set_dev => z_cuda_multi_set_dev + procedure, pass(x) :: set_sync => z_cuda_multi_set_sync + procedure, pass(x) :: set_scal => z_cuda_multi_set_scal + procedure, pass(x) :: set_vect => z_cuda_multi_set_vect +!!$ procedure, pass(x) :: gthzv_x => z_cuda_multi_gthzv_x +!!$ procedure, pass(y) :: sctb => z_cuda_multi_sctb +!!$ procedure, pass(y) :: sctb_x => z_cuda_multi_sctb_x + final :: z_cuda_multi_vect_finalize + end type psb_z_multivect_cuda + + public :: psb_z_multivect_cuda + private :: constructor + interface psb_z_multivect_cuda + module procedure constructor + end interface + +contains + + function constructor(x) result(this) + complex(psb_dpk_) :: x(:,:) + type(psb_z_multivect_cuda) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x,1),size(x,2),info) + + end function constructor + + +!!$ subroutine z_cuda_multi_gthzv_x(i,n,idx,x,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i,n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ complex(psb_dpk_) :: y(:) +!!$ class(psb_z_multivect_cuda) :: x +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_cuda) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (x%is_host()) call x%sync() +!!$ +!!$ if (allocated(x%buffer)) then +!!$ if (size(x%buffer) < n) then +!!$ call inner_unregister(x%buffer) +!!$ deallocate(x%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(x%buffer)) then +!!$ allocate(x%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(x%buffer,x%dt_buf) +!!$ endif +!!$ info = igathMultiVecDeviceDouble(x%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, x%dt_buf, 1) +!!$ call psb_cudaSync() +!!$ y(1:n) = x%buffer(1:n) +!!$ +!!$ class default +!!$ call x%gth(n,ii%v(i:),y) +!!$ end select +!!$ +!!$ +!!$ end subroutine z_cuda_multi_gthzv_x +!!$ +!!$ +!!$ +!!$ subroutine z_cuda_multi_sctb(n,idx,x,beta,y) +!!$ implicit none +!!$ !use psb_const_mod +!!$ integer(psb_ipk_) :: n, idx(:) +!!$ complex(psb_dpk_) :: beta, x(:) +!!$ class(psb_z_multivect_cuda) :: y +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (n == 0) return +!!$ +!!$ if (y%is_dev()) call y%sync() +!!$ +!!$ call y%psb_z_base_multivect_type%sctb(n,idx,x,beta) +!!$ call y%set_host() +!!$ +!!$ end subroutine z_cuda_multi_sctb +!!$ +!!$ subroutine z_cuda_multi_sctb_x(i,n,idx,x,beta,y) +!!$ use psi_serial_mod +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_multivect_type) :: idx +!!$ complex(psb_dpk_) :: beta, x(:) +!!$ class(psb_z_multivect_cuda) :: y +!!$ +!!$ select type(ii=> idx) +!!$ class is (psb_i_vect_cuda) +!!$ if (ii%is_host()) call ii%sync() +!!$ if (y%is_host()) call y%sync() +!!$ +!!$ if (allocated(y%buffer)) then +!!$ if (size(y%buffer) < n) then +!!$ call inner_unregister(y%buffer) +!!$ deallocate(y%buffer, stat=info) +!!$ end if +!!$ end if +!!$ +!!$ if (.not.allocated(y%buffer)) then +!!$ allocate(y%buffer(n),stat=info) +!!$ if (info == 0) info = inner_register(y%buffer,y%dt_buf) +!!$ endif +!!$ y%buffer(1:n) = x(1:n) +!!$ info = iscatMultiVecDeviceDouble(y%deviceVect,& +!!$ & 0, i, n, ii%deviceVect, y%dt_buf, 1,beta) +!!$ +!!$ call y%set_dev() +!!$ call psb_cudaSync() +!!$ +!!$ class default +!!$ call y%sct(n,ii%v(i:),x,beta) +!!$ end select +!!$ +!!$ end subroutine z_cuda_multi_sctb_x + + + subroutine z_cuda_multi_bld_x(x,this) + use psb_base_mod + complex(psb_dpk_), intent(in) :: this(:,:) + class(psb_z_multivect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info, m, n + + m=size(this,1) + n=size(this,2) + x%m_nrows = m + x%m_ncols = n + call psb_realloc(m,n,x%v,info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'z_cuda_multi_bld_x',& + & i_err=(/size(this,1),size(this,2),izero,izero,izero,izero/)) + end if + x%v(1:m,1:n) = this(1:m,1:n) + call x%set_host() + call x%sync() + + end subroutine z_cuda_multi_bld_x + + subroutine z_cuda_multi_bld_n(x,m,n) + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_multivect_cuda), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(m,n,info) + if (info /= 0) then + call psb_errpush(info,'z_cuda_multi_bld_n',i_err=(/m,n,n,n,n/)) + end if + + end subroutine z_cuda_multi_bld_n + + + subroutine z_cuda_multi_set_host(x) + implicit none + class(psb_z_multivect_cuda), intent(inout) :: x + + x%state = is_host + end subroutine z_cuda_multi_set_host + + subroutine z_cuda_multi_set_dev(x) + implicit none + class(psb_z_multivect_cuda), intent(inout) :: x + + x%state = is_dev + end subroutine z_cuda_multi_set_dev + + subroutine z_cuda_multi_set_sync(x) + implicit none + class(psb_z_multivect_cuda), intent(inout) :: x + + x%state = is_sync + end subroutine z_cuda_multi_set_sync + + function z_cuda_multi_is_dev(x) result(res) + implicit none + class(psb_z_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function z_cuda_multi_is_dev + + function z_cuda_multi_is_host(x) result(res) + implicit none + class(psb_z_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function z_cuda_multi_is_host + + function z_cuda_multi_is_sync(x) result(res) + implicit none + class(psb_z_multivect_cuda), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function z_cuda_multi_is_sync + + + function z_cuda_multi_get_nrows(x) result(res) + implicit none + class(psb_z_multivect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_nrows + + end function z_cuda_multi_get_nrows + + function z_cuda_multi_get_ncols(x) result(res) + implicit none + class(psb_z_multivect_cuda), intent(in) :: x + integer(psb_ipk_) :: res + + res = x%m_ncols + + end function z_cuda_multi_get_ncols + + function z_cuda_multi_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'zGPU' + end function z_cuda_multi_get_fmt + +!!$ function z_cuda_multi_dot_v(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_z_multivect_cuda), intent(inout) :: x +!!$ class(psb_z_base_multivect_type), intent(inout) :: y +!!$ integer(psb_ipk_), intent(in) :: n +!!$ complex(psb_dpk_) :: res +!!$ complex(psb_dpk_), external :: ddot +!!$ integer(psb_ipk_) :: info +!!$ +!!$ res = zzero +!!$ ! +!!$ ! Note: this is the gpu implementation. +!!$ ! When we get here, we are sure that X is of +!!$ ! TYPE psb_z_vect +!!$ ! +!!$ select type(yy => y) +!!$ type is (psb_z_base_multivect_type) +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,x%v,1,yy%v,1) +!!$ type is (psb_z_multivect_cuda) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ info = dotMultiVecDevice(res,n,x%deviceVect,yy%deviceVect) +!!$ if (info /= 0) then +!!$ info = psb_err_internal_error_ +!!$ call psb_errpush(info,'z_cuda_multi_dot_v') +!!$ end if +!!$ +!!$ class default +!!$ ! y%sync is done in dot_a +!!$ call x%sync() +!!$ res = y%dot(n,x%v) +!!$ end select +!!$ +!!$ end function z_cuda_multi_dot_v +!!$ +!!$ function z_cuda_multi_dot_a(n,x,y) result(res) +!!$ implicit none +!!$ class(psb_z_multivect_cuda), intent(inout) :: x +!!$ complex(psb_dpk_), intent(in) :: y(:) +!!$ integer(psb_ipk_), intent(in) :: n +!!$ complex(psb_dpk_) :: res +!!$ complex(psb_dpk_), external :: ddot +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n,y,1,x%v,1) +!!$ +!!$ end function z_cuda_multi_dot_a +!!$ +!!$ subroutine z_cuda_multi_axpby_v(m,alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_z_base_multivect_type), intent(inout) :: x +!!$ class(psb_z_multivect_cuda), intent(inout) :: y +!!$ complex(psb_dpk_), intent (in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny +!!$ +!!$ info = psb_success_ +!!$ +!!$ select type(xx => x) +!!$ type is (psb_z_base_multivect_type) +!!$ if ((beta /= zzero).and.(y%is_dev()))& +!!$ & call y%sync() +!!$ call psb_geaxpby(m,alpha,xx%v,beta,y%v,info) +!!$ call y%set_host() +!!$ type is (psb_z_multivect_cuda) +!!$ ! Do something different here +!!$ if ((beta /= zzero).and.y%is_host())& +!!$ & call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ nx = getMultiVecDeviceSize(xx%deviceVect) +!!$ ny = getMultiVecDeviceSize(y%deviceVect) +!!$ if ((nx x) +!!$ type is (psb_z_base_multivect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ do i=1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ type is (psb_z_multivect_cuda) +!!$ ! Do something different here +!!$ if (y%is_host()) call y%sync() +!!$ if (xx%is_host()) call xx%sync() +!!$ info = axyMultiVecDevice(n,done,xx%deviceVect,y%deviceVect) +!!$ call y%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call y%mlt(xx%v,info) +!!$ call y%set_host() +!!$ end select +!!$ +!!$ end subroutine z_cuda_multi_mlt_v +!!$ +!!$ subroutine z_cuda_multi_mlt_a(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ complex(psb_dpk_), intent(in) :: x(:) +!!$ class(psb_z_multivect_cuda), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ call y%sync() +!!$ call y%psb_z_base_multivect_type%mlt(x,info) +!!$ call y%set_host() +!!$ end subroutine z_cuda_multi_mlt_a +!!$ +!!$ subroutine z_cuda_multi_mlt_a_2(alpha,x,y,beta,z,info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ complex(psb_dpk_), intent(in) :: alpha,beta +!!$ complex(psb_dpk_), intent(in) :: x(:) +!!$ complex(psb_dpk_), intent(in) :: y(:) +!!$ class(psb_z_multivect_cuda), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (z%is_dev()) call z%sync() +!!$ call z%psb_z_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end subroutine z_cuda_multi_mlt_a_2 +!!$ +!!$ subroutine z_cuda_multi_mlt_v_2(alpha,x,y, beta,z,info,conjgx,conjgy) +!!$ use psi_serial_mod +!!$ use psb_string_mod +!!$ implicit none +!!$ complex(psb_dpk_), intent(in) :: alpha,beta +!!$ class(psb_z_base_multivect_type), intent(inout) :: x +!!$ class(psb_z_base_multivect_type), intent(inout) :: y +!!$ class(psb_z_multivect_cuda), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ if (.false.) then +!!$ ! These are present just for coherence with the +!!$ ! complex versions; they do nothing here. +!!$ conjgx_=.false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx)=='C') +!!$ conjgy_=.false. +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy)=='C') +!!$ end if +!!$ +!!$ n = min(x%get_nrows(),y%get_nrows(),z%get_nrows()) +!!$ +!!$ ! +!!$ ! Need to reconsider BETA in the GPU side +!!$ ! of things. +!!$ ! +!!$ info = 0 +!!$ select type(xx => x) +!!$ type is (psb_z_multivect_cuda) +!!$ select type (yy => y) +!!$ type is (psb_z_multivect_cuda) +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ ! Z state is irrelevant: it will be done on the GPU. +!!$ info = axybzMultiVecDevice(n,alpha,xx%deviceVect,& +!!$ & yy%deviceVect,beta,z%deviceVect) +!!$ call z%set_dev() +!!$ class default +!!$ call xx%sync() +!!$ call yy%sync() +!!$ call z%psb_z_base_multivect_type%mlt(alpha,xx,yy,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ +!!$ class default +!!$ call x%sync() +!!$ call y%sync() +!!$ call z%psb_z_base_multivect_type%mlt(alpha,x,y,beta,info) +!!$ call z%set_host() +!!$ end select +!!$ end subroutine z_cuda_multi_mlt_v_2 + + + subroutine z_cuda_multi_set_scal(x,val) + class(psb_z_multivect_cuda), intent(inout) :: x + complex(psb_dpk_), intent(in) :: val + + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_z_base_multivect_type%set_scal(val) + call x%set_host() + end subroutine z_cuda_multi_set_scal + + subroutine z_cuda_multi_set_vect(x,val) + class(psb_z_multivect_cuda), intent(inout) :: x + complex(psb_dpk_), intent(in) :: val(:,:) + integer(psb_ipk_) :: nr + integer(psb_ipk_) :: info + + if (x%is_dev()) call x%sync() + call x%psb_z_base_multivect_type%set_vect(val) + call x%set_host() + + end subroutine z_cuda_multi_set_vect + + + +!!$ subroutine z_cuda_multi_scal(alpha, x) +!!$ implicit none +!!$ class(psb_z_multivect_cuda), intent(inout) :: x +!!$ complex(psb_dpk_), intent (in) :: alpha +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ call x%psb_z_base_multivect_type%scal(alpha) +!!$ call x%set_host() +!!$ end subroutine z_cuda_multi_scal +!!$ +!!$ +!!$ function z_cuda_multi_nrm2(n,x) result(res) +!!$ implicit none +!!$ class(psb_z_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ ! WARNING: this should be changed. +!!$ if (x%is_host()) call x%sync() +!!$ info = nrm2MultiVecDevice(res,n,x%deviceVect) +!!$ +!!$ end function z_cuda_multi_nrm2 +!!$ +!!$ function z_cuda_multi_amax(n,x) result(res) +!!$ implicit none +!!$ class(psb_z_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = maxval(abs(x%v(1:n))) +!!$ +!!$ end function z_cuda_multi_amax +!!$ +!!$ function z_cuda_multi_asum(n,x) result(res) +!!$ implicit none +!!$ class(psb_z_multivect_cuda), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = sum(abs(x%v(1:n))) +!!$ +!!$ end function z_cuda_multi_asum + + subroutine z_cuda_multi_all(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_multivect_cuda), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(m,n,x%v,info,pad=zzero) + x%m_nrows = m + x%m_ncols = n + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info=psb_err_alloc_request_ + call psb_errpush(info,'z_cuda_multi_all',& + & i_err=(/m,n,n,n,n/)) + end if + end subroutine z_cuda_multi_all + + subroutine z_cuda_multi_zero(x) + use psi_serial_mod + implicit none + class(psb_z_multivect_cuda), intent(inout) :: x + + if (allocated(x%v)) x%v=zzero + call x%set_host() + end subroutine z_cuda_multi_zero + + subroutine z_cuda_multi_asb(m,n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_multivect_cuda), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nd, nc + + + x%m_nrows = m + x%m_ncols = n + if (x%is_host()) then + call x%psb_z_base_multivect_type%asb(m,n,info) + if (info == psb_success_) call x%sync_space(info) + else if (x%is_dev()) then + nd = getMultiVecDevicePitch(x%deviceVect) + nc = getMultiVecDeviceCount(x%deviceVect) + if ((nd < m).or.(nc +#include + +#include +#include +#include "fcusparse.h" + +#include "scusparse.h" +#include "fcusparse_dat.h" +#include "fcusparse_fct.h" diff --git a/cuda/scusparse.h b/cuda/scusparse.h new file mode 100644 index 00000000..479f5069 --- /dev/null +++ b/cuda/scusparse.h @@ -0,0 +1,100 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#ifndef SCUSPARSE_ +#define SCUSPARSE_ + + +#include +#include + +#include +#include +#include "cintrf.h" + +/* Double precision real */ +#define TYPE float +#define CUSPARSE_BASE_TYPE CUDA_R_32F +#define T_CSRGDeviceMat s_CSRGDeviceMat +#define T_Cmat s_Cmat +#define T_spmvCSRGDevice s_spmvCSRGDevice +#define T_spsvCSRGDevice s_spsvCSRGDevice +#define T_CSRGDeviceAlloc s_CSRGDeviceAlloc +#define T_CSRGDeviceFree s_CSRGDeviceFree +#define T_CSRGHost2Device s_CSRGHost2Device +#define T_CSRGDevice2Host s_CSRGDevice2Host +#define T_CSRGDeviceSetMatFillMode s_CSRGDeviceSetMatFillMode +#define T_CSRGDeviceSetMatDiagType s_CSRGDeviceSetMatDiagType +#define T_CSRGDeviceGetParms s_CSRGDeviceGetParms + +#if CUDA_SHORT_VERSION <= 10 +#define T_CSRGDeviceSetMatType s_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase s_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsmAnalysis s_CSRGDeviceCsrsmAnalysis +#define cusparseTcsrmv cusparseScsrmv +#define cusparseTcsrsv_solve cusparseScsrsv_solve +#define cusparseTcsrsv_analysis cusparseScsrsv_analysis +#define T_HYBGDeviceMat s_HYBGDeviceMat +#define T_Hmat s_Hmat +#define T_HYBGDeviceFree s_HYBGDeviceFree +#define T_spmvHYBGDevice s_spmvHYBGDevice +#define T_HYBGDeviceAlloc s_HYBGDeviceAlloc +#define T_HYBGDeviceSetMatDiagType s_HYBGDeviceSetMatDiagType +#define T_HYBGDeviceSetMatIndexBase s_HYBGDeviceSetMatIndexBase +#define T_HYBGDeviceSetMatType s_HYBGDeviceSetMatType +#define T_HYBGDeviceSetMatFillMode s_HYBGDeviceSetMatFillMode +#define T_HYBGDeviceHybsmAnalysis s_HYBGDeviceHybsmAnalysis +#define T_spsvHYBGDevice s_spsvHYBGDevice +#define T_HYBGHost2Device s_HYBGHost2Device +#define cusparseThybmv cusparseShybmv +#define cusparseThybsv_solve cusparseShybsv_solve +#define cusparseThybsv_analysis cusparseShybsv_analysis +#define cusparseTcsr2hyb cusparseScsr2hyb + +#elif CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType s_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase s_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis s_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseScsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseScsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseScsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer s_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr s_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr s_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr s_CSRGCreateSpMVDescr + +#endif + +#include "fcusparse.h" + +#endif diff --git a/cuda/spgpu/Makefile b/cuda/spgpu/Makefile new file mode 100644 index 00000000..85801942 --- /dev/null +++ b/cuda/spgpu/Makefile @@ -0,0 +1,39 @@ +TOP=../.. +include $(TOP)/Make.inc +# +# Libraries used +# +LIBDIR=$(TOP)/lib +INCDIR=$(TOP)/include +MODDIR=$(TOP)/modules +LIBNAME=libspgpu.a + +OBJS=coo.o core.o dia.o ell.o hdia.o hell.o + +all: includes objs + +objs: $(OBJS) iobjs + +lib: objs iobjs ilib + ar cur $(LIBNAME) $(OBJS) + /bin/cp -p $(LIBNAME) $(LIBDIR) + +iobjs: + $(MAKE) -C kernels objs +ilib: + $(MAKE) -C kernels lib LIBNAME=$(LIBNAME) + +includes: + /bin/cp -p *.h $(INCDIR) + +clean: iclean + /bin/rm -fr $(OBJS) $(LIBNAME) +iclean: + $(MAKE) -C kernels clean +.c.o: + $(CC) $(CCOPT) $(CINCLUDES) $(CDEFINES) $(CUDA_INCLUDES) -c $< +.cpp.o: + $(CXX) $(CXXOPT) $(CXXINCLUDES) $(CXXDEFINES) $(CUDA_INCLUDES) -c $< -o $@ + +.cu.o: + $(NVCC) $(CINCLUDES) $(CDEFINES) $(CUDEFINES) $(CUDA_INCLUDES) -c $< diff --git a/cuda/spgpu/coo.cpp b/cuda/spgpu/coo.cpp new file mode 100644 index 00000000..eaeba95e --- /dev/null +++ b/cuda/spgpu/coo.cpp @@ -0,0 +1,98 @@ +#include "coo_conv.h" +#include "core.h" + +#include + +// returns the number of non-zero blocks +int computeBcooSize(int blockRows, int blockCols, const int* rows, const int* cols, int nonZeros) +{ + // use a map to count al non zero blocks + std::map blocksPositions; + + int blockCount = 0; + + int i; + for (i=0; i::iterator it = blocksPositions.find(blockId); + + // not found + if(it == blocksPositions.end()) + { + blocksPositions[blockId] = blockCount; + ++blockCount; + } + } + + return blockCount; +} + + +void cooToBcoo(int* bRows, int* bCols, void* blockValues, /*int isBlockColumnMajor,*/ int blockRows, int blockCols, + const int* rows, const int* cols, const void* values, int nonZeros, spgpuType_t valuesType) +{ + // use a map to count al non zero blocks + std::map blocksPositions; + + int blockCount = 0; + + size_t elementSize = spgpuSizeOf(valuesType); + size_t blockElementSize = elementSize*blockRows*blockCols; + int i; + for (i=0; i::iterator it = blocksPositions.find(blockId); + + int blockPos; + + // not found + if(it == blocksPositions.end()) + { + blocksPositions[blockId] = blockCount; + blockPos = blockCount; + + bRows[blockCount] = blockRowId; + bCols[blockCount] = blockColId; + + memset((char*)blockValues + blockCount*blockElementSize, 0, blockElementSize); + + ++blockCount; + } + else + blockPos = it->second; + + int blockRowOffset = rowId % blockRows; + int blockColOffset = colId % blockCols; + + int blockOffset; + + //if (isBlockColumnMajor) + blockOffset = blockRowOffset + blockColOffset*blockRows; + /*else + blockOffset = blockRowOffset*blockCols + blockColOffset; + */ + + char* dest = (char*)blockValues + blockPos*blockElementSize + blockOffset*elementSize; + const char* src = (const char*)values + elementSize*i; + + memcpy(dest, src, elementSize); + } +} + + diff --git a/cuda/spgpu/coo_conv.h b/cuda/spgpu/coo_conv.h new file mode 100644 index 00000000..feccb63a --- /dev/null +++ b/cuda/spgpu/coo_conv.h @@ -0,0 +1,40 @@ +#pragma once + +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include "core.h" + +/** \addtogroup conversionRoutines Conversion Routines + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +int computeBcooSize(int blockRows, int blockCols, const int* rows, const int* cols, int nonZeros); + +// column-major format for blocks +void cooToBcoo(int* bRows, int* bCols, void* blockValues, /*int isBlockColumnMajor,*/ int blockRows, int blockCols, + const int* rows, const int* cols, const void* values, int nonZeros, spgpuType_t valuesType); + +#ifdef __cplusplus +} +#endif + +/** @}*/ diff --git a/cuda/spgpu/core.c b/cuda/spgpu/core.c new file mode 100644 index 00000000..e89aa2cf --- /dev/null +++ b/cuda/spgpu/core.c @@ -0,0 +1,109 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "core.h" +#include "stdlib.h" +#include "cuda_runtime.h" + +spgpuStatus_t spgpuCreate(spgpuHandle_t* pHandle, int device) +{ + struct cudaDeviceProp deviceProperties; + cudaError_t err = cudaGetDeviceProperties(&deviceProperties, device); + + SpgpuHandleStruct* handle = (SpgpuHandleStruct*) malloc(sizeof(SpgpuHandleStruct)); + + int currentDevice; + cudaGetDevice(¤tDevice); + cudaSetDevice(device); + cudaStreamCreate(&handle->defaultStream); + handle->currentStream = handle->defaultStream; + cudaSetDevice(currentDevice); + + handle->device = device; + handle->warpSize = deviceProperties.warpSize; + handle->maxThreadsPerBlock = deviceProperties.maxThreadsPerBlock; + handle->multiProcessorCount = deviceProperties.multiProcessorCount; + handle->maxGridSizeX = deviceProperties.maxGridSize[0]; + handle->maxGridSizeY = deviceProperties.maxGridSize[1]; + handle->maxGridSizeZ = deviceProperties.maxGridSize[2]; + handle->capabilityMajor = deviceProperties.major; + handle->capabilityMinor = deviceProperties.minor; + + *pHandle = handle; + + if (err == cudaSuccess) + return SPGPU_SUCCESS; + else + return SPGPU_UNSPECIFIED; +} + +void spgpuDestroy(spgpuHandle_t pHandle) +{ + cudaStreamDestroy(pHandle->defaultStream); + + free((void*)pHandle); +} + +void spgpuStreamCreate(spgpuHandle_t pHandle, cudaStream_t* stream) +{ + int currentDevice; + cudaGetDevice(¤tDevice); + cudaSetDevice(pHandle->device); + cudaStreamCreate(stream); + cudaSetDevice(currentDevice); +} + +void spgpuStreamDestroy(cudaStream_t stream) +{ + cudaStreamDestroy(stream); +} + +void spgpuSetStream(spgpuHandle_t pHandle, cudaStream_t stream) +{ + SpgpuHandleStruct* handle = (SpgpuHandleStruct*)pHandle; + + if (stream) + { + handle->currentStream = stream; + } + else + handle->currentStream = pHandle->defaultStream; +} + +cudaStream_t spgpuGetStream(spgpuHandle_t pHandle) +{ + SpgpuHandleStruct* handle = (SpgpuHandleStruct*)pHandle; + return handle->currentStream; +} + +size_t spgpuSizeOf(spgpuType_t typeCode) +{ + switch (typeCode) + { + case SPGPU_TYPE_INT: + return sizeof(int); + case SPGPU_TYPE_FLOAT: + return sizeof(float); + case SPGPU_TYPE_DOUBLE: + return sizeof(double); + case SPGPU_TYPE_COMPLEX_FLOAT: + return sizeof(cuFloatComplex); + case SPGPU_TYPE_COMPLEX_DOUBLE: + return sizeof(cuDoubleComplex); + default: + return 0; // error + } +} diff --git a/cuda/spgpu/core.h b/cuda/spgpu/core.h new file mode 100644 index 00000000..46d98f64 --- /dev/null +++ b/cuda/spgpu/core.h @@ -0,0 +1,185 @@ +#pragma once + +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +/*! \mainpage The spGPU library documentation + * + * \section intro_sec Introduction + * + * spGPU is a set of custom matrix storages and CUDA kernels for sparse linear algebra computing on GPU. It isn't a replacement for cuBLAS/cuSPARSE that should be used for a full featured linear algebra environment on GPU.\n + * The main matrix storage used by spGPU is a GPU-friendly ELLpack format, as well as our HELL (Hacked ELLpack) format, an enhanced version of ELLpack with some interesting memory saving properties.\n + * HELL format provides a better memory storage compared to ELL (it avoids allocation inefficency provided by spikes in row sizes), while providing quite the same performances for sparse matrix-vector multiply routine.. + * + * \section install_sec How to build spgpu + * \subsection linuxbuild Linux (and other unix systems) + * cd spgpu/build/cmake\n + * sh configure.sh\n + * make + * \section cr_sec Copyright + * Copyright (C) 2010 - 2015\n + * Davide Barbieri - University of Rome Tor Vergata\n + * Valeria Cardellini - University of Rome Tor Vergata\n + * Salvatore Filippone - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or\n + * modify it under the terms of the GNU General Public License\n + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful,\n + * but WITHOUT ANY WARRANTY; without even the implied warranty of\n + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n + * GNU General Public License for more details. + */ + + +#include "driver_types.h" +#include "cuComplex.h" + +/** \addtogroup coreFun Core Routines + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/// __host pointers reference host allocations (it's just a placeholder) +#define __host +/// __device pointers reference device allocations (it's just a placeholder) +#define __device + +/// The return code for synchronous functions +typedef int spgpuStatus_t; + +#define SPGPU_SUCCESS 0 +#define SPGPU_UNSUPPORTED 1 +#define SPGPU_UNSPECIFIED 2 +#define SPGPU_OUTOFMEMORY 3 + +/// Code to identify a primitive type +typedef int spgpuType_t; + +#define SPGPU_TYPE_INT 0 +#define SPGPU_TYPE_FLOAT 1 +#define SPGPU_TYPE_DOUBLE 2 +#define SPGPU_TYPE_COMPLEX_FLOAT 3 +#define SPGPU_TYPE_COMPLEX_DOUBLE 4 + +/// this struct should be modified only internally by spgpu +typedef struct spgpuHandleStruct { + /// the current stream used by every calls on spgpu routines (passing this handle) + cudaStream_t currentStream; + /// the default stream, created during the handle creation. + cudaStream_t defaultStream; + /// the device associated to this handle + int device; + /// the warp size for this device + int warpSize; + /// the max threads per block count for this device + int maxThreadsPerBlock; + /// the max size for the X coordinate of the grid dimensions + int maxGridSizeX; + /// the max size for the Y coordinate of the grid dimensions + int maxGridSizeY; + /// the max size for the Z coordinate of the grid dimensions + int maxGridSizeZ; + /// Number of SM + int multiProcessorCount; + // compute capability + int capabilityMajor; + int capabilityMinor; +} SpgpuHandleStruct; + +/// A spGPU handle represents a single CUDA device on your platform. +typedef const SpgpuHandleStruct* spgpuHandle_t; + +/** +* \fn spgpuStatus_t spgpuCreate(spgpuHandle_t* pHandle, int device) +* Create a spgpu context for a GPU device. Every call to spgpu routines using this +* handle will execute on the same GPU. This is re-entrant, so it will work if used by multiple host threads. +* \param pHandle outputs the handle +* \param device id of the device to be used by this context +*/ +spgpuStatus_t spgpuCreate(spgpuHandle_t* pHandle, int device); + +/** +* \fn void spgpuDestroy(spgpuHandle_t pHandle) +* Destroy the spgpu context for pHandle. +* \param pHandle the handle previously created with spgpuCreate(). +*/ +void spgpuDestroy(spgpuHandle_t pHandle); + +/** +* \fn void spgpuStreamCreate(spgpuHandle_t pHandle, cudaStream_t* stream) +* Create a cuda stream according to the device of the spgpu handle. +* \param stream outputs the new stream +* \param pHandle the handle used to obtain the device id for the stream +*/ +void spgpuStreamCreate(spgpuHandle_t pHandle, cudaStream_t* stream); + +/** +* \fn void spgpuStreamDestroy(cudaStream_t stream) +* Destroy a stream, previously created with spgpuStreamCreate(). +* \param stream the stream to destroy +*/ +void spgpuStreamDestroy(cudaStream_t stream); + +/** +* \fn void spgpuSetStream(spgpuHandle_t pHandle, cudaStream_t stream) +* Change the current stream for the handle pHandle. +* \param pHandle the handle to configure. +* \param stream the stream to use for next spgpu routines call. Use 0 to reset to the default stream. +*/ +void spgpuSetStream(spgpuHandle_t pHandle, cudaStream_t stream); + +/** +* \fn cudaStram_t spgpuGetStream(spgpuHandle_t pHandle) +* Get the current stream from the handle pHandle. +* \param pHandle the handle from which get the stream. +*/ +cudaStream_t spgpuGetStream(spgpuHandle_t pHandle); + +/** +* \fn size_t spgpuSizeOf(spgpuType_t typeCode) +* Returns the size, in bytes, of the type denoted by typeCode (e.g. 4 for SPGPU_TYPE_FLOAT, 8 for SPGPU_TYPE_DOUBLE). +* \param typeCode outputs the handle +*/ +size_t spgpuSizeOf(spgpuType_t typeCode); + +/* +typedef struct { +spgpuMatrix + +spgpuMatrixType_t MatrixType; +spgpuFillMode_t FillMode; +spgpuDiagType_t DiagType; +int baseIndex; +} spgpuMatrixDesc_t +*/ + +#define cuFloatComplex_isZero(a) (a.x == 0.0f && a.y == 0.0f) +#define cuDoubleComplex_isZero(a) (a.x == 0.0 && a.y == 0.0) +#define cuFloatComplex_isNotZero(a) (a.x != 0.0f || a.y != 0.0f) +#define cuDoubleComplex_isNotZero(a) (a.x != 0.0 || a.y != 0.0) + +#ifdef __cplusplus +} +#endif + +/** @}*/ + diff --git a/cuda/spgpu/debug.h b/cuda/spgpu/debug.h new file mode 100644 index 00000000..d5bc6de2 --- /dev/null +++ b/cuda/spgpu/debug.h @@ -0,0 +1,58 @@ +#pragma once + +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "stdlib.h" + +#ifndef _WIN32 +#include + +inline +void printTrace (void) +{ + void *array[32]; + size_t size; + char **strings; + size_t i; + + size = backtrace (array, 32); + strings = backtrace_symbols (array, size); + + printf ("---- Obtained %zd stack frames.\n", size); + + for (i = 0; i < size; i++) + printf ("%s\n", strings[i]); + + free (strings); +} +#endif + +inline void __assert(int e, const char* w) +{ + if (!e) + { + printf("%s\n",w); + +#ifndef _WIN32 + printTrace(); +#endif + + exit(0); + } +} + diff --git a/cuda/spgpu/dia.c b/cuda/spgpu/dia.c new file mode 100644 index 00000000..cf761c5c --- /dev/null +++ b/cuda/spgpu/dia.c @@ -0,0 +1,105 @@ +#include "dia.h" +#include "dia_conv.h" +#include "stdlib.h" + +int computeDiaAllocPitch(int rowsCount) +{ + // returns a pitch good for indices and values + return ((rowsCount + 31)/32)*32; +} + +int computeDiaDiagonalsCount( + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices) +{ + int* diagIds = (int*)malloc((rowsCount + columnsCount - 1)*sizeof(int)); + int diagonalsCount = 0; + int i; + + for (i=0; i<(rowsCount + columnsCount - 1); ++i) + diagIds[i] = -1; + + for (i=0; i + +/** \addtogroup conversionRoutines Conversion Routines + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +int computeDiaDiagonalsCount( + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices); + + +void coo2dia( + void* values, + int* offsets, + int valuesPitch, + int diagonals, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType); + + +/** +* \fn int computeDiaAllocPitch(int rowsCount) + * This function returns a pitch (in number of elements) that can be used to allocate the values array for DIA matrix format. + * \param rowsCount the rows count + * \return the pitch for an DIA matrix of rowsCount rows. +*/ +int computeDiaAllocPitch(int rowsCount); + + +#ifdef __cplusplus +} +#endif + +/** @}*/ diff --git a/cuda/spgpu/ell.c b/cuda/spgpu/ell.c new file mode 100644 index 00000000..33381c28 --- /dev/null +++ b/cuda/spgpu/ell.c @@ -0,0 +1,202 @@ +#include "ell.h" +#include "ell_conv.h" +#include "stdlib.h" + +void computeEllRowLenghts( + int *ellRowLengths, + int *ellMaxRowSize, + int rowsCount, + int nonZerosCount, + const int* cooRowIndices, + int cooBaseIndex + ) +{ + // find the max number of non zero per row + int maxRowSize = 0; + int i; + for (i=0; i maxRowSize) + maxRowSize = currCount; + } + + *ellMaxRowSize = maxRowSize; +} + +int computeEllAllocPitch(int rowsCount) +{ + // returns a pitch good for indices and values + return ((rowsCount + 31)/32)*32; +} + +void cooToEll( + void *ellValues, + int *ellIndices, + int ellValuesPitch, + int ellIndicesPitch, + int ellMaxRowSize, + int ellBaseIndex, + int rowsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType + ) +{ + + size_t elementSize = spgpuSizeOf(valuesType); + + // fill values and indices + int* currentPos = (int*)malloc(rowsCount*sizeof(int)); + int i; + + for (i=0; i dstRs[j]) { + app_dstRs[k] = dstRs[i]; + app_rIdx[k] = rIdx[i]; + + ++k; ++i; + } else { + app_dstRs[k] = dstRs[j]; + app_rIdx[k] = rIdx[j]; + + ++k; ++j; + } + } + + while (i<=center) + { + app_dstRs[k] = dstRs[i]; + app_rIdx[k] = rIdx[i]; + + ++k; ++i; + } + + while (j<=end) + { + app_dstRs[k] = dstRs[j]; + app_rIdx[k] = rIdx[j]; + + ++k; ++j; + } + + for (k=start; k<=end; k++) + { + dstRs[k] = app_dstRs[k-start]; + rIdx[k] = app_rIdx[k-start]; + } +} + +void mergesort(int *dstRs, int *rIdx, int size) { + int* app_dstRs = (int*)malloc(size*sizeof(int)); + int* app_rIdx = (int*)malloc(size*sizeof(int)); + + int sizetomerge=size-1; + size--; + int i; + int n=2; + + while (nsizetomerge) + merge (app_dstRs, app_rIdx, dstRs, rIdx, sizetomerge -((sizetomerge)%n),sizetomerge,size,size); + sizetomerge=sizetomerge-((sizetomerge+1)%n);} + n=n*2; + } + + if (size>sizetomerge) + merge (app_dstRs, app_rIdx, dstRs,rIdx,0,size-(size-sizetomerge),size,size); + + free(app_dstRs); + free(app_rIdx); +} + + + +void ellToOell( + int *rIdx, + void *dstEllValues, + int *dstEllIndices, + int *dstRs, + const void *srcEllValues, + const int *srcEllIndices, + const int *srcRs, + int ellValuesPitch, + int ellIndicesPitch, + int rowsCount, + spgpuType_t valuesType + ) +{ + size_t elementSize = spgpuSizeOf(valuesType); + + int i,j; + for (i=0; i + +/** \addtogroup conversionRoutines Conversion Routines + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** +* \fn void computeEllRowLenghts(int *ellRowLengths, int *ellMaxRowSize, int rowsCount, int nonZerosCount, const int* cooRowIndices, int cooBaseIndex) + * Compute the Ell row lengths array (and the greatest row size) from the COO matrix format. + * \param ellRowLengths Array of length rowsCount to be filled by the non zeros count for every matrix row + * \param ellMaxRowSize outputs the greatest row size (in non zeros) + * \param rowsCount the number of rows of the coo matrix to convert + * \param nonZerosCount the non zeros count of the coo matrix to convert + * \param cooRowIndices the row indices array for the coo matrix to convert + * \param cooBaseIndex the input base index (e.g. 0 for C, 1 for Fortran) + */ +void computeEllRowLenghts( + int *ellRowLengths, + int *ellMaxRowSize, + int rowsCount, + int nonZerosCount, + const int* cooRowIndices, + int cooBaseIndex + ); + +/** +* \fn int computeEllAllocPitch(int rowsCount) + * This function returns a pitch (in number of elements) that can be used to allocate both indices and values arrays for ELL matrix format. + * \param rowsCount the rows count + * \return the pitch for an ELL matrix of rowsCount rows. +*/ +int computeEllAllocPitch(int rowsCount); + + +/** +* \fn void cooToEll(void *ellValues,int *ellIndices,int ellValuesPitch,int ellIndicesPitch,int ellMaxRowSize,int ellBaseIndex,int rowsCount,int nonZerosCount,const int* cooRowIndices,const int* cooColsIndices,const void* cooValues,int cooBaseIndex, spgpuType_t valuesType) + * Convert a matrix in COO format to a matrix in ELL format. + * The matrix is stored in column-major format. The ellValues and ellIndices sizes are ellMaxRowSize * pitch (pitch is in bytes). + * \param ellValues pointer to the area that will be filled by the non zero coefficients + * \param ellIndices pointer to the area that will be filled by the non zero indices + * \param ellValuesPitch the column-major allocation's pitch of ellValues (in number of elements) + * \param ellIndicesPitch the column-major allocation's pitch of ellIndices (in number of elements) + * \param ellMaxRowSize the greatest row size + * \param ellBaseIndex the desired base index for the ELL matrix (e.g. 0 for C, 1 for Fortran) + * \param rowsCount input matrix rows count + * \param nonZerosCount input matrix non zeros count + * \param cooRowIndices input matrix row indices pointer + * \param cooColsIndices input matrix column indices pointer + * \param cooValues input matrix non zeros values pointer + * \param cooBaseIndex input matrix base index + * \param valuesType the type for elements in ellValues and cooValues (i.e. SPGPU_TYPE_FLOAT or SPGPU_TYPE_DOUBLE) + */ +void cooToEll( + void *ellValues, + int *ellIndices, + int ellValuesPitch, + int ellIndicesPitch, + int ellMaxRowSize, + int ellBaseIndex, + int rowsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType + ); + +void ellToOell( + int *rIdx, + void *dstEllValues, + int *dstEllIndices, + int *dstRs, + const void *srcEllValues, + const int *srcEllIndices, + const int *srcRs, + int ellValuesPitch, + int ellIndicesPitch, + int rowsCount, + spgpuType_t valuesType + ); + +#ifdef __cplusplus +} +#endif + +/** @}*/ diff --git a/cuda/spgpu/hdia.cpp b/cuda/spgpu/hdia.cpp new file mode 100644 index 00000000..daf74173 --- /dev/null +++ b/cuda/spgpu/hdia.cpp @@ -0,0 +1,374 @@ +#include "hdia_conv.h" +#include "stdlib.h" +#include "string.h" + +#include +#include + +int getHdiaHacksCount(int hackSize, int rowsCount) +{ + return (rowsCount + hackSize - 1)/hackSize; +} + +void computeHdiaHackOffsets( + int *allocationHeight, + int *hackOffsets, + int hackSize, + const void* diaValues, + int diaValuesPitch, + int diagonals, + int rowsCount, + spgpuType_t valuesType + ) +{ + int i,r,s, hack; + int hackCount = getHdiaHacksCount(hackSize, rowsCount); + + size_t elementSize = spgpuSizeOf(valuesType); + + int hackHeight = 0; + + hackOffsets[0] = 0; + for (hack=0; hack= rowsCount) + break; + + const char* val = (char*)diaValues + elementSize*(row + i*diaValuesPitch); + + for (s=0; s= rowsCount) + break; + + const char* val = (const char*)diaValues + elementSize*(row + i*diaValuesPitch); + + for (s=0; s= rowsCount) + break; + + char* dest = (char*)hdiaValues + elementSize*((posOffset + i)*hackSize + r); + const char* src = (const char*)diaValues + elementSize*(row + diagPosInsideDia*diaValuesPitch); + + memcpy(dest, src, elementSize); + } + } + } +} + + + + + + + +void computeHdiaHackOffsetsFromCoo( + int *allocationHeight, + int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + int cooBaseIndex + ) +{ + + int i,j,h; + + int hackCount = getHdiaHacksCount(hackSize, rowsCount); + + + // Find rows per hack + std::vector *rowsPerHack = new std::vector [hackCount]; + + for (i=0; i diagIdsToPos; + + hackOffsets[0] = 0; + for (h=0; h *hackRows = &rowsPerHack[h]; + int hackRowsSize = hackRows->size(); + + for (j=0; jat(j); + int rowIdx = cooRowIndices[i]; + int colIdx = cooColsIndices[i]; + int diagId = (colIdx-cooBaseIndex) - ((rowIdx-cooBaseIndex) % hackSize); + int diagPos = hackSize - 1 + diagId; + + std::map::iterator it = diagIdsToPos.find(diagPos); + + if(it == diagIdsToPos.end()) + { + diagIdsToPos[diagPos] = 1; + ++diagonalsCount; + } + } + + hackOffsets[h+1] = hackOffsets[h] + diagonalsCount; + } + + *allocationHeight = hackOffsets[hackCount]; + + delete[] rowsPerHack; +} + +void cooToHdia_size( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + size_t elementSize + ) +{ + int i,j,h; + + int hackCount = getHdiaHacksCount(hackSize, rowsCount); + + // Find rows per hack + std::vector *rowsPerHack = new std::vector [hackCount]; + + for (i=0; i hackDiagIdsToPos; + + for (h=0; h *hackRows = &rowsPerHack[h]; + int hackRowsSize = hackRows->size(); + + for (j=0; jat(j); + + int rowIdx = cooRowIndices[i]; + int colIdx = cooColsIndices[i]; + int globalDiagId = colIdx - rowIdx; + int diagId = (colIdx - cooBaseIndex) - ((rowIdx - cooBaseIndex) % hackSize); + int diagPos = hackSize - 1 + diagId; + + std::map::iterator it = hackDiagIdsToPos.find(diagPos); + + if(it == hackDiagIdsToPos.end()) + { + hackDiagIdsToPos[diagPos] = globalDiagId; + } + } + + // Reorder diags + for (std::map::iterator it = hackDiagIdsToPos.begin(); it != hackDiagIdsToPos.end(); ++it) + { + int i = it->first; + + int globalDiagId = it->second; + int diagPosInsideOffsets; + int diagId = i - hackSize + 1; + hackDiagIdsToPos[i] = diagPosInsideOffsets = diagonalsCount++; + hdiaOffsets[diagPosInsideOffsets] = globalDiagId; + } + + + hdiaOffsets += diagonalsCount; + + for (j=0; jat(j); + int rowIdx = cooRowIndices[i]; + int colIdx = cooColsIndices[i]; + int diagId = (colIdx - cooBaseIndex) - ((rowIdx - cooBaseIndex) % hackSize); + + int diagPosInsideOffsets = hackDiagIdsToPos[hackSize - 1 + diagId]; + + char* valAddr = (char*)hdiaValues + + elementSize*(((rowIdx - cooBaseIndex) % hackSize) + + hackSize* (hackOffsets[h] + diagPosInsideOffsets)); + + memcpy(valAddr, (const char*)cooValues + i*elementSize, elementSize); + } + } + + delete[] rowsPerHack; +} + + + +void cooToHdia( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType + ) +{ + size_t elementSize = spgpuSizeOf(valuesType); + + cooToHdia_size(hdiaValues, hdiaOffsets, + hackOffsets, hackSize, rowsCount, + columnsCount, nonZerosCount, + cooRowIndices, cooColsIndices, cooValues, cooBaseIndex, elementSize); +} + +void bcooToBhdia( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType, + int blockSize + ) +{ + size_t elementSize = blockSize*spgpuSizeOf(valuesType); + + cooToHdia_size(hdiaValues, hdiaOffsets, + hackOffsets, hackSize, rowsCount, + columnsCount, nonZerosCount, + cooRowIndices, cooColsIndices, cooValues, cooBaseIndex, elementSize); +} + diff --git a/cuda/spgpu/hdia.h b/cuda/spgpu/hdia.h new file mode 100644 index 00000000..e8808fb7 --- /dev/null +++ b/cuda/spgpu/hdia.h @@ -0,0 +1,159 @@ +#pragma once + +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2013 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "core.h" + + +/** \addtogroup diaFun DIA/HDIA Format + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + + +/** +* \fn spgpuShdiaspmv (spgpuHandle_t handle, float* z, const float *y, float alpha, const float* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const float *x, float beta) + * Computes single precision z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. + * \param handle The spgpu handle used to call this routine + * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). + * \param y The y input vector + * \param alpha The alpha scalar + * \param dM The stacked HDIA non zero values allocation pointer + * \param offsets The stacked HDIA diagonals offsets vector + * \param hackSize The constant size of every hack (must be a multiple of 32) + * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector + * \param rows the rows count + * \param cols the columns count + * \param x the x vector + * \param beta the beta scalar + */ +void +spgpuShdiaspmv (spgpuHandle_t handle, + float* z, + const float *y, + float alpha, + const float* dM, + const int* offsets, + int hackSize, + const int* hackOffsets, + int rows, + int cols, + const float *x, + float beta); + + +/** +* \fn spgpuDhdiaspmv (spgpuHandle_t handle, double* z, const double *y, double alpha, const double* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const double *x, double beta) + * Computes double precision z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. + * \param handle The spgpu handle used to call this routine + * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). + * \param y The y input vector + * \param alpha The alpha scalar + * \param dM The stacked HDIA non zero values allocation pointer + * \param offsets The stacked HDIA diagonals offsets vector + * \param hackSize The constant size of every hack (must be a multiple of 32) + * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector + * \param rows the rows count + * \param cols the columns count + * \param x the x vector + * \param beta the beta scalar + */ +void +spgpuDhdiaspmv (spgpuHandle_t handle, + double* z, + const double *y, + double alpha, + const double* dM, + const int* offsets, + int hackSize, + const int* hackOffsets, + int rows, + int cols, + const double *x, + double beta); + + +/** +* \fn spgpuChdiaspmv (spgpuHandle_t handle, cuFloatComplex* z, const cuFloatComplex *y, cuFloatComplex alpha, const cuFloatComplex* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const cuFloatComplex *x, cuFloatComplex beta) + * Computes single precision complex z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. + * \param handle The spgpu handle used to call this routine + * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). + * \param y The y input vector + * \param alpha The alpha scalar + * \param dM The stacked HDIA non zero values allocation pointer + * \param offsets The stacked HDIA diagonals offsets vector + * \param hackSize The constant size of every hack (must be a multiple of 32) + * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector + * \param rows the rows count + * \param cols the columns count + * \param x the x vector + * \param beta the beta scalar + */ +void +spgpuChdiaspmv (spgpuHandle_t handle, + cuFloatComplex* z, + const cuFloatComplex *y, + cuFloatComplex alpha, + const cuFloatComplex* dM, + const int* offsets, + int hackSize, + const int* hackOffsets, + int rows, + int cols, + const cuFloatComplex *x, + cuFloatComplex beta); + + +/** +* \fn spgpuZhdiaspmv (spgpuHandle_t handle, cuDoubleComplex* z, const cuDoubleComplex *y, cuDoubleComplex alpha, const cuDoubleComplex* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const cuDoubleComplex *x, cuDoubleComplex beta) + * Computes double precision complex z = alpha*A*x + beta*y, with A stored in Hacked Diagonal Format on GPU. + * \param handle The spgpu handle used to call this routine + * \param z The output vector of the routine. z could be y, but not y + k (i.e. an overlapping area over y, but starting from a base index different from y). + * \param y The y input vector + * \param alpha The alpha scalar + * \param dM The stacked HDIA non zero values allocation pointer + * \param offsets The stacked HDIA diagonals offsets vector + * \param hackSize The constant size of every hack (must be a multiple of 32) + * \param hackOffsets the array of base index offset for every hack of HDIA offsets vector, plus a last value equal to the size of the offsets vector + * \param rows the rows count + * \param cols the columns count + * \param x the x vector + * \param beta the beta scalar + */ +void +spgpuZhdiaspmv (spgpuHandle_t handle, + cuDoubleComplex* z, + const cuDoubleComplex *y, + cuDoubleComplex alpha, + const cuDoubleComplex* dM, + const int* offsets, + int hackSize, + const int* hackOffsets, + int rows, + int cols, + const cuDoubleComplex *x, + cuDoubleComplex beta); + +/** @}*/ + +#ifdef __cplusplus +} +#endif + diff --git a/cuda/spgpu/hdia_conv.h b/cuda/spgpu/hdia_conv.h new file mode 100644 index 00000000..c22bf990 --- /dev/null +++ b/cuda/spgpu/hdia_conv.h @@ -0,0 +1,102 @@ +#pragma once + +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2013 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "core.h" + + +/** \addtogroup conversionRoutines Conversion Routines + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +int getHdiaHacksCount(int hackSize, int rowsCount); + +void computeHdiaHackOffsets( + int *allocationHeight, + int *hackOffsets, + int hackSize, + const void* diaValues, + int diaValuesPitch, + int diagonals, + int rowsCount, + spgpuType_t valuesType); + +void diaToHdia( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + const void* diaValues, + const int* diaOffsets, + int diaValuesPitch, + int diagonals, + int rowsCount, + spgpuType_t valuesType + ); + +void computeHdiaHackOffsetsFromCoo( + int *allocationHeight, + int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + int cooBaseIndex + ); + +void cooToHdia( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType + ); + +void bcooToBhdia( + void *hdiaValues, + int *hdiaOffsets, + const int *hackOffsets, + int hackSize, + int rowsCount, + int columnsCount, + int nonZerosCount, + const int* cooRowIndices, + const int* cooColsIndices, + const void* cooValues, + int cooBaseIndex, + spgpuType_t valuesType, + int blockSize); + +#ifdef __cplusplus +} +#endif + + +/** @}*/ diff --git a/cuda/spgpu/hell.c b/cuda/spgpu/hell.c new file mode 100644 index 00000000..07764ee3 --- /dev/null +++ b/cuda/spgpu/hell.c @@ -0,0 +1,104 @@ +#include "hell.h" +#include "hell_conv.h" + +void computeHellAllocSize( + int* allocationHeight, + int hackSize, + int rowsCount, + const int *ellRowLengths + ) +{ + int totalLen = 0; + int i; + int remainings; + int done; + int maxLen; + + for (i=0; i maxLen) + maxLen = curLen; + } + totalLen += maxLen; + } + + remainings = rowsCount % hackSize; + done = (rowsCount/hackSize)*hackSize; + maxLen = 0; + + for (i=0; i maxLen) + maxLen = curLen; + } + + *allocationHeight = totalLen + maxLen; +} + +void ellToHell( + void *hellValues, + int *hellIndices, + int* hackOffsets, + int hackSize, + + const void *ellValues, + const int *ellIndices, + int ellValuesPitch, + int ellIndicesPitch, + int *ellRowLengths, + int rowsCount, + spgpuType_t valuesType + ) +{ + + size_t elementSize = spgpuSizeOf(valuesType); + + int hacks = (rowsCount + hackSize - 1)/hackSize; + + char* currValPos = (char*)hellValues; + int* currIndPos = hellIndices; + + int hackOffset = 0; + int i; + for (i=0; i= rowsCount) + break; + + rowLen = ellRowLengths[row]; + + if (rowLen > maxLen) + maxLen = rowLen; + + for (k=0; k + +/** \addtogroup conversionRoutines Conversion Routines + * @{ + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** +* \fn void computeHellAllocSize(int* allocationHeight, int hackSize, int rowsCount, const int *ellRowLengths) + * Compute the HELL format allocation's height for the Hell allocation + * (the resulting size should be allocationHeight*hackSize*sizeof(elementsType)). + * \param allocationHeight outputs the total allocation's height + * \param hackSize the hack size for this matrix (32 or 64 are good choices) + * \param rowsCount the rows count + * \param ellRowLengths the row lengths array from the ell matrix to convert +*/ +void computeHellAllocSize( + int* allocationHeight, + int hackSize, + int rowsCount, + const int *ellRowLengths + ); + +/** +* \fn void ellToHell(void *hellValues, int *hellIndices, int* hackOffsets, int hackSize, const void *ellValues, const int *ellIndices, int ellValuesPitch, int ellIndicesPitch, int *ellRowLengths, int rowsCount, spgpuType_t valuesType) + * Convert a matrix from the ELL format to the HELL format. + * \param hellValues pointer to the area that will be filled by the non zero coefficients + * \param hellIndices pointer to the area that will be filled by the non zero indices + * \param hackOffsets + * \param hackSize the hack size used to allocate hellValues and hellIndices (32 or 64 are good choices) + * \param ellValues the input matrix coefficients + * \param ellIndices the input matrix indices + * \param ellValuesPitch the input values allocation pitch (in number of elements) + * \param ellIndicesPitch the input indices allocation pitch (in number of elements) + * \param ellRowLengths the row lengths array of the input matrix + * \param rowsCount the rows count + * \param valuesType the type of hellValues and ellValues elements (i.e. SPGPU_TYPE_FLOAT or SPGPU_TYPE_DOUBLE) +*/ +void ellToHell( + void *hellValues, + int *hellIndices, + int* hackOffsets, + int hackSize, + const void *ellValues, + const int *ellIndices, + int ellValuesPitch, + int ellIndicesPitch, + int *ellRowLengths, + int rowsCount, + spgpuType_t valuesType + ); + +#ifdef __cplusplus +} +#endif + + +/** @}*/ diff --git a/cuda/spgpu/kernels/Makefile b/cuda/spgpu/kernels/Makefile new file mode 100644 index 00000000..c4ae38a0 --- /dev/null +++ b/cuda/spgpu/kernels/Makefile @@ -0,0 +1,34 @@ +TOP=../../.. +include $(TOP)/Make.inc +# +# Libraries used +# +LIBDIR=$(TOP)/lib +INCDIR=$(TOP)/include +MODDIR=$(TOP)/modules +UP=.. +LIBNAME=$(UP)/libspgpu.a +CINCLUDES=-I$(INCDIR) + +OBJS=cabs.o camax.o casum.o caxpby.o caxy.o cdot.o cgath.o \ + cnrm2.o cscal.o cscat.o csetscal.o cabgdxyz.o\ + dabs.o damax.o dasum.o daxpby.o daxy.o ddot.o dgath.o dabgdxyz.o\ + dia_cspmv.o dia_dspmv.o dia_sspmv.o dia_zspmv.o dnrm2.o \ + dscal.o dscat.o dsetscal.o ell_ccsput.o ell_cspmv.o \ + ell_dcsput.o ell_dspmv.o ell_scsput.o ell_sspmv.o ell_zcsput.o ell_zspmv.o \ + hdia_cspmv.o hdia_dspmv.o hdia_sspmv.o hdia_zspmv.o hell_cspmv.o hell_dspmv.o \ + hell_sspmv.o hell_zspmv.o igath.o iscat.o isetscal.o sabs.o samax.o sasum.o \ + saxpby.o saxy.o sdot.o sgath.o snrm2.o sscal.o sscat.o ssetscal.o zabs.o zamax.o sabgdxyz.o\ + zasum.o zaxpby.o zaxy.o zdot.o zgath.o znrm2.o zscal.o zscat.o zsetscal.o zabgdxyz.o \ + sxyzw.o cxyzw.o dxyzw.o zxyzw.o + +objs: $(OBJS) +lib: objs + ar cur $(UP)/$(LIBNAME) $(OBJS) + + +clean: + /bin/rm -fr $(OBJS) +.cu.o: + $(NVCC) $(CINCLUDES) $(CDEFINES) $(CUDEFINES) $(CUDA_INCLUDES) -c $< + diff --git a/cuda/spgpu/kernels/abs_base.cuh b/cuda/spgpu/kernels/abs_base.cuh new file mode 100644 index 00000000..d48f815d --- /dev/null +++ b/cuda/spgpu/kernels/abs_base.cuh @@ -0,0 +1,110 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_FUNC_NAME +#define GEN_SPGPU_FUNC_NAME(x) CONCAT(CONCAT(spgpu,x),abs) + +#define BLOCK_SIZE 256 + +// Define: +//#define RES_VALUE_TYPE +//#define VALUE_TYPE +//#define TYPE_SYMBOL + +#include "mathbase.cuh" + +__device__ __host__ static inline bool is_one_float(float x) { return (x==1.0f); } +__device__ __host__ static inline bool is_one_cuFloatComplex(cuFloatComplex x) { return ((x.x==1.0f)&&(x.y==0.0f));} + + +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static inline bool is_one_double(double x) { return (x==1.0); } +__device__ __host__ static inline bool is_one_cuDoubleComplex(cuDoubleComplex x) { return ((x.x==1.0)&&(x.y==0.0));} +#endif + + +__global__ void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern_alpha) + (RES_VALUE_TYPE *y, int n, RES_VALUE_TYPE alpha, VALUE_TYPE* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since y, and x are accessed with the same offset by the same thread, + // and the write to y follows the read of x, then x could be y. + + y[id] = CONCAT(RES_VALUE_TYPE, _mul)(alpha, CONCAT(VALUE_TYPE, _abs)(x[id])); + } +} + +__global__ void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern) + (RES_VALUE_TYPE *y, int n, VALUE_TYPE* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since y, and x are accessed with the same offset by the same thread, + // and the write to y follows the read of x, then x could be y. + + y[id] = CONCAT(VALUE_TYPE, _abs)(x[id]); + } +} + +void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (spgpuHandle_t handle, RES_VALUE_TYPE *y, int n, RES_VALUE_TYPE alpha, VALUE_TYPE* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + + dim3 block(BLOCK_SIZE); + dim3 grid(msize); + + + if (CONCAT(is_one_,RES_VALUE_TYPE)(alpha)) + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern)<<currentStream>>>(y, n, x); + else + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern_alpha)<<currentStream>>>(y, n, alpha, x); + +} + +void +GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + __device RES_VALUE_TYPE *y, + int n, + RES_VALUE_TYPE alpha, + __device VALUE_TYPE *x) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (n > maxNForACall) //managing large vectors + { + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, y, maxNForACall, alpha, x); + x = x + maxNForACall; + y = y + maxNForACall; + n -= maxNForACall; + } + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) (handle, y, n, alpha, x); + cudaCheckError("CUDA error on abs"); +} diff --git a/cuda/spgpu/kernels/amax_base.cuh b/cuda/spgpu/kernels/amax_base.cuh new file mode 100644 index 00000000..4d7e7d4a --- /dev/null +++ b/cuda/spgpu/kernels/amax_base.cuh @@ -0,0 +1,233 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_AMAX_NAME +#define GEN_SPGPU_AMAX_NAME(x) CONCAT(CONCAT(spgpu,x),amax) + +// Define: +//#define VALUE_TYPE +//#define TYPE_SYMBOL + +#define BLOCK_SIZE 512 + +typedef float absType_float; +typedef float absType_cuFloatComplex; +typedef double absType_double; +typedef double absType_cuDoubleComplex; + +__device__ __host__ static float zero_float() { return 0.0f; } +__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); } +__device__ __host__ static bool float_isNotZero(float x) { return x != 0.0f; } + +__device__ __host__ static float abs_float(float a) { return fabsf(a); } +__device__ __host__ static float abs_cuFloatComplex(cuFloatComplex a) { return cuCabsf(a); } + + +__device__ static float float_fma(float a, float b, float c) { return PREC_FADD(PREC_FMUL (a, b), c); } +__device__ static float float_add(float a, float b) { return PREC_FADD (a, b); } +__device__ static float float_mul(float a, float b) { return PREC_FMUL (a, b); } + +__device__ static cuFloatComplex cuFloatComplex_fma(cuFloatComplex a, cuFloatComplex b, cuFloatComplex c) { return cuCfmaf(a, b, c); } +__device__ static cuFloatComplex cuFloatComplex_add(cuFloatComplex a, cuFloatComplex b) { return cuCaddf(a, b); } +__device__ static cuFloatComplex cuFloatComplex_mul(cuFloatComplex a, cuFloatComplex b) { return cuCmulf(a, b); } + +__device__ static float readValue_float(float fetch) { return fetch; } +__device__ static cuFloatComplex readValue_cuFloatComplex(cuFloatComplex fetch) { return fetch; } + +// host or c.c >= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } + +__device__ __host__ static double abs_double(float a) { return fabs(a); } +__device__ __host__ static double abs_cuDoubleComplex(cuDoubleComplex a) { return cuCabs(a); } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); } +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif + +static __device__ CONCAT(absType_,VALUE_TYPE) CONCAT(TYPE_SYMBOL,amaxReductionResult)[128]; + +#define MAX(a,b) ((a) > (b) ? (a) : (b)) + +static __device__ CONCAT(absType_,VALUE_TYPE) amaxvv(VALUE_TYPE a, VALUE_TYPE b) +{ + CONCAT(absType_,VALUE_TYPE) absa = CONCAT(abs_,VALUE_TYPE)(a); + CONCAT(absType_,VALUE_TYPE) absb = CONCAT(abs_,VALUE_TYPE)(b); + + return MAX(absa,absb); +} + +static __device__ CONCAT(absType_,VALUE_TYPE) amaxaa(CONCAT(absType_,VALUE_TYPE) a, CONCAT(absType_,VALUE_TYPE) b) +{ + return MAX(a,b); +} + +static __device__ CONCAT(absType_,VALUE_TYPE) amaxav(CONCAT(absType_,VALUE_TYPE) a, VALUE_TYPE b) +{ + CONCAT(absType_,VALUE_TYPE) absb = CONCAT(abs_,VALUE_TYPE)(b); + + return MAX(a,absb); +} + +__global__ void +CONCAT(GEN_SPGPU_AMAX_NAME(TYPE_SYMBOL),_kern) +(int n, VALUE_TYPE* x) +{ + __shared__ CONCAT(absType_,VALUE_TYPE) sSum[BLOCK_SIZE]; + + CONCAT(absType_,VALUE_TYPE) res = 0; + + VALUE_TYPE* lastX = x + n; + + x += threadIdx.x + blockIdx.x*BLOCK_SIZE; + + int blockOffset = gridDim.x*BLOCK_SIZE; + + while (x < lastX) + { + VALUE_TYPE x1 = x[0]; + res = amaxav(res, x1); + + x += blockOffset; + + } + + if (threadIdx.x >= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + CONCAT(absType_,VALUE_TYPE) tRes[128]; + + CONCAT(GEN_SPGPU_AMAX_NAME(TYPE_SYMBOL),_kern)<<currentStream>>>(n, x);; + cudaMemcpyFromSymbol(tRes, CONCAT(TYPE_SYMBOL,amaxReductionResult), blocks*sizeof(CONCAT(absType_,VALUE_TYPE))); + + for (int i=0; i= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } + +__device__ __host__ static double abs_double(float a) { return fabs(a); } +__device__ __host__ static double abs_cuDoubleComplex(cuDoubleComplex a) { return cuCabs(a); } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ __host__ static double double_add(double a, double b) { +#ifndef __CUDA__ARCH__ + return a + b; +#else + return PREC_DADD (a, b); +#endif +} +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif + +static __device__ CONCAT(absType_,VALUE_TYPE) CONCAT(TYPE_SYMBOL,asumReductionResult)[128]; + + +static __device__ CONCAT(absType_,VALUE_TYPE) asumvv(VALUE_TYPE a, VALUE_TYPE b) +{ + CONCAT(absType_,VALUE_TYPE) absa = CONCAT(abs_,VALUE_TYPE)(a); + CONCAT(absType_,VALUE_TYPE) absb = CONCAT(abs_,VALUE_TYPE)(b); + + return absa + absb; +} + +static __device__ __host__ CONCAT(absType_,VALUE_TYPE) asumaa(CONCAT(absType_,VALUE_TYPE) a, CONCAT(absType_,VALUE_TYPE) b) +{ + return a + b; +} + +static __device__ CONCAT(absType_,VALUE_TYPE) asumav(CONCAT(absType_,VALUE_TYPE) a, VALUE_TYPE b) +{ + CONCAT(absType_,VALUE_TYPE) absb = CONCAT(abs_,VALUE_TYPE)(b); + + return a + absb; +} + +__global__ void +CONCAT(GEN_SPGPU_ASUM_NAME(TYPE_SYMBOL),_kern) +(int n, VALUE_TYPE* x) +{ + __shared__ CONCAT(absType_,VALUE_TYPE) sSum[BLOCK_SIZE]; + + CONCAT(absType_,VALUE_TYPE) res = 0; + + VALUE_TYPE* lastX = x + n; + + x += threadIdx.x + blockIdx.x*BLOCK_SIZE; + + int blockOffset = gridDim.x*BLOCK_SIZE; + + while (x < lastX) + { + VALUE_TYPE x1 = x[0]; + res = asumav(res, x1); + + x += blockOffset; + + } + + if (threadIdx.x >= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + CONCAT(absType_,VALUE_TYPE) tRes[128]; + + CONCAT(GEN_SPGPU_ASUM_NAME(TYPE_SYMBOL),_kern)<<currentStream>>>(n, x);; + cudaMemcpyFromSymbol(tRes, CONCAT(TYPE_SYMBOL,asumReductionResult), blocks*sizeof(CONCAT(absType_,VALUE_TYPE))); + + for (int i=0; icurrentStream>>>(z, n, alpha, x, y); + +} + +void +GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + __device VALUE_TYPE *z, + int n, + VALUE_TYPE alpha, + __device VALUE_TYPE *x, + __device VALUE_TYPE *y) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (n > maxNForACall) //managing large vectors + { + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, z, maxNForACall, alpha, x, y); + x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + n -= maxNForACall; + } + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) (handle, z, n, alpha, x, y); + cudaCheckError("CUDA error on axy"); +} + +void +GEN_SPGPU_MFUNC_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + __device VALUE_TYPE *z, + int n, + VALUE_TYPE alpha, + __device VALUE_TYPE* x, + __device VALUE_TYPE *y, + int count, + int pitch) +{ + for (int i=0; icurrentStream>>>(w, n, beta, z, alpha, x, y); + +} + +void +GEN_SPGPU_FUNC_NAME_2(TYPE_SYMBOL) + (spgpuHandle_t handle, + __device VALUE_TYPE *w, + int n, + VALUE_TYPE beta, + __device VALUE_TYPE *z, + VALUE_TYPE alpha, + __device VALUE_TYPE* x, + __device VALUE_TYPE *y + ) +{ + + if (CONCAT(VALUE_TYPE, _isZero(alpha))) + { + GEN_SPGPU_SCAL_NAME(TYPE_SYMBOL) + (handle, w, n, beta, z); + } + else if (CONCAT(VALUE_TYPE, _isZero(beta))) { + GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) + (handle, w, n, alpha, x, y); + } + else { + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + + while (n > maxNForACall) //managing large vectors + { + + CONCAT(GEN_SPGPU_FUNC_NAME_2(TYPE_SYMBOL),_) + (handle, w, maxNForACall, beta, z, alpha, x, y); + + x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + w = w + maxNForACall; + n -= maxNForACall; + } + + CONCAT(GEN_SPGPU_FUNC_NAME_2(TYPE_SYMBOL),_) + (handle, w, n, beta, z, alpha, x, y); + } + + cudaCheckError("CUDA error on axypbz"); +} + +void +GEN_SPGPU_MFUNC_NAME_2(TYPE_SYMBOL) + (spgpuHandle_t handle, + __device VALUE_TYPE *w, + int n, + VALUE_TYPE beta, + __device VALUE_TYPE *z, + VALUE_TYPE alpha, + __device VALUE_TYPE* x, + __device VALUE_TYPE *y, + int count, + int pitch) +{ + for (int i=0; i + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuCabgdxyz_krn(int n, cuFloatComplex alpha, cuFloatComplex beta, + cuFloatComplex gamma, cuFloatComplex delta, + cuFloatComplex * x, cuFloatComplex *y, cuFloatComplex *z) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + cuFloatComplex t; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + if (cuFloatComplex_isZero(beta)) + t = cuCmulf(alpha,x[id]); + else + t = cuCfmaf(alpha, x[id], cuCmulf(beta,y[id])); + if (cuFloatComplex_isZero(delta)) + z[id] = cuCmulf(gamma, t); + else + z[id] = cuCfmaf(gamma, t, cuCmulf(delta,z[id])); + y[id] = t; + } +} + + +void spgpuCabgdxyz(spgpuHandle_t handle, + int n, + cuFloatComplex alpha, + cuFloatComplex beta, + cuFloatComplex gamma, + cuFloatComplex delta, + __device cuFloatComplex * x, + __device cuFloatComplex * y, + __device cuFloatComplex *z) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuCabgdxyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, + x, y, z); +} + diff --git a/cuda/spgpu/kernels/cabs.cu b/cuda/spgpu/kernels/cabs.cu new file mode 100644 index 00000000..f5e9d3ea --- /dev/null +++ b/cuda/spgpu/kernels/cabs.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuFloatComplex +#define RES_VALUE_TYPE cuFloatComplex +#define TYPE_SYMBOL C +#include "abs_base.cuh" + diff --git a/cuda/spgpu/kernels/camax.cu b/cuda/spgpu/kernels/camax.cu new file mode 100644 index 00000000..049bbbe3 --- /dev/null +++ b/cuda/spgpu/kernels/camax.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuFloatComplex +#define TYPE_SYMBOL C +#include "amax_base.cuh" diff --git a/cuda/spgpu/kernels/casum.cu b/cuda/spgpu/kernels/casum.cu new file mode 100644 index 00000000..7f4c92c5 --- /dev/null +++ b/cuda/spgpu/kernels/casum.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuFloatComplex +#define TYPE_SYMBOL C +#include "asum_base.cuh" diff --git a/cuda/spgpu/kernels/caxpby.cu b/cuda/spgpu/kernels/caxpby.cu new file mode 100644 index 00000000..817fdf53 --- /dev/null +++ b/cuda/spgpu/kernels/caxpby.cu @@ -0,0 +1,153 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); + //#include "cuda_util.h" +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +#if 1 + +__global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, cuFloatComplex *y, cuFloatComplex alpha, cuFloatComplex* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + if (cuFloatComplex_isZero(beta)) { + for ( ; id < n; id +=gridSize) + //if (id,n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + z[id] = cuCmulf(alpha,x[id]); + } + } else { + for ( ; id < n; id +=gridSize) + //if (id,n) + { + z[id] = cuCfmaf(beta, y[id], cuCmulf(alpha, x[id])); + } + } +} + +void spgpuCaxpby(spgpuHandle_t handle, + __device cuFloatComplex *z, + int n, + cuFloatComplex beta, + __device cuFloatComplex *y, + cuFloatComplex alpha, + __device cuFloatComplex* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuCaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} + +#else + + +__global__ void spgpuCaxpby_krn(cuFloatComplex *z, int n, cuFloatComplex beta, cuFloatComplex *y, cuFloatComplex alpha, cuFloatComplex* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + if (cuFloatComplex_isZero(beta)) + z[id] = cuCmulf(alpha,x[id]); + else + z[id] = cuCfmaf(beta, y[id], cuCmulf(alpha, x[id])); + } +} + + +void spgpuCaxpby_(spgpuHandle_t handle, + __device cuFloatComplex *z, + int n, + cuFloatComplex beta, + __device cuFloatComplex *y, + cuFloatComplex alpha, + __device cuFloatComplex* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + + dim3 block(BLOCK_SIZE); + dim3 grid(msize); + + spgpuCaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} + +void spgpuCaxpby(spgpuHandle_t handle, + __device cuFloatComplex *z, + int n, + cuFloatComplex beta, + __device cuFloatComplex *y, + cuFloatComplex alpha, + __device cuFloatComplex* x) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + + while (n > maxNForACall) //managing large vectors + { + spgpuCaxpby_(handle, z, maxNForACall, beta, y, alpha, x); + + x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + n -= maxNForACall; + } + + spgpuCaxpby_(handle, z, n, beta, y, alpha, x); + + cudaCheckError("CUDA error on saxpby"); +} +#endif +void spgpuCmaxpby(spgpuHandle_t handle, + __device cuFloatComplex *z, + int n, + cuFloatComplex beta, + __device cuFloatComplex *y, + cuFloatComplex alpha, + __device cuFloatComplex* x, + int count, int pitch) +{ + + for (int i=0; i= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + + cuFloatComplex tRes[128]; + + spgpuCdot_kern<<currentStream>>>(n, a, b); + cudaMemcpyFromSymbol(tRes, sdotReductionResult, blocks*sizeof(cuFloatComplex)); + + for (int i=0; i= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + float tRes[128]; + + spgpuCnrm2_kern<<currentStream>>>(n, x); + cudaMemcpyFromSymbol(tRes, snrm2ReductionResult,blocks*sizeof(float)); + + for (int i=0; i= 200 +#define PREC_FADD(a,b) ((a) + (b)) +#define PREC_FMUL(a,b) ((a) * (b)) +#else +#define PREC_FADD(a,b) __fadd_rn((a),(b)) +#define PREC_FMUL(a,b) __fmul_rn((a),(b)) +#endif + +#define PREC_DADD(a,b) ((a) + (b)) +#define PREC_DMUL(a,b) ((a) * (b)) + + +inline __host__ __device__ double2 make_double2(double s) +{ + return make_double2(s, s); +} + +inline __host__ __device__ double2 operator+(double2 a, double2 b) +{ + return make_double2(a.x + b.x, a.y + b.y); +} + +inline __host__ __device__ void operator+=(double2 &a, double2 b) +{ + a.x += b.x; a.y += b.y; +} + +inline __host__ __device__ double2 operator-(double2 a, double2 b) +{ + return make_double2(a.x - b.x, a.y - b.y); +} + +inline __host__ __device__ void operator-=(double2 &a, double2 b) +{ + a.x -= b.x; a.y -= b.y; +} + +inline __host__ __device__ double2 operator*(double2 a, double s) +{ + return make_double2(a.x * s, a.y * s); +} + +inline __host__ __device__ double2 operator*(double s, double2 a) +{ + return make_double2(a.x * s, a.y * s); +} + +inline __host__ __device__ void operator*=(double2 &a, double s) +{ + a.x *= s; a.y *= s; +} diff --git a/cuda/spgpu/kernels/cxyzw.cu b/cuda/spgpu/kernels/cxyzw.cu new file mode 100644 index 00000000..d2b332b1 --- /dev/null +++ b/cuda/spgpu/kernels/cxyzw.cu @@ -0,0 +1,78 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuCxyzw_krn(int n, cuFloatComplex a, cuFloatComplex b, + cuFloatComplex c, cuFloatComplex d, + cuFloatComplex e, cuFloatComplex f, + cuFloatComplex * x, cuFloatComplex *y, + cuFloatComplex *z, cuFloatComplex *w) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + cuFloatComplex ty, tz; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + ty = cuCfmaf(a, x[id], cuCmulf(b,y[id])); + tz = cuCfmaf(c, ty, cuCmulf(d,z[id])); + w[id] = cuCfmaf(e, tz, cuCmulf(f,w[id])); + y[id] = ty; + z[id] = tz; + } +} + + +void spgpuCxyzw(spgpuHandle_t handle, + int n, + cuFloatComplex a, cuFloatComplex b, + cuFloatComplex c, cuFloatComplex d, + cuFloatComplex e, cuFloatComplex f, + __device cuFloatComplex * x, + __device cuFloatComplex * y, + __device cuFloatComplex * z, + __device cuFloatComplex *w) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuCxyzw_krn<<currentStream>>>(n, a,b,c,d,e,f, + x, y, z,w); +} + diff --git a/cuda/spgpu/kernels/dabgdxyz.cu b/cuda/spgpu/kernels/dabgdxyz.cu new file mode 100644 index 00000000..f2b18e02 --- /dev/null +++ b/cuda/spgpu/kernels/dabgdxyz.cu @@ -0,0 +1,79 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuDabgdxyz_krn(int n, double alpha, double beta, double gamma, double delta, + double* x, double *y, double *z) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + double t; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + if (beta == 0.0) + t = PREC_DMUL(alpha,x[id]); + else + t = PREC_DADD(PREC_DMUL(alpha, x[id]), PREC_DMUL(beta,y[id])); + if (delta == 0.0) + z[id] = gamma * t; + else + z[id] = PREC_DADD(PREC_DMUL(gamma, t), PREC_DMUL(delta,z[id])); + y[id] = t; + } +} + + +void spgpuDabgdxyz(spgpuHandle_t handle, + int n, + double alpha, + double beta, + double gamma, + double delta, + __device double* x, + __device double* y, + __device double *z) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuDabgdxyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, + x, y, z); +} + diff --git a/cuda/spgpu/kernels/dabs.cu b/cuda/spgpu/kernels/dabs.cu new file mode 100644 index 00000000..7fb9c074 --- /dev/null +++ b/cuda/spgpu/kernels/dabs.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define RES_VALUE_TYPE double +#define TYPE_SYMBOL D +#include "abs_base.cuh" + diff --git a/cuda/spgpu/kernels/damax.cu b/cuda/spgpu/kernels/damax.cu new file mode 100644 index 00000000..1e78cc1f --- /dev/null +++ b/cuda/spgpu/kernels/damax.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define TYPE_SYMBOL D +#include "amax_base.cuh" + diff --git a/cuda/spgpu/kernels/dasum.cu b/cuda/spgpu/kernels/dasum.cu new file mode 100644 index 00000000..71a37e80 --- /dev/null +++ b/cuda/spgpu/kernels/dasum.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define TYPE_SYMBOL D +#include "asum_base.cuh" + diff --git a/cuda/spgpu/kernels/daxpby.cu b/cuda/spgpu/kernels/daxpby.cu new file mode 100644 index 00000000..e4823b34 --- /dev/null +++ b/cuda/spgpu/kernels/daxpby.cu @@ -0,0 +1,147 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); + //#include "cuda_util.h" +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + + +#if 1 +__global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double alpha, double* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + if (beta == 0.0) { + for ( ; id < n; id +=gridSize) + { + + z[id] = PREC_DMUL(alpha,x[id]); + } + } else { + for ( ; id < n; id +=gridSize) + { + z[id] = PREC_DADD(PREC_DMUL(alpha, x[id]), PREC_DMUL(beta,y[id])); + } + } +} + +void spgpuDaxpby(spgpuHandle_t handle, + __device double *z, + int n, + double beta, + __device double *y, + double alpha, + __device double* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuDaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} +#else + +__global__ void spgpuDaxpby_krn(double *z, int n, double beta, double *y, double alpha, double* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + if (beta == 0.0) + z[id] = PREC_DMUL(alpha,x[id]); + else + z[id] = PREC_DADD(PREC_DMUL(alpha, x[id]), PREC_DMUL(beta,y[id])); + } +} + + +void spgpuDaxpby_(spgpuHandle_t handle, + __device double *z, + int n, + double beta, + __device double *y, + double alpha, + __device double* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + + dim3 block(BLOCK_SIZE); + dim3 grid(msize); + + spgpuDaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} + +void spgpuDaxpby(spgpuHandle_t handle, + __device double *z, + int n, + double beta, + __device double *y, + double alpha, + __device double* x) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (n > maxNForACall) //managing large vectors + { + spgpuDaxpby_(handle, z, maxNForACall, beta, y, alpha, x); + + x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + n -= maxNForACall; + } + + spgpuDaxpby_(handle, z, n, beta, y, alpha, x); + + cudaCheckError("CUDA error on daxpby"); +} + +#endif +void spgpuDmaxpby(spgpuHandle_t handle, + __device double *z, + int n, + double beta, + __device double *y, + double alpha, + __device double* x, + int count, int pitch) +{ + + for (int i=0; i= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + double tRes[128]; + + spgpuDdot_kern<<currentStream>>>(n, a, b); + cudaMemcpyFromSymbol(tRes, ddotReductionResult,blocks*sizeof(double)); + + for (int i=0; i= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); } +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif + +#if 0 +// Texture cache management +texture < TEX_FETCH_TYPE, 1, cudaReadModeElementType > X_TEX; + +#define bind_tex_x(x) cudaBindTexture(NULL, X_TEX, x) +#define unbind_tex_x(x) cudaUnbindTexture(X_TEX) + +__device__ static VALUE_TYPE +fetchTex (int pointer) +{ + TEX_FETCH_TYPE fetch = tex1Dfetch (X_TEX, pointer); + return CONCAT(readValue_,VALUE_TYPE) (fetch); +} +#endif + +#define GEN_SPGPU_DIA_NAME(x) CONCAT(CONCAT(spgpu,x),diaspmv_vanilla) +#define GEN_SPGPU_DIA_NAME_VANILLA(x) CONCAT(CONCAT(spgpu,x),diaspmv_vanilla) +#include "dia_spmv_base_template.cuh" +#if 0 +#undef GEN_SPGPU_DIA_NAME +#define GEN_SPGPU_DIA_NAME(x) CONCAT(CONCAT(spgpu,x),diaspmv_prefetch) +#define GEN_SPGPU_DIA_NAME_PREFETCH(x) CONCAT(CONCAT(spgpu,x),diaspmv_prefetch) +#undef USE_PREFETCHING +#define USE_PREFETCHING +#include "dia_spmv_base_template.cuh" +#define ENABLE_CACHE +#undef ENABLE_CACHE +#undef GEN_SPGPU_DIA_NAME +#define GEN_SPGPU_DIA_NAME(x) CONCAT(CONCAT(spgpu,x),diaspmv_texcache_prefetch) +#define GEN_SPGPU_DIA_NAME_TEX_PREFETCH(x) CONCAT(CONCAT(spgpu,x),diaspmv_texcache_prefetch) +#include "dia_spmv_base_template.cuh" +#undef GEN_SPGPU_DIA_NAME +#undef USE_PREFETCHING +#define GEN_SPGPU_DIA_NAME(x) CONCAT(CONCAT(spgpu,x),diaspmv_texcache) +#define GEN_SPGPU_DIA_NAME_TEX(x) CONCAT(CONCAT(spgpu,x),diaspmv_texcache) +#include "dia_spmv_base_template.cuh" +#endif + +#undef GEN_SPGPU_DIA_NAME +#define GEN_SPGPU_DIA_NAME(x) CONCAT(CONCAT(spgpu,x),diaspmv) +void +GEN_SPGPU_DIA_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + VALUE_TYPE* z, + const VALUE_TYPE *y, + VALUE_TYPE alpha, + const VALUE_TYPE* dM, + const int* offsets, + int dMPitch, + int rows, + int cols, + int diags, + const VALUE_TYPE *x, + VALUE_TYPE beta) +{ + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + while (rows > maxNForACall) //managing large vectors + { + //if (diags < 10 && handle->capabilityMajor > 1) + // CONCAT(_,GEN_SPGPU_DIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, maxNForACall, cols, diags, x, beta); + //else + CONCAT(_,GEN_SPGPU_DIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, maxNForACall, cols, diags, x, beta); + #if 0 + if (diags < 20) + CONCAT(_,GEN_SPGPU_DIA_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, maxNForACall, cols, diags, x, beta); + else + CONCAT(_,GEN_SPGPU_DIA_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, maxNForACall, cols, diags, x, beta); +#endif + y = y + maxNForACall; + z = z + maxNForACall; + dM = dM + maxNForACall; + + rows -= maxNForACall; + } + CONCAT(_,GEN_SPGPU_DIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); +#if 0 + //if (diags < 10 && handle->capabilityMajor > 1) + // CONCAT(_,GEN_SPGPU_DIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); + //else + if (diags < 20) + CONCAT(_,GEN_SPGPU_DIA_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); + else + CONCAT(_,GEN_SPGPU_DIA_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); +#endif + cudaCheckError("CUDA error on dia_spmv"); +} + diff --git a/cuda/spgpu/kernels/dia_spmv_base_template.cuh b/cuda/spgpu/kernels/dia_spmv_base_template.cuh new file mode 100644 index 00000000..792fa7ed --- /dev/null +++ b/cuda/spgpu/kernels/dia_spmv_base_template.cuh @@ -0,0 +1,217 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define THREAD_BLOCK 128 + +__device__ void +CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int dMPitch, int rows, int cols, int diags, const VALUE_TYPE *x, VALUE_TYPE beta) +{ + int i = threadIdx.x + blockIdx.x * (blockDim.x); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows && CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[i]; + + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + dM += i; + + extern __shared__ int offsetsChunk[]; + + int rounds = (diags + blockDim.x - 1)/blockDim.x; + + for (int r = 0; r < rounds; r++) + { + // in the last round diags will be <= blockDim.x + if (threadIdx.x < diags) + offsetsChunk[threadIdx.x] = offsets[threadIdx.x]; + + __syncthreads(); + + if (i < rows) + { + int count = min(diags, blockDim.x ); + + +#ifdef USE_PREFETCHING + int j; + for (j=0; j<=count-3; j += 3) + { + // Prefetch 3 values + int column1 = offsetsChunk[j] + i; + int column2 = offsetsChunk[j+1] + i; + int column3 = offsetsChunk[j+2] + i; + + bool inside1 = column1 >= 0 && column1 < cols; + bool inside2 = column2 >= 0 && column2 < cols; + bool inside3 = column3 >= 0 && column3 < cols; + + // Anticipate global memory read + + VALUE_TYPE xValue1, xValue2, xValue3; + VALUE_TYPE mValue1, mValue2, mValue3; + + if(inside1) + { + mValue1 = dM[0]; +#ifdef ENABLE_CACHE + xValue1 = fetchTex (column1); +#else + xValue1 = x[column1]; +#endif + } + dM += dMPitch; + + if(inside2) + { + mValue2 = dM[0]; +#ifdef ENABLE_CACHE + xValue2 = fetchTex (column2); +#else + xValue2 = x[column2]; +#endif + } + dM += dMPitch; + + if(inside3) + { + mValue3 = dM[0]; +#ifdef ENABLE_CACHE + xValue3 = fetchTex (column3); +#else + xValue3 = x[column3]; +#endif + } + dM += dMPitch; + + if(inside1) + zProd = CONCAT(VALUE_TYPE, _fma)(mValue1, xValue1, zProd); + if(inside2) + zProd = CONCAT(VALUE_TYPE, _fma)(mValue2, xValue2, zProd); + if(inside3) + zProd = CONCAT(VALUE_TYPE, _fma)(mValue3, xValue3, zProd); + } + + for (;j= 0 && column < cols) + { + VALUE_TYPE xValue; +#ifdef ENABLE_CACHE + xValue = fetchTex (column); +#else + xValue = x[column]; +#endif + VALUE_TYPE mValue = dM[0]; + zProd = CONCAT(VALUE_TYPE, _fma)(mValue, xValue, zProd); + + } + + dM += dMPitch; + } +#else + for (int j=0; j= 0 && column < cols) + { + + VALUE_TYPE xValue; + +#ifdef ENABLE_CACHE + xValue = fetchTex (column); +#else + xValue = x[column]; +#endif + + VALUE_TYPE mValue = dM[0]; + zProd = CONCAT(VALUE_TYPE, _fma)(mValue, xValue, zProd); + } + + dM += dMPitch; + } +#endif + } + + diags -= blockDim.x; + offsets += blockDim.x; + __syncthreads(); + } + + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (i >= rows) + return; + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[i] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[i] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); +} + +// Force to recompile and optimize with llvm +__global__ void +CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn_b0) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int dMPitch, int rows, int cols, int diags, const VALUE_TYPE *x) +{ + CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _) + (z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, CONCAT(zero_,VALUE_TYPE)()); +} + +__global__ void +CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int dMPitch, int rows, int cols, int diags, const VALUE_TYPE *x, VALUE_TYPE beta) +{ + CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _) + (z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); +} + +void +CONCAT(_,GEN_SPGPU_DIA_NAME(TYPE_SYMBOL)) +(spgpuHandle_t handle, VALUE_TYPE* z, const VALUE_TYPE *y, VALUE_TYPE alpha, + const VALUE_TYPE* dM, const int* offsets, int dMPitch, int rows, int cols, int diags, + const VALUE_TYPE *x, VALUE_TYPE beta) +{ + dim3 block (THREAD_BLOCK ); + dim3 grid ((rows + THREAD_BLOCK - 1) / THREAD_BLOCK ); + +#ifdef ENABLE_CACHE + bind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn), cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn_b0), cudaFuncCachePreferL1); + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn) + <<< grid, block, block.x*sizeof(int), handle->currentStream >>> (z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); + else + CONCAT(GEN_SPGPU_DIA_NAME(TYPE_SYMBOL), _krn_b0) <<< grid, block, block.x*sizeof(int), handle->currentStream >>> (z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x); + +#ifdef ENABLE_CACHE + unbind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + +} + diff --git a/cuda/spgpu/kernels/dia_sspmv.cu b/cuda/spgpu/kernels/dia_sspmv.cu new file mode 100644 index 00000000..2870d780 --- /dev/null +++ b/cuda/spgpu/kernels/dia_sspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "dia.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#define TEX_FETCH_TYPE float +#include "dia_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/dia_zspmv.cu b/cuda/spgpu/kernels/dia_zspmv.cu new file mode 100644 index 00000000..7e2fe68f --- /dev/null +++ b/cuda/spgpu/kernels/dia_zspmv.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +extern "C" +{ +#include "core.h" +#include "dia.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#define TEX_FETCH_TYPE int4 +#include "dia_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/dnrm2.cu b/cuda/spgpu/kernels/dnrm2.cu new file mode 100644 index 00000000..450882c4 --- /dev/null +++ b/cuda/spgpu/kernels/dnrm2.cu @@ -0,0 +1,157 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudalang.h" +#include "cudadebug.h" + + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + + +//#define USE_CUBLAS + +//#define ASSUME_LOCK_SYNC_PARALLELISM + + +#define BLOCK_SIZE 512 + +static __device__ double dnrm2ReductionResult[128]; + +__global__ void spgpuDnrm2_kern(int n, double* x) +{ + __shared__ double sSum[BLOCK_SIZE]; + + double res = 0; + + double* lastX = x + n; + + x += threadIdx.x + blockIdx.x*BLOCK_SIZE; + + int blockOffset = gridDim.x*BLOCK_SIZE; + + while (x < lastX) + { + double x1 = x[0]; + res = PREC_DADD(res, PREC_DMUL(x1, x1)); + + x += blockOffset; + + } + + if (threadIdx.x >= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + double tRes[128]; + + spgpuDnrm2_kern<<currentStream>>>(n, x);; + cudaMemcpyFromSymbol(tRes, dnrm2ReductionResult,blocks*sizeof(double)); + + for (int i=0; i + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuDxyzw_krn(int n, double a, double b, + double c, double d, + double e, double f, + double * x, double *y, + double *z, double *w) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + double ty, tz; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + ty = PREC_DADD(PREC_DADD(a, x[id]), PREC_DMUL(b,y[id])); + tz = PREC_DADD(PREC_DADD(c, ty), PREC_DMUL(d,z[id])); + w[id] = PREC_DADD(PREC_DADD(e, tz), PREC_DMUL(f,w[id])); + y[id] = ty; + z[id] = tz; + } +} + + +void spgpuDxyzw(spgpuHandle_t handle, + int n, + double a, double b, + double c, double d, + double e, double f, + __device double * x, + __device double * y, + __device double * z, + __device double *w) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuDxyzw_krn<<currentStream>>>(n, a,b,c,d,e,f, + x, y, z,w); +} + diff --git a/cuda/spgpu/kernels/ell_ccsput.cu b/cuda/spgpu/kernels/ell_ccsput.cu new file mode 100644 index 00000000..29e4d54c --- /dev/null +++ b/cuda/spgpu/kernels/ell_ccsput.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +extern "C" +{ +#include "core.h" +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuFloatComplex +#define TYPE_SYMBOL C +#include "ell_csput_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_cspmv.cu b/cuda/spgpu/kernels/ell_cspmv.cu new file mode 100644 index 00000000..88bf47f7 --- /dev/null +++ b/cuda/spgpu/kernels/ell_cspmv.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +extern "C" +{ +#include "core.h" +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuFloatComplex +#define TYPE_SYMBOL C +#define TEX_FETCH_TYPE cuFloatComplex +#include "ell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_csput_base.cuh b/cuda/spgpu/kernels/ell_csput_base.cuh new file mode 100644 index 00000000..95e0e33d --- /dev/null +++ b/cuda/spgpu/kernels/ell_csput_base.cuh @@ -0,0 +1,126 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_ELL_NAME +#define GEN_SPGPU_ELL_NAME(x) CONCAT(CONCAT(spgpu,x),ellcsput) + +#define THREAD_BLOCK 256 + +// Define: +//#define VALUE_TYPE +//#define TYPE_SYMBOL + +#include "mathbase.cuh" + + +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn) +(VALUE_TYPE alpha, VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, + int nnz, int *aI, int *aJ, VALUE_TYPE *aVal, int baseIndex) +{ + int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + + if (i < nnz) + { + int row = aI[i] - baseIndex; + int column = aJ[i]; + VALUE_TYPE value = aVal[i]; + + if (row < 0) + return; + + // Select row + cM += row; + rP += row; + rS += row; + + // Binary search + int lower = 0; + int upper = (*rS - 1); + + while(lower <= upper) + { + int medium = (lower + upper) / 2; + + int currentColumn = rP[medium*rPPitch]; + + if(currentColumn == column) + { + cM[medium*cMPitch] = value; + break; + } + else if(currentColumn < column) + lower = medium + 1; + else + upper = medium - 1; + } + } +} + + + +void +CONCAT(_,GEN_SPGPU_ELL_NAME(TYPE_SYMBOL)) +(spgpuHandle_t handle, VALUE_TYPE alpha, VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, + int nnz, int *aI, int *aJ, VALUE_TYPE *aVal, int baseIndex) +{ + dim3 block (THREAD_BLOCK, 1); + + dim3 grid ((nnz + THREAD_BLOCK - 1) / THREAD_BLOCK); + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn) + <<< grid, block, 0, handle->currentStream >>> (alpha, cM, rP, cMPitch, rPPitch, rS, nnz, aI, aJ, aVal, baseIndex); +} + +void +GEN_SPGPU_ELL_NAME(TYPE_SYMBOL) + (spgpuHandle_t handle, + VALUE_TYPE alpha, + VALUE_TYPE* cM, + const int* rP, + int cMPitch, + int rPPitch, + const int* rS, + int nnz, + int *aI, + int *aJ, + VALUE_TYPE *aVal, + int baseIndex) +{ + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + while (nnz > maxNForACall) //managing large vectors + { + CONCAT(_,GEN_SPGPU_ELL_NAME(TYPE_SYMBOL)) + (handle, alpha, cM, rP, cMPitch, rPPitch, rS, maxNForACall, aI, aJ, aVal, baseIndex); + + aI = aI + maxNForACall; + aJ = aJ + maxNForACall; + aVal = aVal + maxNForACall; + + nnz -= maxNForACall; + } + + CONCAT(_,GEN_SPGPU_ELL_NAME(TYPE_SYMBOL)) + (handle, alpha, cM, rP, cMPitch, rPPitch, rS, nnz, aI, aJ, aVal, baseIndex); + + cudaCheckError("CUDA error on ell_csput"); +} + diff --git a/cuda/spgpu/kernels/ell_dcsput.cu b/cuda/spgpu/kernels/ell_dcsput.cu new file mode 100644 index 00000000..00e60565 --- /dev/null +++ b/cuda/spgpu/kernels/ell_dcsput.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define TYPE_SYMBOL D +#include "ell_csput_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_dspmv.cu b/cuda/spgpu/kernels/ell_dspmv.cu new file mode 100644 index 00000000..4e682772 --- /dev/null +++ b/cuda/spgpu/kernels/ell_dspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define TYPE_SYMBOL D +#define TEX_FETCH_TYPE int2 +#include "ell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_scsput.cu b/cuda/spgpu/kernels/ell_scsput.cu new file mode 100644 index 00000000..eeb5c6bb --- /dev/null +++ b/cuda/spgpu/kernels/ell_scsput.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#include "ell_csput_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_spmv_base.cuh b/cuda/spgpu/kernels/ell_spmv_base.cuh new file mode 100644 index 00000000..e2af5896 --- /dev/null +++ b/cuda/spgpu/kernels/ell_spmv_base.cuh @@ -0,0 +1,154 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_ELL_NAME +#undef X_TEX +#define X_TEX CONCAT(x_tex_, FUNC_SUFFIX) + +__device__ __host__ static float zero_float() { return 0.0f; } +__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); } +__device__ __host__ static bool float_isNotZero(float x) { return x != 0.0f; } + +__device__ static float float_fma(float a, float b, float c) { return PREC_FADD(PREC_FMUL (a, b), c); } +__device__ static float float_add(float a, float b) { return PREC_FADD (a, b); } +__device__ static float float_mul(float a, float b) { return PREC_FMUL (a, b); } + +__device__ static cuFloatComplex cuFloatComplex_fma(cuFloatComplex a, cuFloatComplex b, cuFloatComplex c) { return cuCfmaf(a, b, c); } +__device__ static cuFloatComplex cuFloatComplex_add(cuFloatComplex a, cuFloatComplex b) { return cuCaddf(a, b); } +__device__ static cuFloatComplex cuFloatComplex_mul(cuFloatComplex a, cuFloatComplex b) { return cuCmulf(a, b); } + +__device__ static float readValue_float(float fetch) { return fetch; } +__device__ static cuFloatComplex readValue_cuFloatComplex(cuFloatComplex fetch) { return fetch; } + +// host or c.c >= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); } +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif + +#if 0 +// Texture cache management +texture < TEX_FETCH_TYPE, 1, cudaReadModeElementType > X_TEX; + +#define bind_tex_x(x) cudaBindTexture(NULL, X_TEX, x) +#define unbind_tex_x(x) cudaUnbindTexture(X_TEX) + +__device__ static VALUE_TYPE +fetchTex (int pointer) +{ + TEX_FETCH_TYPE fetch = tex1Dfetch (X_TEX, pointer); + return CONCAT(readValue_,VALUE_TYPE) (fetch); +} +#endif +#define GEN_SPGPU_ELL_NAME(x) CONCAT(CONCAT(spgpu,x),ellspmv_vanilla) +#define GEN_SPGPU_ELL_NAME_VANILLA(x) CONCAT(CONCAT(spgpu,x),ellspmv_vanilla) +#include "ell_spmv_base_template.cuh" +#if 0 +#undef GEN_SPGPU_ELL_NAME +#define GEN_SPGPU_ELL_NAME(x) CONCAT(CONCAT(spgpu,x),ellspmv_prefetch) +#define GEN_SPGPU_ELL_NAME_PREFETCH(x) CONCAT(CONCAT(spgpu,x),ellspmv_prefetch) +#undef USE_PREFETCHING +#define USE_PREFETCHING +#include "ell_spmv_base_template.cuh" +#define ENABLE_CACHE +#undef GEN_SPGPU_ELL_NAME +#define GEN_SPGPU_ELL_NAME(x) CONCAT(CONCAT(spgpu,x),ellspmv_texcache_prefetch) +#define GEN_SPGPU_ELL_NAME_TEX_PREFETCH(x) CONCAT(CONCAT(spgpu,x),ellspmv_texcache_prefetch) +#include "ell_spmv_base_template.cuh" +#undef GEN_SPGPU_ELL_NAME +#undef USE_PREFETCHING +#define GEN_SPGPU_ELL_NAME(x) CONCAT(CONCAT(spgpu,x),ellspmv_texcache) +#define GEN_SPGPU_ELL_NAME_TEX(x) CONCAT(CONCAT(spgpu,x),ellspmv_texcache) +#include "ell_spmv_base_template.cuh" +#endif +#undef GEN_SPGPU_ELL_NAME +#define GEN_SPGPU_ELL_NAME(x) CONCAT(CONCAT(spgpu,x),ellspmv) +void +GEN_SPGPU_ELL_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + VALUE_TYPE* z, + const VALUE_TYPE *y, + VALUE_TYPE alpha, + const VALUE_TYPE* cM, + const int* rP, + int cMPitch, + int rPPitch, + const int* rS, + const __device int* rIdx, + int avgNnzPerRow, + int maxNnzPerRow, + int rows, + const VALUE_TYPE *x, + VALUE_TYPE beta, + int baseIndex) +{ + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + while (rows > maxNForACall) //managing large vectors + { +#if 0 + if (avgNnzPerRow < 10 && handle->capabilityMajor > 1) +#endif + CONCAT(_,GEN_SPGPU_ELL_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, maxNForACall, x, beta, baseIndex); +#if 0 + else if (avgNnzPerRow < 20) + CONCAT(_,GEN_SPGPU_ELL_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, maxNForACall, x, beta, baseIndex); + else + CONCAT(_,GEN_SPGPU_ELL_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, maxNForACall, x, beta, baseIndex); +#endif + y = y + maxNForACall; + z = z + maxNForACall; + cM = cM + maxNForACall; + rP = rP + maxNForACall; + rS = rS + maxNForACall; + + rows -= maxNForACall; + } +#if 0 + if (avgNnzPerRow < 10 && handle->capabilityMajor > 1) +#endif + CONCAT(_,GEN_SPGPU_ELL_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, rows, x, beta, baseIndex); + #if 0 + else if (avgNnzPerRow < 20) + CONCAT(_,GEN_SPGPU_ELL_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, rows, x, beta, baseIndex); + else + CONCAT(_,GEN_SPGPU_ELL_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, avgNnzPerRow, maxNnzPerRow, rows, x, beta, baseIndex); +#endif + cudaCheckError("CUDA error on ell_spmv"); +} + diff --git a/cuda/spgpu/kernels/ell_spmv_base_nors.cuh b/cuda/spgpu/kernels/ell_spmv_base_nors.cuh new file mode 100644 index 00000000..8cd89704 --- /dev/null +++ b/cuda/spgpu/kernels/ell_spmv_base_nors.cuh @@ -0,0 +1,340 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4_noRs) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int rows, int maxNnzPerRow, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + __shared__ VALUE_TYPE temp[2][THREAD_BLOCK+1]; + + if (i < rows) + { + rP += i; cM += i; + + int rowSizeM = maxNnzPerRow / 4; + + + if ((maxNnzPerRow % 4) > threadIdx.y) + ++rowSizeM; + + rP += threadIdx.y*rPPitch; + cM += threadIdx.y*cMPitch; + + + for (int j = 0; j < rowSizeM; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += 4*rPPitch; + + value = cM[0]; + cM += 4*cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex(pointer); +#else + fetch = x[pointer]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } + + // Reduction + if (threadIdx.y > 1) + temp[threadIdx.y - 2][threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y <= 1) + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[threadIdx.y][threadIdx.x]); + + if (threadIdx.y == 1) + temp[1][threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y == 0) + { + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[1][threadIdx.x]); + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } + } +} + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2_noRs) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int maxNnzPerRow, const int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + __shared__ VALUE_TYPE temp[THREAD_BLOCK]; + + if (i < rows) + { + rP += i; cM += i; + + int rowSizeM = maxNnzPerRow / 2; + + if (threadIdx.y == 0) + { + if (maxNnzPerRow % 2) + ++rowSizeM; + } + else + { + rP += rPPitch; + cM += cMPitch; + } + + + for (int j = 0; j < rowSizeM; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += rPPitch; + rP += rPPitch; + + value = cM[0]; + cM += cMPitch; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex(pointer); +#else + fetch = x[pointer]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } + + // Reduction + if (threadIdx.y == 1) + temp[threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y == 0) + { + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[threadIdx.x]); + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } + } +} + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_noRs) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int maxNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows) + { + rP += i; cM += i; + +#ifdef USE_PREFETCHING + for (int j = 0; j < maxNnzPerRow / 2; j++) + { + int pointers1, pointers2; + VALUE_TYPE values1, values2; + VALUE_TYPE fetches1, fetches2; + + pointers1 = rP[0] - baseIndex; + rP += rPPitch; + pointers2 = rP[0] - baseIndex; + rP += rPPitch; + + values1 = cM[0]; + cM += cMPitch; + + values2 = cM[0]; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetches1 = fetchTex(pointers1); + fetches2 = fetchTex(pointers2); +#else + fetches1 = x[pointers1]; + fetches2 = x[pointers2]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(values1, fetches1, zProd); + zProd = CONCAT(VALUE_TYPE, _fma)(values2, fetches2, zProd); + } + + // odd row size + if (maxNnzPerRow % 2) + { + int pointer = rP[0] - baseIndex; + VALUE_TYPE value = cM[0]; + VALUE_TYPE fetch; + +#ifdef ENABLE_CACHE + fetch = fetchTex (pointer); +#else + fetch = x[pointer]; +#endif + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } +#else + for (int j = 0; j < maxNnzPerRow; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += rPPitch; + + value = cM[0]; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex (pointer); +#else + fetch = x[pointer]; +#endif + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } +#endif + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } +} + +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx_noRs) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rIdx, int maxNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + int outRow = 0; + if (i < rows) + { + + outRow = rIdx[i]; + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[outRow]; + } + + if (blockDim.y == 1) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_noRs) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + else //if (blockDim.y == 2) + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2_noRs) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + /* + else if (blockDim.y == 4) + + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4_noRs) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + */ +} + + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _noRs) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int maxNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows) + { + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[i]; + + } + + if (blockDim.y == 1) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_noRs) + (i, yVal, i, z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + + else //if (blockDim.y == 2) + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2_noRs) + (i, yVal, i, z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + /* + else if (blockDim.y == 4) + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4_noRs) + (i, yVal, i, z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + */ + +} + +// Force to recompile and optimize with llvm +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0_noRs) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int maxNnzPerRow, int rows, const VALUE_TYPE *x, int baseIndex) +{ + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _noRs) + (z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, CONCAT(zero_,VALUE_TYPE)(), baseIndex); +} + +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_noRs) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, int maxNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _noRs) + (z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); +} diff --git a/cuda/spgpu/kernels/ell_spmv_base_template.cuh b/cuda/spgpu/kernels/ell_spmv_base_template.cuh new file mode 100644 index 00000000..fa39d8a6 --- /dev/null +++ b/cuda/spgpu/kernels/ell_spmv_base_template.cuh @@ -0,0 +1,426 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define THREAD_BLOCK 128 + +#include "ell_spmv_base_nors.cuh" + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + __shared__ VALUE_TYPE temp[2][THREAD_BLOCK+1]; + + if (i < rows) + { + rS += i; rP += i; cM += i; + + int rowSize = rS[0]; + int rowSizeM = rowSize / 4; + + + if ((rowSize % 4) > threadIdx.y) + ++rowSizeM; + + rP += threadIdx.y*rPPitch; + cM += threadIdx.y*cMPitch; + + + for (int j = 0; j < rowSizeM; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += 4*rPPitch; + + value = cM[0]; + cM += 4*cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex(pointer); +#else + fetch = x[pointer]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } + + // Reduction + if (threadIdx.y > 1) + temp[threadIdx.y - 2][threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y <= 1) + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[threadIdx.y][threadIdx.x]); + + if (threadIdx.y == 1) + temp[1][threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y == 0) + { + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[1][threadIdx.x]); + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } + } +} + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + __shared__ VALUE_TYPE temp[THREAD_BLOCK]; + + if (i < rows) + { + rS += i; rP += i; cM += i; + + int rowSize = rS[0]; + int rowSizeM = rowSize / 2; + + if (threadIdx.y == 0) + { + if (rowSize % 2) + ++rowSizeM; + } + else + { + rP += rPPitch; + cM += cMPitch; + } + + + for (int j = 0; j < rowSizeM; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += rPPitch; + rP += rPPitch; + + value = cM[0]; + cM += cMPitch; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex(pointer); +#else + fetch = x[pointer]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } + + // Reduction + if (threadIdx.y == 1) + temp[threadIdx.x] = zProd; + } + + __syncthreads(); + + if (i < rows) + { + if (threadIdx.y == 0) + { + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[threadIdx.x]); + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } + } +} + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows) + { + rS += i; rP += i; cM += i; + + int rowSize = rS[0]; + +#ifdef USE_PREFETCHING + for (int j = 0; j < rowSize / 2; j++) + { + int pointers1, pointers2; + VALUE_TYPE values1, values2; + VALUE_TYPE fetches1, fetches2; + + pointers1 = rP[0] - baseIndex; + rP += rPPitch; + pointers2 = rP[0] - baseIndex; + rP += rPPitch; + + values1 = cM[0]; + cM += cMPitch; + + values2 = cM[0]; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetches1 = fetchTex(pointers1); + fetches2 = fetchTex(pointers2); +#else + fetches1 = x[pointers1]; + fetches2 = x[pointers2]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(values1, fetches1, zProd); + zProd = CONCAT(VALUE_TYPE, _fma)(values2, fetches2, zProd); + } + + // odd row size + if (rowSize % 2) + { + int pointer = rP[0] - baseIndex; + VALUE_TYPE value = cM[0]; + VALUE_TYPE fetch; + +#ifdef ENABLE_CACHE + fetch = fetchTex (pointer); +#else + fetch = x[pointer]; +#endif + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } +#else + for (int j = 0; j < rowSize; j++) + { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += rPPitch; + + value = cM[0]; + cM += cMPitch; + +#ifdef ENABLE_CACHE + fetch = fetchTex (pointer); +#else + fetch = x[pointer]; +#endif + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } +#endif + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } +} + +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, const int* rIdx, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + int outRow = 0; + if (i < rows) + { + + outRow = rIdx[i]; + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[outRow]; + } + + if (blockDim.y == 1) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + else //if (blockDim.y == 2) + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + /* + else if (blockDim.y == 4) + + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4) + (i, yVal, outRow, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + */ +} + + +__device__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows) + { + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[i]; + + } + + if (blockDim.y == 1) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx) + (i, yVal, i, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + + else //if (blockDim.y == 2) + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_2) + (i, yVal, i, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + /* + else if (blockDim.y == 4) + + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _ridx_4) + (i, yVal, i, z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + */ + +} + +// Force to recompile and optimize with llvm +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, int baseIndex) +{ + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _) + (z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, CONCAT(zero_,VALUE_TYPE)(), baseIndex); +} + +__global__ void +CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _) + (z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); +} + +void +CONCAT(_,GEN_SPGPU_ELL_NAME(TYPE_SYMBOL)) +(spgpuHandle_t handle, VALUE_TYPE* z, const VALUE_TYPE *y, VALUE_TYPE alpha, + const VALUE_TYPE* cM, const int* rP, int cMPitch, int rPPitch, const int* rS, + const __device int* rIdx, int avgNnzPerRow, int maxNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int avgThreshold; + + if (handle->capabilityMajor < 2) + avgThreshold = 8; + else if (handle->capabilityMajor < 3) + avgThreshold = 16; + else + avgThreshold = 32; + +#if defined(ELL_FORCE_THREADS_1) + dim3 block (THREAD_BLOCK, 1); +#elif defined(ELL_FORCE_THREADS_2) + dim3 block (THREAD_BLOCK, 2); +#else + dim3 block (THREAD_BLOCK, avgNnzPerRow >= avgThreshold ? 2 : 1); +#endif + + dim3 grid ((rows + THREAD_BLOCK - 1) / THREAD_BLOCK); + +#ifdef ENABLE_CACHE + bind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + + if (rIdx) + { + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx), cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx_noRs), cudaFuncCachePreferL1); + + if (rS) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rIdx, rows, x, beta, baseIndex); + else + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_ridx_noRs) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, rIdx, maxNnzPerRow, rows, x, beta, baseIndex); + } + else + { + + + if (rS) + { + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn), cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0), cudaFuncCachePreferL1); + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, beta, baseIndex); + else + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, rS, rows, x, baseIndex); + } + else + { + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_noRs), cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0_noRs), cudaFuncCachePreferL1); + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_noRs) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, beta, baseIndex); + else + CONCAT(GEN_SPGPU_ELL_NAME(TYPE_SYMBOL), _krn_b0_noRs) + <<< grid, block, 0, handle->currentStream >>> (z, y, alpha, cM, rP, cMPitch, rPPitch, maxNnzPerRow, rows, x, baseIndex); + } + } + +#ifdef ENABLE_CACHE + unbind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + +} + diff --git a/cuda/spgpu/kernels/ell_sspmv.cu b/cuda/spgpu/kernels/ell_sspmv.cu new file mode 100644 index 00000000..e64e03e8 --- /dev/null +++ b/cuda/spgpu/kernels/ell_sspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#define TEX_FETCH_TYPE float +#include "ell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_zcsput.cu b/cuda/spgpu/kernels/ell_zcsput.cu new file mode 100644 index 00000000..3d0f3293 --- /dev/null +++ b/cuda/spgpu/kernels/ell_zcsput.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +extern "C" +{ +#include "core.h" +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#include "ell_csput_base.cuh" + diff --git a/cuda/spgpu/kernels/ell_zspmv.cu b/cuda/spgpu/kernels/ell_zspmv.cu new file mode 100644 index 00000000..a9c0fe64 --- /dev/null +++ b/cuda/spgpu/kernels/ell_zspmv.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +extern "C" +{ +#include "core.h" +#include "ell.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#define TEX_FETCH_TYPE int4 +#include "ell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/gath_base.cuh b/cuda/spgpu/kernels/gath_base.cuh new file mode 100644 index 00000000..a0e77b44 --- /dev/null +++ b/cuda/spgpu/kernels/gath_base.cuh @@ -0,0 +1,86 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_FUNC_NAME +#define GEN_SPGPU_FUNC_NAME(x) CONCAT(CONCAT(spgpu,x),gath) + +#define BLOCK_SIZE 256 + +// Define: +//#define VALUE_TYPE +//#define TYPE_SYMBOL + +#include "mathbase.cuh" + +__global__ void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern) + (VALUE_TYPE* values, int count, const int* indices, int firstIndex, const VALUE_TYPE* vector) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < count) + { + int pos = indices[id]-firstIndex; + + if (pos < 0) + return; + + values[id] = vector[pos]; + } +} + +void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (spgpuHandle_t handle, VALUE_TYPE *xValues, int xNnz, + const __device int *xIndices, int xBaseIndex, const VALUE_TYPE* y) +{ + int msize = (xNnz+BLOCK_SIZE-1)/BLOCK_SIZE; + + dim3 block(BLOCK_SIZE); + dim3 grid(msize); + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern)<<currentStream>>>(xValues, xNnz, xIndices, xBaseIndex, y); + +} + +void +GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + __device VALUE_TYPE *xValues, + int xNnz, + const __device int *xIndices, + int xBaseIndex, + const __device VALUE_TYPE* y) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (xNnz > maxNForACall) //managing large vectors + { + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, xValues, maxNForACall, xIndices, xBaseIndex, y); + xIndices += maxNForACall; + xValues += maxNForACall; + xNnz -= maxNForACall; + } + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, xValues, xNnz, xIndices, xBaseIndex, y); + cudaCheckError("CUDA error on gath"); +} + diff --git a/cuda/spgpu/kernels/hdia_cspmv.cu b/cuda/spgpu/kernels/hdia_cspmv.cu new file mode 100644 index 00000000..52360b40 --- /dev/null +++ b/cuda/spgpu/kernels/hdia_cspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +extern "C" +{ +#include "core.h" +#include "hdia.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuFloatComplex +#define TYPE_SYMBOL C +#define TEX_FETCH_TYPE cuFloatComplex +#include "hdia_spmv_base.cuh" diff --git a/cuda/spgpu/kernels/hdia_dspmv.cu b/cuda/spgpu/kernels/hdia_dspmv.cu new file mode 100644 index 00000000..5fdb08e9 --- /dev/null +++ b/cuda/spgpu/kernels/hdia_dspmv.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "hdia.h" +} + +#include "debug.h" + +//#define ENABLE_CACHE +#define VALUE_TYPE double +#define TYPE_SYMBOL D +//#define TEX_FETCH_TYPE int2 +#include "hdia_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/hdia_spmv_base.cuh b/cuda/spgpu/kernels/hdia_spmv_base.cuh new file mode 100644 index 00000000..2c8cfbaf --- /dev/null +++ b/cuda/spgpu/kernels/hdia_spmv_base.cuh @@ -0,0 +1,149 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_HDIA_NAME +#undef X_TEX +#define X_TEX CONCAT(x_tex_, FUNC_SUFFIX) + +__device__ __host__ static float zero_float() { return 0.0f; } +__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); } +__device__ __host__ static bool float_isNotZero(float x) { return x != 0.0f; } + +__device__ static float float_fma(float a, float b, float c) { return PREC_FADD(PREC_FMUL (a, b), c); } +__device__ static float float_add(float a, float b) { return PREC_FADD (a, b); } +__device__ static float float_mul(float a, float b) { return PREC_FMUL (a, b); } + +__device__ static cuFloatComplex cuFloatComplex_fma(cuFloatComplex a, cuFloatComplex b, cuFloatComplex c) { return cuCfmaf(a, b, c); } +__device__ static cuFloatComplex cuFloatComplex_add(cuFloatComplex a, cuFloatComplex b) { return cuCaddf(a, b); } +__device__ static cuFloatComplex cuFloatComplex_mul(cuFloatComplex a, cuFloatComplex b) { return cuCmulf(a, b); } + +__device__ static float readValue_float(float fetch) { return fetch; } +__device__ static cuFloatComplex readValue_cuFloatComplex(cuFloatComplex fetch) { return fetch; } + +// host or c.c >= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); } +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif +#if 0 +// Texture cache management +texture < TEX_FETCH_TYPE, 1, cudaReadModeElementType > X_TEX; + +#define bind_tex_x(x) cudaBindTexture(NULL, X_TEX, x) +#define unbind_tex_x(x) cudaUnbindTexture(X_TEX) + +__device__ static VALUE_TYPE +fetchTex (int pointer) +{ + TEX_FETCH_TYPE fetch = tex1Dfetch (X_TEX, pointer); + return CONCAT(readValue_,VALUE_TYPE) (fetch); +} +#endif +#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_vanilla) +#define GEN_SPGPU_HDIA_NAME_VANILLA(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_vanilla) +#include "hdia_spmv_base_template.cuh" +#if 0 +#undef GEN_SPGPU_HDIA_NAME +#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_prefetch) +#define GEN_SPGPU_HDIA_NAME_PREFETCH(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_prefetch) +#undef USE_PREFETCHING +#define USE_PREFETCHING +#include "hdia_spmv_base_template.cuh" +#define ENABLE_CACHE +#undef GEN_SPGPU_HDIA_NAME +#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache_prefetch) +#define GEN_SPGPU_HDIA_NAME_TEX_PREFETCH(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache_prefetch) +#include "hdia_spmv_base_template.cuh" +#undef GEN_SPGPU_HDIA_NAME +#undef USE_PREFETCHING +#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache) +#define GEN_SPGPU_HDIA_NAME_TEX(x) CONCAT(CONCAT(spgpu,x),hdiaspmv_texcache) +#include "hdia_spmv_base_template.cuh" +#endif +#undef GEN_SPGPU_HDIA_NAME +#define GEN_SPGPU_HDIA_NAME(x) CONCAT(CONCAT(spgpu,x),hdiaspmv) +void +GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + VALUE_TYPE* z, + const VALUE_TYPE *y, + VALUE_TYPE alpha, + const VALUE_TYPE* dM, + const int* offsets, + int hackSize, + const int* hackOffsets, + int rows, + int cols, + const VALUE_TYPE *x, + VALUE_TYPE beta) +{ + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + // maxNForACall should be a multiple of hackSize + maxNForACall = (maxNForACall/hackSize)*hackSize; + + while (rows > maxNForACall) //managing large vectors + { + + CONCAT(_,GEN_SPGPU_HDIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, hackSize, hackOffsets, maxNForACall, cols, x, beta); + //if (avgDiags < 10 && handle->capabilityMajor > 1) + // CONCAT(_,GEN_SPGPU_HDIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, hackSize, hackOffsets, maxNForACall, cols, x, beta); + //else + //if (avgDiags < 20) + // CONCAT(_,GEN_SPGPU_HDIA_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, hackSize, hackOffsets, maxNForACall, cols, x, beta); + //else + //CONCAT(_,GEN_SPGPU_HDIA_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, hackSize, hackOffsets, maxNForACall, cols, x, beta); + + y = y + maxNForACall; + z = z + maxNForACall; + hackOffsets += maxNForACall/hackSize; + + rows -= maxNForACall; + } + CONCAT(_,GEN_SPGPU_HDIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, beta); + //if (avgDiags < 10 && handle->capabilityMajor > 1) + // CONCAT(_,GEN_SPGPU_HDIA_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, dMPitch, rows, cols, diags, x, beta); + //else + //if (avgDiags < 20) + // CONCAT(_,GEN_SPGPU_HDIA_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, beta); + //else + //CONCAT(_,GEN_SPGPU_HDIA_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, beta); + + cudaCheckError("CUDA error on hdia_spmv"); +} + diff --git a/cuda/spgpu/kernels/hdia_spmv_base_template.cuh b/cuda/spgpu/kernels/hdia_spmv_base_template.cuh new file mode 100644 index 00000000..155179fd --- /dev/null +++ b/cuda/spgpu/kernels/hdia_spmv_base_template.cuh @@ -0,0 +1,253 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define THREAD_BLOCK 128 + +__device__ void +CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int hackSize, const int* hackOffsets, + int rows, int cols, const VALUE_TYPE *x, VALUE_TYPE beta, int hackCount) +{ + int i = threadIdx.x + blockIdx.x * (blockDim.x); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows && CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[i]; + + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + int hackId = i / hackSize; + int hackLaneId = i % hackSize; + + + // shared between offsetsChunks and warpHackOffsetTemp + extern __shared__ int dynShrMem[]; + + int hackOffset = 0; + int nextOffset = 0; + + unsigned int laneId = threadIdx.x % warpSize; + unsigned int warpId = threadIdx.x / warpSize; + +#if __CUDA_ARCH__ < 300 + int* warpHackOffset = dynShrMem; + + + if (laneId == 0 && i < rows) + { + warpHackOffset[warpId] = hackOffsets[hackId]; + warpHackOffset[warpId + (blockDim.x / warpSize)] = hackOffsets[hackId+1]; + } + + __syncthreads(); + hackOffset = warpHackOffset[warpId]; + nextOffset = warpHackOffset[warpId + blockDim.x / warpSize]; + __syncthreads(); +#elif __CUDA_ARCH__ < 700 + if (laneId == 0 && i < rows) + { + hackOffset = hackOffsets[hackId]; + nextOffset = hackOffsets[hackId+1]; + } + + hackOffset = __shfl(hackOffset, 0); + nextOffset = __shfl(nextOffset, 0); +#else + if (laneId == 0 && i < rows) + { + hackOffset = hackOffsets[hackId]; + nextOffset = hackOffsets[hackId+1]; + } + + hackOffset = __shfl_sync(0xFFFFFFFF,hackOffset, 0); + nextOffset = __shfl_sync(0xFFFFFFFF,nextOffset, 0); + +#endif + + if (hackId >= hackCount) + return; + + dM += hackOffset*hackSize + hackLaneId; + offsets += hackOffset; + + // diags for this hack is next hackOffset minus current hackOffset + int diags = nextOffset - hackOffset; + + + // Warp oriented + int rounds = (diags + warpSize - 1)/warpSize; + + volatile int *offsetsChunk = dynShrMem + warpId*warpSize; + + for (int r = 0; r < rounds; r++) + { + // in the last round diags will be <= warpSize + if (laneId < diags) + offsetsChunk[laneId] = offsets[laneId]; + + if (i < rows) + { + int count = min(diags, warpSize); + +#ifdef USE_PREFETCHING + int j; + for (j=0; j<=count-2; j += 2) + { + // prefetch 2 values + int column1 = offsetsChunk[j] + i; + int column2 = offsetsChunk[j+1] + i; + + VALUE_TYPE xValue1, xValue2; + VALUE_TYPE mValue1, mValue2; + + bool inside1 = column1 >= 0 && column1 < cols; + bool inside2 = column2 >= 0 && column2 < cols; + + if(inside1) + { + mValue1 = dM[0]; +#ifdef ENABLE_CACHE + xValue1 = fetchTex (column1); +#else + xValue1 = x[column1]; +#endif + } + + dM += hackSize; + + if(inside2) + { + mValue2 = dM[0]; +#ifdef ENABLE_CACHE + xValue2 = fetchTex (column2); +#else + xValue2 = x[column2]; +#endif + } + + dM += hackSize; + + if(inside1) + zProd = CONCAT(VALUE_TYPE, _fma)(mValue1, xValue1, zProd); + if(inside2) + zProd = CONCAT(VALUE_TYPE, _fma)(mValue2, xValue2, zProd); + } + + for (;j= 0 && column < cols) + { + VALUE_TYPE xValue; +#ifdef ENABLE_CACHE + xValue = fetchTex (column); +#else + xValue = x[column]; +#endif + VALUE_TYPE mValue = dM[0]; + zProd = CONCAT(VALUE_TYPE, _fma)(mValue, xValue, zProd); + } + + dM += hackSize; + } +#else + for (int j=0;j= 0 && column < cols) + { + VALUE_TYPE xValue; +#ifdef ENABLE_CACHE + xValue = fetchTex (column); +#else + xValue = x[column]; +#endif + VALUE_TYPE mValue = dM[0]; + zProd = CONCAT(VALUE_TYPE, _fma)(mValue, xValue, zProd); + } + + dM += hackSize; + } +#endif + + } + + diags -= warpSize; + offsets += warpSize; + } + + + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (i >= rows) + return; + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[i] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[i] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); +} + +// Force to recompile and optimize with llvm +__global__ void +CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn_b0) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const VALUE_TYPE *x, int hackCount) +{ + CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _) + (z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, CONCAT(zero_,VALUE_TYPE)(), hackCount); +} + +__global__ void +CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, const VALUE_TYPE *x, VALUE_TYPE beta, int hackCount) +{ + CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _) + (z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, beta, hackCount); +} + +void +CONCAT(_,GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL)) +(spgpuHandle_t handle, VALUE_TYPE* z, const VALUE_TYPE *y, VALUE_TYPE alpha, + const VALUE_TYPE* dM, const int* offsets, int hackSize, const int* hackOffsets, int rows, int cols, + const VALUE_TYPE *x, VALUE_TYPE beta) +{ + dim3 block (THREAD_BLOCK); + dim3 grid ((rows + THREAD_BLOCK - 1) / THREAD_BLOCK); + + int hackCount = (rows + hackSize - 1)/hackSize; + +#ifdef ENABLE_CACHE + bind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn), cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn_b0), cudaFuncCachePreferL1); + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn) <<< grid, block, block.x*sizeof(int), handle->currentStream >>> (z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, beta, hackCount); + else + CONCAT(GEN_SPGPU_HDIA_NAME(TYPE_SYMBOL), _krn_b0) <<< grid, block, block.x*sizeof(int), handle->currentStream >>> (z, y, alpha, dM, offsets, hackSize, hackOffsets, rows, cols, x, hackCount); + +#ifdef ENABLE_CACHE + unbind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + +} + diff --git a/cuda/spgpu/kernels/hdia_sspmv.cu b/cuda/spgpu/kernels/hdia_sspmv.cu new file mode 100644 index 00000000..7370f368 --- /dev/null +++ b/cuda/spgpu/kernels/hdia_sspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "hdia.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#define TEX_FETCH_TYPE float +#include "hdia_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/hdia_zspmv.cu b/cuda/spgpu/kernels/hdia_zspmv.cu new file mode 100644 index 00000000..4f955ed9 --- /dev/null +++ b/cuda/spgpu/kernels/hdia_zspmv.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +extern "C" +{ +#include "core.h" +#include "hdia.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#define TEX_FETCH_TYPE int4 +#include "hdia_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/hell_cspmv.cu b/cuda/spgpu/kernels/hell_cspmv.cu new file mode 100644 index 00000000..86fbfc2b --- /dev/null +++ b/cuda/spgpu/kernels/hell_cspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +extern "C" +{ +#include "core.h" +#include "hell.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuFloatComplex +#define TYPE_SYMBOL C +#define TEX_FETCH_TYPE cuFloatComplex +#include "hell_spmv_base.cuh" diff --git a/cuda/spgpu/kernels/hell_dspmv.cu b/cuda/spgpu/kernels/hell_dspmv.cu new file mode 100644 index 00000000..b06f8cfb --- /dev/null +++ b/cuda/spgpu/kernels/hell_dspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "hell.h" +} + +#include "debug.h" + +#define VALUE_TYPE double +#define TYPE_SYMBOL D +#define TEX_FETCH_TYPE int2 +#include "hell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/hell_spmv_base.cuh b/cuda/spgpu/kernels/hell_spmv_base.cuh new file mode 100644 index 00000000..ca074d33 --- /dev/null +++ b/cuda/spgpu/kernels/hell_spmv_base.cuh @@ -0,0 +1,159 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_HELL_NAME +#undef X_TEX +#define X_TEX CONCAT(x_tex_, FUNC_SUFFIX) + +__device__ __host__ static float zero_float() { return 0.0f; } +__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); } +__device__ __host__ static bool float_isNotZero(float x) { return x != 0.0f; } + +__device__ static float float_fma(float a, float b, float c) { return PREC_FADD(PREC_FMUL (a, b), c); } +__device__ static float float_add(float a, float b) { return PREC_FADD (a, b); } +__device__ static float float_mul(float a, float b) { return PREC_FMUL (a, b); } + +__device__ static cuFloatComplex cuFloatComplex_fma(cuFloatComplex a, cuFloatComplex b, cuFloatComplex c) { return cuCfmaf(a, b, c); } +__device__ static cuFloatComplex cuFloatComplex_add(cuFloatComplex a, cuFloatComplex b) { return cuCaddf(a, b); } +__device__ static cuFloatComplex cuFloatComplex_mul(cuFloatComplex a, cuFloatComplex b) { return cuCmulf(a, b); } + +__device__ static float readValue_float(float fetch) { return fetch; } +__device__ static cuFloatComplex readValue_cuFloatComplex(cuFloatComplex fetch) { return fetch; } + +// host or c.c >= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); } +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif +#if 0 +// Texture cache management +texture < TEX_FETCH_TYPE, 1, cudaReadModeElementType > X_TEX; + +#define bind_tex_x(x) cudaBindTexture(NULL, X_TEX, x) +#define unbind_tex_x(x) cudaUnbindTexture(X_TEX) + +__device__ static VALUE_TYPE +fetchTex (int pointer) +{ + TEX_FETCH_TYPE fetch = tex1Dfetch (X_TEX, pointer); + return CONCAT(readValue_,VALUE_TYPE) (fetch); +} +#endif +#if __CUDA_ARCH__ < 300 +extern __shared__ int dynShrMem[]; +#endif + +#define GEN_SPGPU_HELL_NAME(x) CONCAT(CONCAT(spgpu,x),hellspmv_vanilla) +#define GEN_SPGPU_HELL_NAME_VANILLA(x) CONCAT(CONCAT(spgpu,x),hellspmv_vanilla) +#include "hell_spmv_base_template.cuh" +#undef GEN_SPGPU_HELL_NAME +#if 0 +#define GEN_SPGPU_HELL_NAME(x) CONCAT(CONCAT(spgpu,x),hellspmv_prefetch) +#define GEN_SPGPU_HELL_NAME_PREFETCH(x) CONCAT(CONCAT(spgpu,x),hellspmv_prefetch) +#undef USE_PREFETCHING +#define USE_PREFETCHING +#include "hell_spmv_base_template.cuh" +#define ENABLE_CACHE +#undef GEN_SPGPU_HELL_NAME +#define GEN_SPGPU_HELL_NAME(x) CONCAT(CONCAT(spgpu,x),hellspmv_texcache_prefetch) +#define GEN_SPGPU_HELL_NAME_TEX_PREFETCH(x) CONCAT(CONCAT(spgpu,x),hellspmv_texcache_prefetch) +#include "hell_spmv_base_template.cuh" +#undef GEN_SPGPU_HELL_NAME +#undef USE_PREFETCHING +#endif +#define GEN_SPGPU_HELL_NAME(x) CONCAT(CONCAT(spgpu,x),hellspmv_texcache) +#define GEN_SPGPU_HELL_NAME_TEX(x) CONCAT(CONCAT(spgpu,x),hellspmv_texcache) +#include "hell_spmv_base_template.cuh" +#undef GEN_SPGPU_HELL_NAME +#define GEN_SPGPU_HELL_NAME(x) CONCAT(CONCAT(spgpu,x),hellspmv) +void +GEN_SPGPU_HELL_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + VALUE_TYPE* z, + const VALUE_TYPE *y, + VALUE_TYPE alpha, + const VALUE_TYPE* cM, + const int* rP, + int hackSize, + const __device int* hackOffsets, + const __device int* rS, + const __device int* rIdx, + int avgNnzPerRow, + int rows, + const VALUE_TYPE *x, + VALUE_TYPE beta, + int baseIndex) +{ + + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + // maxNForACall should be a multiple of hackSize + maxNForACall = (maxNForACall/hackSize)*hackSize; + //fprintf(stderr,"Entering kernel %d maxNForACall\n",maxNForACall); + + while (rows > maxNForACall) //managing large vectors + { + + CONCAT(_,GEN_SPGPU_HELL_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, maxNForACall, x, beta, baseIndex); + /* if (avgNnzPerRow < 10 && handle->capabilityMajor > 1) */ + /* CONCAT(_,GEN_SPGPU_HELL_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, maxNForACall, x, beta, baseIndex); */ + /* else if (avgNnzPerRow < 20) */ + /* CONCAT(_,GEN_SPGPU_HELL_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, maxNForACall, x, beta, baseIndex); */ + /* else */ + /* CONCAT(_,GEN_SPGPU_HELL_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, maxNForACall, x, beta, baseIndex); */ + + y = y + maxNForACall; + z = z + maxNForACall; + hackOffsets = hackOffsets + maxNForACall/hackSize; + rS = rS + maxNForACall; + + rows -= maxNForACall; + } + //fprintf(stderr,"Calling kernel on %d rows\n",rows); + CONCAT(_,GEN_SPGPU_HELL_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, rows, x, beta, baseIndex); + //fprintf(stderr,"Done kernel on %d rows\n",rows); + /* if (avgNnzPerRow < 10 && handle->capabilityMajor > 1) */ + /* CONCAT(_,GEN_SPGPU_HELL_NAME_VANILLA(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, rows, x, beta, baseIndex); */ + /* else if (avgNnzPerRow < 20) */ + /* CONCAT(_,GEN_SPGPU_HELL_NAME_TEX(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, rows, x, beta, baseIndex); */ + /* else */ + /* CONCAT(_,GEN_SPGPU_HELL_NAME_TEX_PREFETCH(TYPE_SYMBOL)) (handle, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, avgNnzPerRow, rows, x, beta, baseIndex); */ + + cudaCheckError("CUDA error on hell_spmv"); +} + diff --git a/cuda/spgpu/kernels/hell_spmv_base_template.cuh b/cuda/spgpu/kernels/hell_spmv_base_template.cuh new file mode 100644 index 00000000..9ecd8f74 --- /dev/null +++ b/cuda/spgpu/kernels/hell_spmv_base_template.cuh @@ -0,0 +1,357 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#define IDX2 +#define THREAD_BLOCK 128 + +__device__ void +CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx_2) +(int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + __shared__ VALUE_TYPE temp[THREAD_BLOCK]; + + if (i < rows) { + int hackId = i / hackSize; + int hackLaneId = i % hackSize; + + int hackOffset; + unsigned int laneId = threadIdx.x % 32; +#if __CUDA_ARCH__ < 300 + // "volatile" used to avoid __syncthreads() + volatile int* warpHackOffset = dynShrMem; + + unsigned int warpId = threadIdx.x / 32; + + if (laneId == 0) + warpHackOffset[warpId] = hackOffsets[hackId]; + + hackOffset = warpHackOffset[warpId] + hackLaneId; +#elif __CUDA_ARCH__ < 700 + if (laneId == 0) + hackOffset = hackOffsets[hackId]; + //__syncthreads(); + hackOffset = __shfl(hackOffset, 0) + hackLaneId; +#else + if (laneId == 0) + hackOffset = hackOffsets[hackId]; + //__syncthreads(); + hackOffset = __shfl_sync(0xFFFFFFFF,hackOffset, 0) + hackLaneId; +#endif + + rP += hackOffset; + cM += hackOffset; + + int rowSize = rS[i]; + int rowSizeM = rowSize / 2; + + if (threadIdx.y == 0) { + if (rowSize % 2) + ++rowSizeM; + } else { + rP += hackSize; + cM += hackSize; + } + + + for (int j = 0; j < rowSizeM; j++) { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += hackSize; + rP += hackSize; + + value = cM[0]; + cM += hackSize; + cM += hackSize; + +#ifdef ENABLE_CACHE + fetch = fetchTex(pointer); +#else + fetch = x[pointer]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } + + // Reduction + if (threadIdx.y == 1) + temp[threadIdx.x] = zProd; + + __syncthreads(); + + if (threadIdx.y == 0) { + zProd = CONCAT(VALUE_TYPE, _add)(zProd, temp[threadIdx.x]); + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } + } +} + +__device__ void +CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx) + (int i, VALUE_TYPE yVal, int outRow, + VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + VALUE_TYPE zProd = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows) { + + int hackId = i / hackSize; + int hackLaneId = i % hackSize; + + int hackOffset; + unsigned int laneId = threadIdx.x % 32; +#if __CUDA_ARCH__ < 300 + // "volatile" used to avoid __syncthreads() + volatile int* warpHackOffset = dynShrMem; + + unsigned int warpId = threadIdx.x / 32; + + if (laneId == 0) + warpHackOffset[warpId] = hackOffsets[hackId]; + + hackOffset = warpHackOffset[warpId] + hackLaneId; +#elif __CUDA_ARCH__ < 700 + if (laneId == 0) + hackOffset = hackOffsets[hackId]; + //__syncthreads(); + hackOffset = __shfl(hackOffset, 0) + hackLaneId; +#else + if (laneId == 0) + hackOffset = hackOffsets[hackId]; + //__syncthreads(); + hackOffset = __shfl_sync(0xFFFFFFFF,hackOffset, 0) + hackLaneId; +#endif + + rP += hackOffset; + cM += hackOffset; + + int rowSize = rS[i]; + +#ifdef USE_PREFETCHING + for (int j = 0; j < rowSize / 2; j++) { + int pointers1, pointers2; + VALUE_TYPE values1, values2; + VALUE_TYPE fetches1, fetches2; + + pointers1 = rP[0] - baseIndex; + rP += hackSize; + pointers2 = rP[0] - baseIndex; + rP += hackSize; + + values1 = cM[0]; + cM += hackSize; + + values2 = cM[0]; + cM += hackSize; + +#ifdef ENABLE_CACHE + fetches1 = fetchTex(pointers1); + fetches2 = fetchTex(pointers2); +#else + fetches1 = x[pointers1]; + fetches2 = x[pointers2]; +#endif + + // avoid MAD on pre-Fermi + zProd = CONCAT(VALUE_TYPE, _fma)(values1, fetches1, zProd); + zProd = CONCAT(VALUE_TYPE, _fma)(values2, fetches2, zProd); + } + + // odd row size + if (rowSize % 2) { + int pointer = rP[0] - baseIndex; + VALUE_TYPE value = cM[0]; + VALUE_TYPE fetch; + +#ifdef ENABLE_CACHE + fetch = fetchTex (pointer); +#else + fetch = x[pointer]; +#endif + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } +#else + for (int j = 0; j < rowSize; j++) { + int pointer; + VALUE_TYPE value; + VALUE_TYPE fetch; + + pointer = rP[0] - baseIndex; + rP += hackSize; + + value = cM[0]; + cM += hackSize; + +#ifdef ENABLE_CACHE + fetch = fetchTex (pointer); +#else + fetch = x[pointer]; +#endif + zProd = CONCAT(VALUE_TYPE, _fma)(value, fetch, zProd); + } +#endif + // Since z and y are accessed with the same offset by the same thread, + // and the write to z follows the y read, y and z can share the same base address (in-place computing). + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + z[outRow] = CONCAT(VALUE_TYPE, _fma)(beta, yVal, CONCAT(VALUE_TYPE, _mul) (alpha, zProd)); + else + z[outRow] = CONCAT(VALUE_TYPE, _mul)(alpha, zProd); + } + +} + +__global__ void +CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_ridx) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, const int* rIdx, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + int outRow = 0; + if (i < rows) { + + outRow = rIdx[i]; + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[outRow]; + } +#if 1 + if (blockDim.y == 1) + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx) + (i, yVal, outRow, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); + else + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx_2) + (i, yVal, outRow, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); +#else + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx) + (i, yVal, outRow, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); +#endif +} + + +__device__ void +CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _) + (VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int i = threadIdx.x + blockIdx.x * (THREAD_BLOCK); + + VALUE_TYPE yVal = CONCAT(zero_,VALUE_TYPE)(); + + if (i < rows) { + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + yVal = y[i]; + + } + +#ifdef IDX2 + if (blockDim.y == 1) + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx) + (i, yVal, i, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); + else + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx_2) + (i, yVal, i, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); +#else + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _ridx) + (i, yVal, i, z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); +#endif + +} + +// Force to recompile and optimize with llvm +__global__ void +CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_b0) +(VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, int rows, const VALUE_TYPE *x, int baseIndex) +{ + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _) + (z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, CONCAT(zero_,VALUE_TYPE)(), baseIndex); +} + +__global__ void +CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn) + (VALUE_TYPE *z, const VALUE_TYPE *y, VALUE_TYPE alpha, const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _) + (z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); +} + +void +CONCAT(_,GEN_SPGPU_HELL_NAME(TYPE_SYMBOL)) +(spgpuHandle_t handle, VALUE_TYPE* z, const VALUE_TYPE *y, VALUE_TYPE alpha, + const VALUE_TYPE* cM, const int* rP, int hackSize, const int* hackOffsets, const int* rS, + const __device int* rIdx, int avgNnzPerRow, int rows, const VALUE_TYPE *x, VALUE_TYPE beta, int baseIndex) +{ + int avgThreshold; + + if (handle->capabilityMajor < 2) + avgThreshold = 8; + else if (handle->capabilityMajor < 3) + avgThreshold = 16; + else + avgThreshold = 32; +#ifdef IDX2 +#if defined(HELL_FORCE_THREADS_1) + dim3 block (THREAD_BLOCK, 1); +#elif defined(HELL_FORCE_THREADS_2) + dim3 block (THREAD_BLOCK, 2); +#else + dim3 block (THREAD_BLOCK, avgNnzPerRow >= avgThreshold ? 2 : 1); +#endif +#else + dim3 block (THREAD_BLOCK, 1); +#endif + dim3 grid ((rows + THREAD_BLOCK - 1) / THREAD_BLOCK); + + // Should we generalize the code to 1/2/4/8 threads per row? + // And maybe adjust THREAD_BLOCK size? + int shrMemSize; + shrMemSize=THREAD_BLOCK*sizeof(VALUE_TYPE); + +#ifdef ENABLE_CACHE + bind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + + if (rIdx) { + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_ridx), cudaFuncCachePreferL1); + + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_ridx) + <<< grid, block, shrMemSize, handle->currentStream >>> (z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rIdx, rows, x, beta, baseIndex); + } else { + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn), cudaFuncCachePreferL1); + cudaFuncSetCacheConfig(CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_b0), cudaFuncCachePreferL1); + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn) + <<< grid, block, shrMemSize, handle->currentStream >>> (z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, beta, baseIndex); + else + CONCAT(GEN_SPGPU_HELL_NAME(TYPE_SYMBOL), _krn_b0) + <<< grid, block, shrMemSize, handle->currentStream >>> (z, y, alpha, cM, rP, hackSize, hackOffsets, rS, rows, x, baseIndex); + } + +#ifdef ENABLE_CACHE + unbind_tex_x ((const TEX_FETCH_TYPE *) x); +#endif + +} diff --git a/cuda/spgpu/kernels/hell_sspmv.cu b/cuda/spgpu/kernels/hell_sspmv.cu new file mode 100644 index 00000000..8d296b81 --- /dev/null +++ b/cuda/spgpu/kernels/hell_sspmv.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "hell.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#define TEX_FETCH_TYPE float +#include "hell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/hell_zspmv.cu b/cuda/spgpu/kernels/hell_zspmv.cu new file mode 100644 index 00000000..784ab719 --- /dev/null +++ b/cuda/spgpu/kernels/hell_zspmv.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + +extern "C" +{ +#include "core.h" +#include "hell.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#define TEX_FETCH_TYPE int4 +#include "hell_spmv_base.cuh" + diff --git a/cuda/spgpu/kernels/igath.cu b/cuda/spgpu/kernels/igath.cu new file mode 100644 index 00000000..8c0fe1b6 --- /dev/null +++ b/cuda/spgpu/kernels/igath.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE int +#define TYPE_SYMBOL I +#include "gath_base.cuh" diff --git a/cuda/spgpu/kernels/iscat.cu b/cuda/spgpu/kernels/iscat.cu new file mode 100644 index 00000000..2001a740 --- /dev/null +++ b/cuda/spgpu/kernels/iscat.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE int +#define TYPE_SYMBOL I +#include "scat_base.cuh" diff --git a/cuda/spgpu/kernels/isetscal.cu b/cuda/spgpu/kernels/isetscal.cu new file mode 100644 index 00000000..d54eae95 --- /dev/null +++ b/cuda/spgpu/kernels/isetscal.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE int +#define TYPE_SYMBOL I +#include "setscal_base.cuh" diff --git a/cuda/spgpu/kernels/mathbase.cuh b/cuda/spgpu/kernels/mathbase.cuh new file mode 100644 index 00000000..e7d77778 --- /dev/null +++ b/cuda/spgpu/kernels/mathbase.cuh @@ -0,0 +1,53 @@ +#pragma once + +__device__ __host__ static float zero_float() { return 0.0f; } +__device__ __host__ static cuFloatComplex zero_cuFloatComplex() { return make_cuFloatComplex(0.0, 0.0); } +__device__ __host__ static bool float_isNotZero(float x) { return x != 0.0f; } +__device__ __host__ static bool float_isZero(float x) { return x == 0.0f; } +__device__ __host__ static bool int_isZero(int x) { return x == 0; } +__device__ __host__ static bool int_isNotZero(int x) { return x != 0; } + +__device__ static int int_fma(int a, int b, int c) { return ((a*b)+c); } + +__device__ static float float_fma(float a, float b, float c) { return PREC_FADD(PREC_FMUL (a, b), c); } +__device__ static float float_add(float a, float b) { return PREC_FADD (a, b); } +__device__ static float float_mul(float a, float b) { return PREC_FMUL (a, b); } +__device__ static float float_abs(float a) { return fabsf(a); } + +__device__ static cuFloatComplex cuFloatComplex_fma(cuFloatComplex a, cuFloatComplex b, cuFloatComplex c) { return cuCfmaf(a, b, c); } +__device__ static cuFloatComplex cuFloatComplex_add(cuFloatComplex a, cuFloatComplex b) { return cuCaddf(a, b); } +__device__ static cuFloatComplex cuFloatComplex_mul(cuFloatComplex a, cuFloatComplex b) { return cuCmulf(a, b); } +__device__ static cuFloatComplex cuFloatComplex_abs(cuFloatComplex a) { return make_cuFloatComplex(cuCabsf(a),0); } + +//__device__ static float cuFloatComplex_abs(cuFloatComplex a) { return cuCabsf(a); } + +__device__ static float readValue_float(float fetch) { return fetch; } +__device__ static cuFloatComplex readValue_cuFloatComplex(cuFloatComplex fetch) { return fetch; } + +// host or c.c >= 1.3 +#if (__CUDA_ARCH__ >= 130) || (!__CUDA_ARCH__) +__device__ __host__ static double zero_double() { return 0.0; } +__device__ __host__ static cuDoubleComplex zero_cuDoubleComplex() { return make_cuDoubleComplex(0.0, 0.0); } +__device__ __host__ static bool double_isNotZero(double x) { return x != 0.0; } +__device__ __host__ static bool double_isZero(double x) { return x == 0.0; } + +__device__ static double double_fma(double a, double b, double c) { return PREC_DADD(PREC_DMUL (a, b), c); } +__device__ static double double_add(double a, double b) { return PREC_DADD (a, b); } +__device__ static double double_mul(double a, double b) { return PREC_DMUL (a, b); } +__device__ static double double_abs(double a) { return fabs (a); } + +__device__ static cuDoubleComplex cuDoubleComplex_fma(cuDoubleComplex a, cuDoubleComplex b, cuDoubleComplex c) { return cuCfma(a, b, c); } +__device__ static cuDoubleComplex cuDoubleComplex_add(cuDoubleComplex a, cuDoubleComplex b) { return cuCadd(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_mul(cuDoubleComplex a, cuDoubleComplex b) { return cuCmul(a, b); } +__device__ static cuDoubleComplex cuDoubleComplex_abs(cuDoubleComplex a) { return make_cuDoubleComplex(cuCabs(a),0); } +//__device__ static double cuDoubleComplex_abs(cuDoubleComplex a) { return cuCabs(a); } + +__device__ static double readValue_double(int2 fetch) { return __hiloint2double (fetch.y, fetch.x); } +__device__ static cuDoubleComplex readValue_cuDoubleComplex(int4 fetch) +{ + cuDoubleComplex c; + c.x = __hiloint2double (fetch.y, fetch.x); + c.y = __hiloint2double (fetch.w, fetch.z); + return c; +} +#endif diff --git a/cuda/spgpu/kernels/sabgdxyz.cu b/cuda/spgpu/kernels/sabgdxyz.cu new file mode 100644 index 00000000..8c137ed3 --- /dev/null +++ b/cuda/spgpu/kernels/sabgdxyz.cu @@ -0,0 +1,79 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuSabgdxyz_krn(int n, float alpha, float beta, float gamma, float delta, + float* x, float *y, float *z) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + float t; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + if (beta == 0.0) + t = PREC_FMUL(alpha,x[id]); + else + t = PREC_FADD(PREC_FMUL(alpha, x[id]), PREC_FMUL(beta,y[id])); + if (delta == 0.0) + z[id] = gamma * t; + else + z[id] = PREC_FADD(PREC_FMUL(gamma, t), PREC_FMUL(delta,z[id])); + y[id] = t; + } +} + + +void spgpuSabgdxyz(spgpuHandle_t handle, + int n, + float alpha, + float beta, + float gamma, + float delta, + __device float* x, + __device float* y, + __device float *z) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuSabgdxyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, + x, y, z); +} + diff --git a/cuda/spgpu/kernels/sabs.cu b/cuda/spgpu/kernels/sabs.cu new file mode 100644 index 00000000..e335e773 --- /dev/null +++ b/cuda/spgpu/kernels/sabs.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define RES_VALUE_TYPE float +#define TYPE_SYMBOL S +#include "abs_base.cuh" + diff --git a/cuda/spgpu/kernels/samax.cu b/cuda/spgpu/kernels/samax.cu new file mode 100644 index 00000000..aaef17e2 --- /dev/null +++ b/cuda/spgpu/kernels/samax.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#include "amax_base.cuh" + diff --git a/cuda/spgpu/kernels/sasum.cu b/cuda/spgpu/kernels/sasum.cu new file mode 100644 index 00000000..33392203 --- /dev/null +++ b/cuda/spgpu/kernels/sasum.cu @@ -0,0 +1,32 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#include "asum_base.cuh" + diff --git a/cuda/spgpu/kernels/saxpby.cu b/cuda/spgpu/kernels/saxpby.cu new file mode 100644 index 00000000..2f06e39c --- /dev/null +++ b/cuda/spgpu/kernels/saxpby.cu @@ -0,0 +1,150 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); + //#include "cuda_util.h" +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + + +#if 1 +__global__ void spgpuSaxpby_krn(float *z, int n, float beta, float *y, float alpha, float* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + if (beta == 0.0f) { + for ( ; id < n; id +=gridSize) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + z[id] = PREC_FMUL(alpha,x[id]); + } + } else { + for ( ; id < n; id +=gridSize) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + z[id] = PREC_FADD(PREC_FMUL(alpha, x[id]), PREC_FMUL(beta,y[id])); + } + } +} + +void spgpuSaxpby(spgpuHandle_t handle, + __device float *z, + int n, + float beta, + __device float *y, + float alpha, + __device float* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuSaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} + +#else + +__global__ void spgpuSaxpby_krn(float *z, int n, float beta, float *y, float alpha, float* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + if (beta == 0.0f) + z[id] = PREC_FMUL(alpha,x[id]); + else + z[id] = PREC_FADD(PREC_FMUL(alpha, x[id]), PREC_FMUL(beta,y[id])); + } +} + + + +void spgpuSaxpby_(spgpuHandle_t handle, + __device float *z, + int n, + float beta, + __device float *y, + float alpha, + __device float* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + + dim3 block(BLOCK_SIZE); + dim3 grid(msize); + + spgpuSaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} + +void spgpuSaxpby(spgpuHandle_t handle, + __device float *z, + int n, + float beta, + __device float *y, + float alpha, + __device float* x) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (n > maxNForACall) //managing large vectors + { + spgpuSaxpby_(handle, z, maxNForACall, beta, y, alpha, x); + + x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + n -= maxNForACall; + } + + spgpuSaxpby_(handle, z, n, beta, y, alpha, x); + + cudaCheckError("CUDA error on saxpby"); +} +#endif +void spgpuSmaxpby(spgpuHandle_t handle, + __device float *z, + int n, + float beta, + __device float *y, + float alpha, + __device float* x, + int count, int pitch) +{ + + for (int i=0; icurrentStream>>>(y, n, alpha, x); +} + +void +GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) + (spgpuHandle_t handle, + __device VALUE_TYPE *y, + int n, + VALUE_TYPE alpha, + __device VALUE_TYPE *x) +{ + + int maxNForACall = max(handle->maxGridSizeX, THREAD_BLOCK*handle->maxGridSizeX); + + while (n > maxNForACall) //managing large vectors + { + CONCAT(_,GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL))(handle, y, maxNForACall, alpha, x); + x = x + maxNForACall; + y = y + maxNForACall; + n -= maxNForACall; + } + + CONCAT(_,GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL))(handle, y, n, alpha, x); + + cudaCheckError("CUDA error on scal"); +} diff --git a/cuda/spgpu/kernels/scat_base.cuh b/cuda/spgpu/kernels/scat_base.cuh new file mode 100644 index 00000000..f6f619b1 --- /dev/null +++ b/cuda/spgpu/kernels/scat_base.cuh @@ -0,0 +1,89 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#define PRE_CONCAT(A, B) A ## B +#define CONCAT(A, B) PRE_CONCAT(A, B) + +#undef GEN_SPGPU_FUNC_NAME +#define GEN_SPGPU_FUNC_NAME(x) CONCAT(CONCAT(spgpu,x),scat) + +#define BLOCK_SIZE 256 + +// Define: +//#define VALUE_TYPE +//#define TYPE_SYMBOL + +#include "mathbase.cuh" + +__global__ void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern) + (VALUE_TYPE* vector, int count, const int* indices, const VALUE_TYPE* values, int firstIndex, VALUE_TYPE beta) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < count) + { + int pos = indices[id]-firstIndex; + if (pos < 0) + return; + + if (CONCAT(VALUE_TYPE, _isNotZero(beta))) + vector[pos] = CONCAT(VALUE_TYPE, _fma)(beta, vector[pos], values[id]); + else + vector[pos] = values[id]; + } +} + +void +CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (spgpuHandle_t handle, VALUE_TYPE* y, int xNnz, const VALUE_TYPE *xValues, + const __device int *xIndices, int xBaseIndex, VALUE_TYPE beta) +{ + int msize = (xNnz+BLOCK_SIZE-1)/BLOCK_SIZE; + + dim3 block(BLOCK_SIZE); + dim3 grid(msize); + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_kern)<<currentStream>>>(y, xNnz, xIndices, xValues, xBaseIndex, beta); + +} + +void +GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) +(spgpuHandle_t handle, + __device VALUE_TYPE* y, + int xNnz, + const __device VALUE_TYPE *xValues, + const __device int *xIndices, + int xBaseIndex, + VALUE_TYPE beta) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (xNnz > maxNForACall) //managing large vectors + { + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, y, maxNForACall, xValues, xIndices, xBaseIndex, beta); + xIndices += maxNForACall; + xValues += maxNForACall; + xNnz -= maxNForACall; + } + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, y, xNnz, xValues, xIndices, xBaseIndex, beta); + cudaCheckError("CUDA error on scat"); +} + diff --git a/cuda/spgpu/kernels/sdot.cu b/cuda/spgpu/kernels/sdot.cu new file mode 100644 index 00000000..c19c7710 --- /dev/null +++ b/cuda/spgpu/kernels/sdot.cu @@ -0,0 +1,175 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudalang.h" +#include "cudadebug.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +//#define USE_CUBLAS + +#ifdef USE_CUBLAS +#include "cublas.h" +#endif + +#define BLOCK_SIZE 320 +//#define BLOCK_SIZE 512 + +//#define ASSUME_LOCK_SYNC_PARALLELISM + +#ifndef USE_CUBLAS +static __device__ float sdotReductionResult[128]; +#endif + +__global__ void spgpuSdot_kern(int n, float* x, float* y) +{ + __shared__ float sSum[BLOCK_SIZE]; + + float res = 0; + + float* lastX = x + n; + + x += threadIdx.x + blockIdx.x*BLOCK_SIZE; + y += threadIdx.x + blockIdx.x*BLOCK_SIZE; + + int blockOffset = gridDim.x*BLOCK_SIZE; + + int numSteps = (lastX - x + blockOffset - 1)/blockOffset; + + // prefetching + for (int j = 0; j < numSteps / 2; j++) + { + float x1 = x[0]; x += blockOffset; + float y1 = y[0]; y += blockOffset; + float x2 = x[0]; x += blockOffset; + float y2 = y[0]; y += blockOffset; + + res = PREC_FADD(res, PREC_FMUL(x1,y1)); + res = PREC_FADD(res, PREC_FMUL(x2,y2)); + + } + + if (numSteps % 2) + { + res = PREC_FADD(res, PREC_FMUL(*x,*y)); + } + + if (threadIdx.x >= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + float tRes[128]; + spgpuSdot_kern<<currentStream>>>(n, a, b); + cudaMemcpyFromSymbol(tRes, sdotReductionResult, blocks*sizeof(float)); + + for (int i=0; icurrentStream>>>(y, n, val); + +} + +void +GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL) + (spgpuHandle_t handle, + int first, + int last, + int baseIndex, + VALUE_TYPE val, + __device VALUE_TYPE* y) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + int n=last-first+1; + y += (first-baseIndex); + while (n > maxNForACall) //managing large vectors + { + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, maxNForACall, val,y); + y += maxNForACall; + n -= maxNForACall; + } + + CONCAT(GEN_SPGPU_FUNC_NAME(TYPE_SYMBOL),_) + (handle, n, val,y); + cudaCheckError("CUDA error on scat"); +} + diff --git a/cuda/spgpu/kernels/sgath.cu b/cuda/spgpu/kernels/sgath.cu new file mode 100644 index 00000000..d1980bc6 --- /dev/null +++ b/cuda/spgpu/kernels/sgath.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE float +#define TYPE_SYMBOL S +#include "gath_base.cuh" diff --git a/cuda/spgpu/kernels/snrm2.cu b/cuda/spgpu/kernels/snrm2.cu new file mode 100644 index 00000000..27b8a6e9 --- /dev/null +++ b/cuda/spgpu/kernels/snrm2.cu @@ -0,0 +1,166 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudalang.h" +#include "cudadebug.h" + + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +//#define USE_CUBLAS + +#define BLOCK_SIZE 320 +//#define BLOCK_SIZE 512 + +//#define ASSUME_LOCK_SYNC_PARALLELISM + +static __device__ float snrm2ReductionResult[128]; + +__global__ void spgpuSnrm2_kern(int n, float* x) +{ + __shared__ float sSum[BLOCK_SIZE]; + + float res = 0; + + float* lastX = x + n; + + x += threadIdx.x + blockIdx.x*BLOCK_SIZE; + + int blockOffset = gridDim.x*BLOCK_SIZE; + + int numSteps = (lastX - x + blockOffset - 1)/blockOffset; + + // prefetching + for (int j = 0; j < numSteps / 2; j++) + { + float x1 = x[0]; x += blockOffset; + float x2 = x[0]; x += blockOffset; + + res = PREC_FADD(res, PREC_FMUL(x1,x1)); + res = PREC_FADD(res, PREC_FMUL(x2,x2)); + + } + + if (numSteps % 2) + { + float x1 = x[0]; + res = PREC_FADD(res, PREC_FMUL(x1,x1)); + } + + if (threadIdx.x >= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + float tRes[128]; + + spgpuSnrm2_kern<<currentStream>>>(n, x); + cudaMemcpyFromSymbol(tRes, snrm2ReductionResult,blocks*sizeof(float)); + + for (int i=0; i + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuSxyzw_krn(int n, float a, float b, + float c, float d, + float e, float f, + float * x, float *y, + float *z, float *w) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + float ty, tz; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + ty = PREC_FADD(PREC_FMUL(a, x[id]), PREC_FMUL(b,y[id])); + tz = PREC_FADD(PREC_FMUL(c, ty), PREC_FMUL(d,z[id])); + w[id] = PREC_FADD(PREC_FMUL(e, tz), PREC_FMUL(f,w[id])); + y[id] = ty; + z[id] = tz; + } +} + + +void spgpuSxyzw(spgpuHandle_t handle, + int n, + float a, float b, + float c, float d, + float e, float f, + __device float * x, + __device float * y, + __device float * z, + __device float *w) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuSxyzw_krn<<currentStream>>>(n, a,b,c,d,e,f, + x, y, z,w); +} + diff --git a/cuda/spgpu/kernels/zabgdxyz.cu b/cuda/spgpu/kernels/zabgdxyz.cu new file mode 100644 index 00000000..48def937 --- /dev/null +++ b/cuda/spgpu/kernels/zabgdxyz.cu @@ -0,0 +1,80 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuZabgdxyz_krn(int n, cuDoubleComplex alpha, cuDoubleComplex beta, + cuDoubleComplex gamma, cuDoubleComplex delta, + cuDoubleComplex * x, cuDoubleComplex *y, cuDoubleComplex *z) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + cuDoubleComplex t; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + if (cuDoubleComplex_isZero(beta)) + t = cuCmul(alpha,x[id]); + else + t = cuCfma(alpha, x[id], cuCmul(beta,y[id])); + if (cuDoubleComplex_isZero(delta)) + z[id] = cuCmul(gamma, t); + else + z[id] = cuCfma(gamma, t, cuCmul(delta,z[id])); + y[id] = t; + } +} + + +void spgpuZabgdxyz(spgpuHandle_t handle, + int n, + cuDoubleComplex alpha, + cuDoubleComplex beta, + cuDoubleComplex gamma, + cuDoubleComplex delta, + __device cuDoubleComplex * x, + __device cuDoubleComplex * y, + __device cuDoubleComplex *z) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuZabgdxyz_krn<<currentStream>>>(n, alpha, beta, gamma, delta, + x, y, z); +} + diff --git a/cuda/spgpu/kernels/zabs.cu b/cuda/spgpu/kernels/zabs.cu new file mode 100644 index 00000000..ba0deba6 --- /dev/null +++ b/cuda/spgpu/kernels/zabs.cu @@ -0,0 +1,33 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2015 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define RES_VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#include "abs_base.cuh" + diff --git a/cuda/spgpu/kernels/zamax.cu b/cuda/spgpu/kernels/zamax.cu new file mode 100644 index 00000000..b4267180 --- /dev/null +++ b/cuda/spgpu/kernels/zamax.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#include "amax_base.cuh" diff --git a/cuda/spgpu/kernels/zasum.cu b/cuda/spgpu/kernels/zasum.cu new file mode 100644 index 00000000..ffa28a8d --- /dev/null +++ b/cuda/spgpu/kernels/zasum.cu @@ -0,0 +1,31 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2014 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "stdio.h" +#include "cudadebug.h" +#include "cudalang.h" + +extern "C" +{ +#include "core.h" +#include "vector.h" +} + +#include "debug.h" + +#define VALUE_TYPE cuDoubleComplex +#define TYPE_SYMBOL Z +#include "asum_base.cuh" diff --git a/cuda/spgpu/kernels/zaxpby.cu b/cuda/spgpu/kernels/zaxpby.cu new file mode 100644 index 00000000..8efc40d2 --- /dev/null +++ b/cuda/spgpu/kernels/zaxpby.cu @@ -0,0 +1,149 @@ +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "cudadebug.h" +#include "cudalang.h" +#include "cuComplex.h" + + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); + //#include "cuda_util.h" +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +#if 1 +__global__ void spgpuZaxpby_krn(cuDoubleComplex *z, int n, cuDoubleComplex beta, cuDoubleComplex *y, cuDoubleComplex alpha, cuDoubleComplex* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + if (cuDoubleComplex_isZero(beta)) { + for ( ; id < n; id +=gridSize) + //if (id,n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + z[id] = cuCmul(alpha,x[id]); + } + } else { + for ( ; id < n; id +=gridSize) + //if (id,n) + { + z[id] = cuCfma(beta, y[id], cuCmul(alpha, x[id])); + } + } +} + +void spgpuZaxpby(spgpuHandle_t handle, + __device cuDoubleComplex *z, + int n, + cuDoubleComplex beta, + __device cuDoubleComplex *y, + cuDoubleComplex alpha, + __device cuDoubleComplex* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuZaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} +#else +__global__ void spgpuZaxpby_krn(cuDoubleComplex *z, int n, cuDoubleComplex beta, cuDoubleComplex *y, cuDoubleComplex alpha, cuDoubleComplex* x) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + + if (id < n) + { + // Since z, x and y are accessed with the same offset by the same thread, + // and the write to z follows the x and y read, x, y and z can share the same base address (in-place computing). + + if (cuDoubleComplex_isZero(beta)) + z[id] = cuCmul(alpha,x[id]); + else + z[id] = cuCfma(alpha, x[id], cuCmul(beta,y[id])); + } +} + + +void spgpuZaxpby_(spgpuHandle_t handle, + __device cuDoubleComplex *z, + int n, + cuDoubleComplex beta, + __device cuDoubleComplex *y, + cuDoubleComplex alpha, + __device cuDoubleComplex* x) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + + dim3 block(BLOCK_SIZE); + dim3 grid(msize); + + spgpuZaxpby_krn<<currentStream>>>(z, n, beta, y, alpha, x); +} + +void spgpuZaxpby(spgpuHandle_t handle, + __device cuDoubleComplex *z, + int n, + cuDoubleComplex beta, + __device cuDoubleComplex *y, + cuDoubleComplex alpha, + __device cuDoubleComplex* x) +{ + int maxNForACall = max(handle->maxGridSizeX, BLOCK_SIZE*handle->maxGridSizeX); + while (n > maxNForACall) //managing large vectors + { + spgpuZaxpby_(handle, z, maxNForACall, beta, y, alpha, x); + + x = x + maxNForACall; + y = y + maxNForACall; + z = z + maxNForACall; + n -= maxNForACall; + } + + spgpuZaxpby_(handle, z, n, beta, y, alpha, x); + + cudaCheckError("CUDA error on daxpby"); +} +#endif +void spgpuZmaxpby(spgpuHandle_t handle, + __device cuDoubleComplex *z, + int n, + cuDoubleComplex beta, + __device cuDoubleComplex *y, + cuDoubleComplex alpha, + __device cuDoubleComplex* x, + int count, int pitch) +{ + + for (int i=0; i= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + cuDoubleComplex tRes[128]; + + spgpuZdot_kern<<currentStream>>>(n, a, b); + cudaMemcpyFromSymbol(tRes, ddotReductionResult,blocks*sizeof(cuDoubleComplex)); + + for (int i=0; i= 32) + sSum[threadIdx.x] = res; + + __syncthreads(); + + + // Start reduction! + + if (threadIdx.x < 32) + { + for (int i=1; imultiProcessorCount, (n+BLOCK_SIZE-1)/BLOCK_SIZE)); +#endif + + double tRes[128]; + + spgpuZnrm2_kern<<currentStream>>>(n, x);; + cudaMemcpyFromSymbol(tRes, dnrm2ReductionResult,blocks*sizeof(double)); + + for (int i=0; i + +extern "C" +{ +#include "core.h" +#include "vector.h" + int getGPUMultiProcessors(); + int getGPUMaxThreadsPerMP(); +} + + +#include "debug.h" + +#define BLOCK_SIZE 512 + +__global__ void spgpuZxyzw_krn(int n, cuDoubleComplex a, cuDoubleComplex b, + cuDoubleComplex c, cuDoubleComplex d, + cuDoubleComplex e, cuDoubleComplex f, + cuDoubleComplex * x, cuDoubleComplex *y, + cuDoubleComplex *z, cuDoubleComplex *w) +{ + int id = threadIdx.x + BLOCK_SIZE*blockIdx.x; + unsigned int gridSize = blockDim.x * gridDim.x; + cuDoubleComplex ty, tz; + for ( ; id < n; id +=gridSize) + //if (id,n) + { + + ty = cuCfma(a, x[id], cuCmul(b,y[id])); + tz = cuCfma(c, ty, cuCmul(d,z[id])); + w[id] = cuCfma(e, tz, cuCmul(f,w[id])); + y[id] = ty; + z[id] = tz; + } +} + + +void spgpuZxyzw(spgpuHandle_t handle, + int n, + cuDoubleComplex a, cuDoubleComplex b, + cuDoubleComplex c, cuDoubleComplex d, + cuDoubleComplex e, cuDoubleComplex f, + __device cuDoubleComplex * x, + __device cuDoubleComplex * y, + __device cuDoubleComplex * z, + __device cuDoubleComplex *w) +{ + int msize = (n+BLOCK_SIZE-1)/BLOCK_SIZE; + int num_mp, max_threads_mp, num_blocks_mp, num_blocks; + dim3 block(BLOCK_SIZE); + num_mp = getGPUMultiProcessors(); + max_threads_mp = getGPUMaxThreadsPerMP(); + num_blocks_mp = max_threads_mp/BLOCK_SIZE; + num_blocks = num_blocks_mp*num_mp; + dim3 grid(num_blocks); + + spgpuZxyzw_krn<<currentStream>>>(n, a,b,c,d,e,f, + x, y, z,w); +} + diff --git a/cuda/spgpu/vector.h b/cuda/spgpu/vector.h new file mode 100644 index 00000000..d08e6edd --- /dev/null +++ b/cuda/spgpu/vector.h @@ -0,0 +1,1326 @@ +#pragma once + +/* + * spGPU - Sparse matrices on GPU library. + * + * Copyright (C) 2010 - 2012 + * Davide Barbieri - University of Rome Tor Vergata + * Salvatore Filippone - University of Rome Tor Vergata + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 3 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include "core.h" + +/** \addtogroup vecFun Vectors and sparse vectors routines + * @{ + */ + + +#ifdef __cplusplus +extern "C" { +#endif + +/** +* \fn void spgpuIgath(spgpuHandle_t handle, __device int *xValues, int xNnz, const __device int *xIndices, int xBaseIndex, const __device int* y) + * Integer gather from y to sparse(x). Computes the integer gather from y to xValues (using xIndices). + * \param handle the spgpu handle used to call this routine + * \param xValues the destination array for gathered values + * \param xNnz the number of elements to gather + * \param xIndices the array of indices for the elements to be gathered + * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran). + * \param y the source vector (from which the elements will be gathered) + */ +void spgpuIgath(spgpuHandle_t handle, + __device int *xValues, + int xNnz, + const __device int *xIndices, + int xBaseIndex, + const __device int* y); + +/** +* \fn void spgpuIscat(spgpuHandle_t handle, __device int* y, int xNnz, const __device int *xValues, const __device int *xIndices, int xBaseIndex, int beta) + * Integer scatter from sparse(x) to y. Computes the integer scatter from xValues to y (using xIndices). + * The scattered element will be, for i in [0,xNnz), y[xIndices[i]] = beta*y[xIndices[i]] + xValues[i] (to be noted that + * y values will be multiplied with beta just for scattered values). + * \param handle the spgpu handle used to call this routine + * \param y the destination vector (to which the elements will be scattered) + * \param xNnz the number of elements to scatter + * \param xValues the source array from which the values will be read + * \param xIndices the array of indices for the elements to be scattered + * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran). + * \param beta the beta value + */ +void spgpuIscat(spgpuHandle_t handle, + __device int* y, + int xNnz, + const __device int *xValues, + const __device int *xIndices, + int xBaseIndex, int beta); + + + + +/** +* \fn float spgpuSdot (spgpuHandle_t handle, int n, __device float* a, __device float* b) + * Computes single precision dot product of a and b vectors. + * \param handle The spgpu handle used to call this routine + * \param n the vectors length + * \param a the first input vector + * \param b the second input vector + * \return the dot product + */ +float spgpuSdot(spgpuHandle_t handle, + int n, + __device float* a, + __device float* b); + +/** +* \fn float spgpuSmdot (spgpuHandle_t handle, float* y, int n, __device float* a, __device float* b, int count, int pitch) + * Computes single precision dot product of a and b multivectors. + * \param handle the spgpu handle used to call this routine + * \param y the result, made by dot products of every vector couples from the multivectors a and b + * \param n the vectors' length + * \param a the first input multivector + * \param b the second input multivector + * \param count the number of vectors in every multivector + * \param pitch the pitch, in number of elements, of every multivectors (so the second element of the first vector in a will be a[pitch], the third a[2*pitch], etc.). + */ +void spgpuSmdot(spgpuHandle_t handle, + float* y, + int n, + __device float* a, + __device float* b, + int count, + int pitch); + + +/** +* \fn void spgpuSabs (spgpuHandle_t handle, __device float* y, int n, float alpha, __device float* x) + * Computes single precision (y = alpha * abs(x)) for each element in x. + * \param handle The spgpu handle used to call this routine + * \param y the resulting vector (could be x) + * \param n the vectors length + * \param alpha the alpha value + * \param x the input vector + */ +void spgpuSabs(spgpuHandle_t handle, + __device float* y, + int n, + float alpha, + __device float* x); + +/** +* \fn float spgpuSnrm2(spgpuHandle_t handle, int n, __device float* x) + * Computes the single precision Euclidean vector norm of x. + * \param handle the spgpu handle used to call this routine + * \param n the vector's length + * \param x the input vector + * \return the euclidean vector norm + */ +float spgpuSnrm2(spgpuHandle_t handle, + int n, + __device float* x); + +/** +* \fn void spgpuSmnrm2(spgpuHandle_t handle, float *y, int n, __device float *x, int count, int pitch) + * Computes the single precision Euclidean vector norm for every vector in the multivector x. + * \param handle the spgpu handle used to call this routine + * \param y the array of results + * \param n the vectors' length in the x multivector + * \param x the input multivector + * \param count the number of vectors in x + * \param pitch the multivector's pitch + */ +void spgpuSmnrm2(spgpuHandle_t handle, + float *y, + int n, + __device float *x, + int count, + int pitch); + + +/** +* \fn void spgpuSscal(spgpuHandle_t handle, __device float *y, int n, float alpha, __device float *x) + * Computes the single precision y = alpha * x. y could be exactly x (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param y the resulting vector + * \param n the vectors' length + * \param alpha the alpha value + * \param x the input vector + */ +void spgpuSscal(spgpuHandle_t handle, + __device float *y, + int n, + float alpha, + __device float *x); + +/** +* \fn void spgpuSaxpby(spgpuHandle_t handle, __device float *z, int n, float beta, __device float *y, float alpha, __device float* x) + * Computes the single precision z = beta * y + alpha * x. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param y the first input vector + * \param alpha the alpha value + * \param x the second input vector + */ +void spgpuSaxpby(spgpuHandle_t handle, + __device float *z, + int n, + float beta, + __device float *y, + float alpha, + __device float* x); + + +void spgpuSabgdxyz(spgpuHandle_t handle, + int n, + float alpha, + float beta, + float gamma, + float delta, + __device float* x, + __device float *y, + __device float *z) +; + + void spgpuSxyzw(spgpuHandle_t handle, + int n, + float a, float b, + float c, float d, + float e, float f, + __device float* x, + __device float *y, + __device float *z, + __device float *w) +; + +/** +* \fn void spgpuSmaxpby(spgpuHandle_t handle, __device float *z, int n, float beta, __device float *y, float alpha, __device float* x, int count, int pitch) + * Computes the single precision z = beta * y + alpha * x of x and y multivectors. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param y the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param count the number of vectors in z,x and y multivectors + * \param pitch the multivectors pitch + */ + +void spgpuSmaxpby(spgpuHandle_t handle, + __device float *z, + int n, + float beta, + __device float *y, + float alpha, + __device float* x, + int count, int pitch); + +/** +* \fn void spgpuSaxy(spgpuHandle_t handle, __device float *z, int n, float alpha, __device float *x, __device float* y) + * Computes the single precision z = alpha * x * y. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length + * \param alpha the alpha value + * \param x the first input vector + * \param y the second input vector + */ +void spgpuSaxy(spgpuHandle_t handle, + __device float *z, + int n, + float alpha, + __device float *x, + __device float *y); + +/** +* \fn void spgpuSaxypbz(spgpuHandle_t handle, __device float *w, int n, float beta, __device float *z, float alpha, __device float* x, __device float *y) + * Computes the single precision w = beta * z + alpha * x * y. w could be exactly x, y or z (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param w the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param z the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param y the third input vector + */ +void spgpuSaxypbz(spgpuHandle_t handle, + __device float *w, + int n, + float beta, + __device float *z, + float alpha, + __device float* x, + __device float *y); + +/** +* \fn void spgpuSmaxy(spgpuHandle_t handle, __device float *z, int n, float alpha, __device float *x, __device float* y, int count, int pitch) + * Computes the single precision z = alpha * x * y for z,x and y multivectors. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting multivector + * \param n the vectors' length in the multivectors + * \param alpha the alpha value + * \param x the first input multivector + * \param y the second input multivector + * \param count the number of vectors in z,x and y multivectors + * \param pitch the multivectors pitch + */ +void spgpuSmaxy(spgpuHandle_t handle, + __device float *z, + int n, + float alpha, + __device float* x, + __device float *y, + int count, + int pitch); + +/** +* \fn void spgpuSmaxypbz(spgpuHandle_t handle, __device float *w, int n, float beta, __device float *z, float alpha, __device float* x, __device float *y, int count, int pitch) + * Computes the single precision w = beta * z + alpha * x * y. w could be exactly x, y or z (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param w the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param z the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param y the third input vector + * \param count the number of vectors in w,z,x and y multivectors + * \param pitch the multivectors' pitch + */ + +void spgpuSmaxypbz(spgpuHandle_t handle, + __device float *w, + int n, + float beta, + __device float *z, + float alpha, + __device float* x, + __device float *y, + int count, + int pitch); + +/** +* \fn void spgpuSgath(spgpuHandle_t handle, __device float *xValues, int xNnz, const __device int *xIndices, int xBaseIndex, const __device float* y) + * Single precision gather from y to sparse(x). Computes the single precision gather from y to xValues (using xIndices). + * \param handle the spgpu handle used to call this routine + * \param xValues the destination array for gathered values + * \param xNnz the number of elements to gather + * \param xIndices the array of indices for the elements to be gathered + * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran). + * \param y the source vector (from which the elements will be gathered) + */ +void spgpuSgath(spgpuHandle_t handle, + __device float *xValues, + int xNnz, + const __device int *xIndices, + int xBaseIndex, + const __device float* y); + +/** +* \fn void spgpuSscat(spgpuHandle_t handle, __device float* y, int xNnz, const __device float *xValues, const __device int *xIndices, int xBaseIndex, float beta) + * Single precision scatter from sparse(x) to y. Computes the single precision scatter from xValues to y (using xIndices). + * The scattered element will be, for i in [0,xNnz), y[xIndices[i]] = beta*y[xIndices[i]] + xValues[i] (to be noted that + * y values will be multiplied with beta just for scattered values). + * \param handle the spgpu handle used to call this routine + * \param y the destination vector (to which the elements will be scattered) + * \param xNnz the number of elements to scatter + * \param xValues the source array from which the values will be read + * \param xIndices the array of indices for the elements to be scattered + * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran). + * \param beta the beta value + */ +void spgpuSscat(spgpuHandle_t handle, + __device float* y, + int xNnz, + const __device float *xValues, + const __device int *xIndices, + int xBaseIndex, float beta); + + +float spgpuSasum(spgpuHandle_t handle, + int n, + float* x); + +float spgpuSamax(spgpuHandle_t handle, + int n, + float* x); + +void spgpuSmasum(spgpuHandle_t handle, + float* y, + int n, + float* x, + int count, + int pitch); + +void spgpuSmamax(spgpuHandle_t handle, + float* y, + int n, + float* x, + int count, + int pitch); + + +/** +* \fn void spgpuDscal(spgpuHandle_t handle, __device double *y, int n, double alpha, __device double *x) + * Computes the Double precision y = alpha * x. y could be exactly x (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param y the resulting vector + * \param n the vectors' length + * \param alpha the alpha value + * \param x the input vector + */ +void spgpuDscal(spgpuHandle_t handle, + __device double *y, + int n, + double alpha, + __device double *x); + +/** +* \fn float spgpuDdot (spgpuHandle_t handle, int n, __device double* a, __device double* b) + * Computes double precision dot product of a and b vectors. + * \param handle The spgpu handle used to call this routine + * \param n the vectors length + * \param a the first input vector + * \param b the second input vector + * \return the dot product + */ + double spgpuDdot(spgpuHandle_t handle, + int n, + __device double* a, + __device double* b); + +/** +* \fn void spgpuDabs (spgpuHandle_t handle, __device double* y, int n, double alpha, __device double* x) + * Computes double precision (y = alpha * abs(x)) for each element in x. + * \param handle The spgpu handle used to call this routine + * \param y the resulting vector (could be x) + * \param n the vectors length + * \param alpha the alpha value + * \param x the input vector + */ +void spgpuDabs(spgpuHandle_t handle, + __device double* y, + int n, + double alpha, + __device double* x); + +/** +* \fn float spgpuDmdot (spgpuHandle_t handle, double* y, int n, __device double* a, __device double* b, int count, int pitch) + * Computes double precision dot product of a and b multivectors. + * \param handle the spgpu handle used to call this routine + * \param y the result, made by dot products of every vector couples from the multivectors a and b + * \param n the vectors' length + * \param a the first input multivector + * \param b the second input multivector + * \param count the number of vectors in every multivector + * \param pitch the pitch, in number of elements, of every multivectors (so the second element of the first vector in a will be a[pitch], the third a[2*pitch], etc.). + */ +void spgpuDmdot(spgpuHandle_t handle, + double* y, + int n, + __device double* a, + __device double* b, + int count, + int pitch); + + +/** +* \fn double spgpuDnrm2(spgpuHandle_t handle, int n, __device double* x) + * Computes the double precision Euclidean vector norm of x. + * \param handle the spgpu handle used to call this routine + * \param n the vector's length + * \param x the input vector + * \return the euclidean vector norm + */ + double spgpuDnrm2(spgpuHandle_t handle, + int n, + __device double* x); + +/** +* \fn void spgpuDmnrm2(spgpuHandle_t handle, double *y, int n, __device double *x, int count, int pitch) + * Computes the double precision Euclidean vector norm for every vector in the multivector x. + * \param handle the spgpu handle used to call this routine + * \param y the array of results + * \param n the vectors' length in the x multivector + * \param x the input multivector + * \param count the number of vectors in x + * \param pitch the multivector's pitch + */ + +void spgpuDmnrm2(spgpuHandle_t handle, + double *y, + int n, + __device double *x, + int count, + int pitch); + +/** +* \fn void spgpuDaxpby(spgpuHandle_t handle, __device double *z, int n, double beta, __device double *y, double alpha, __device double* x) + * Computes the double precision z = beta * y + alpha * x. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param y the first input vector + * \param alpha the alpha value + * \param x the second input vector + */ +void spgpuDaxpby(spgpuHandle_t handle, + __device double *z, + int n, + double beta, + __device double *y, + double alpha, + __device double* x); + + +void spgpuDabgdxyz(spgpuHandle_t handle, + int n, + double alpha, + double beta, + double gamma, + double delta, + __device double* x, + __device double *y, + __device double *z) +; + + void spgpuDxyzw(spgpuHandle_t handle, + int n, + double a, double b, + double c, double d, + double e, double f, + __device double* x, + __device double *y, + __device double *z, + __device double *w) +; + + /** +* \fn void spgpuDmaxpby(spgpuHandle_t handle, __device double *z, int n, double beta, __device double *y, double alpha, __device double* x, int count, int pitch) + * Computes the double precision z = beta * y + alpha * x of x and y multivectors. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param y the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param count the number of vectors in z,x and y multivectors + * \param pitch the multivector's pitch + */ + + void spgpuDmaxpby(spgpuHandle_t handle, + __device double *z, + int n, + double beta, + __device double *y, + double alpha, + __device double* x, + int count, int pitch); + +/** +* \fn void spgpuDaxy(spgpuHandle_t handle, __device double *z, int n, double alpha, __device double *x, __device double* y) + * Computes the double precision z = alpha * x * y. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length in the x multivector + * \param alpha the alpha value + * \param x the second input vector + * \param y the first input vector + */ +void spgpuDaxy(spgpuHandle_t handle, + __device double *z, + int n, + double alpha, + __device double *x, + __device double *y); + +/** +* \fn void spgpuDaxypbz(spgpuHandle_t handle, __device double *w, int n, double beta, __device double *z, double alpha, __device double* x, __device double *y) + * Computes the double precision w = beta * z + alpha * x * y. w could be exactly x, y or z (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param w the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param z the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param y the third input vector + */void spgpuDaxypbz(spgpuHandle_t handle, + __device double *w, + int n, + double beta, + __device double *z, + double alpha, + __device double* x, + __device double *y); + +/** +* \fn void spgpuDmaxy(spgpuHandle_t handle, __device double *z, int n, double alpha, __device double *x, __device double* y, int count, int pitch) + * Computes the double precision z = alpha * x * y for z,x and y multivectors. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting multivector + * \param n the vectors' length in the multivectors + * \param alpha the alpha value + * \param x the first input multivector + * \param y the second input multivector + * \param count the number of vectors in z,x and y multivectors + * \param pitch the multivectors pitch + */ +void spgpuDmaxy(spgpuHandle_t handle, + __device double *z, + int n, + double alpha, + __device double* x, + __device double *y, + int count, + int pitch); + +/** +* \fn void spgpuDmaxypbz(spgpuHandle_t handle, __device double *w, int n, double beta, __device double *z, double alpha, __device double* x, __device double *y, int count, int pitch) + * Computes the double precision w = beta * z + alpha * x * y. w could be exactly x, y or z (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param w the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param z the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param y the third input vector + * \param count the number of vectors in w,z,x and y multivectors + * \param pitch the multivectors' pitch + */ + +void spgpuDmaxypbz(spgpuHandle_t handle, + __device double *w, + int n, + double beta, + __device double *z, + double alpha, + __device double* x, + __device double *y, + int count, + int pitch); + +/** +* \fn void spgpuDgath(spgpuHandle_t handle, __device double *xValues, int xNnz, const __device int *xIndices, int xBaseIndex, const __device double* y) + * Double precision gather from y to sparse(x). Computes the double precision gather from y to xValues (using xIndices). + * \param handle the spgpu handle used to call this routine + * \param xValues the destination array for gathered values + * \param xNnz the number of elements to gather + * \param xIndices the array of indices for the elements to be gathered + * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran). + * \param y the source vector (from which the elements will be gathered) + */ +void spgpuDgath(spgpuHandle_t handle, + __device double *xValues, + int xNnz, + const __device int *xIndices, + int xBaseIndex, + const __device double* y); + +/** +* \fn void spgpuDscat(spgpuHandle_t handle, __device double* y, int xNnz, const __device double *xValues, const __device int *xIndices, int xBaseIndex, double beta) + * Double precision scatter from sparse(x) to y. Computes the single precision scatter from xValues to y (using xIndices). + * The scattered element will be, for i in [0,xNnz), y[xIndices[i]] = beta*y[xIndices[i]] + xValues[i] (to be noted that + * y values will be multiplied with beta just for scattered values). + * \param handle the spgpu handle used to call this routine + * \param y the destination vector (to which the elements will be scattered) + * \param xNnz the number of elements to scatter + * \param xValues the source array from which the values will be read + * \param xIndices the array of indices for the elements to be scattered + * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran). + * \param beta the beta value + */ +void spgpuDscat(spgpuHandle_t handle, + __device double* y, + int xNnz, + const __device double *xValues, + const __device int *xIndices, + int xBaseIndex, double beta); + +double spgpuDasum(spgpuHandle_t handle, + int n, + double* x); + +double spgpuDamax(spgpuHandle_t handle, + int n, + double* x); + +void spgpuDmasum(spgpuHandle_t handle, + double* y, + int n, + double* x, + int count, + int pitch); + +void spgpuDmamax(spgpuHandle_t handle, + double* y, + int n, + double* x, + int count, + int pitch); + +/** +* \fn cuFloatComplex spgpuCdot (spgpuHandle_t handle, int n, __device cuFloatComplex* a, __device cuFloatComplex* b) + * Computes single precision complex dot product of a and b vectors. + * \param handle The spgpu handle used to call this routine + * \param n the vectors length + * \param a the first input vector + * \param b the second input vector + * \return the dot product + */ +cuFloatComplex spgpuCdot(spgpuHandle_t handle, + int n, + __device cuFloatComplex* a, + __device cuFloatComplex* b); + +/** +* \fn cuFloatComplex spgpuCmdot (spgpuHandle_t handle, cuFloatComplex* y, int n, __device cuFloatComplex* a, __device cuFloatComplex* b, int count, int pitch) + * Computes single precision complex dot product of a and b multivectors. + * \param handle the spgpu handle used to call this routine + * \param y the result, made by dot products of every vector couples from the multivectors a and b + * \param n the vectors' length + * \param a the first input multivector + * \param b the second input multivector + * \param count the number of vectors in every multivector + * \param pitch the pitch, in number of elements, of every multivectors (so the second element of the first vector in a will be a[pitch], the third a[2*pitch], etc.). + */ +void spgpuCmdot(spgpuHandle_t handle, + cuFloatComplex* y, + int n, + __device cuFloatComplex* a, + __device cuFloatComplex* b, + int count, + int pitch); + + +/** +* \fn void spgpuCabs (spgpuHandle_t handle, __device float* y, int n, float alpha, __device cuFloatComplex* x) + * Computes single precision complex (y = alpha * abs(x)) for each element in x. + * \param handle The spgpu handle used to call this routine + * \param y the resulting vector (could be x) + * \param n the vectors length + * \param alpha the alpha value + * \param x the input vector + */ +void spgpuCabs(spgpuHandle_t handle, + __device cuFloatComplex* y, + int n, + cuFloatComplex alpha, + __device cuFloatComplex* x); + +/** +* \fn float spgpuCnrm2(spgpuHandle_t handle, int n, __device cuFloatComplex* x) + * Computes the single precision complex Euclidean vector norm of x. + * \param handle the spgpu handle used to call this routine + * \param n the vector's length + * \param x the input vector + * \return the euclidean vector norm + */ +float spgpuCnrm2(spgpuHandle_t handle, + int n, + __device cuFloatComplex* x); + +/** +* \fn void spgpuCmnrm2(spgpuHandle_t handle, float *y, int n, __device cuFloatComplex *x, int count, int pitch) + * Computes the single precision complex Euclidean vector norm for every vector in the multivector x. + * \param handle the spgpu handle used to call this routine + * \param y the array of results + * \param n the vectors' length in the x multivector + * \param x the input multivector + * \param count the number of vectors in x + * \param pitch the multivector's pitch + */ +void spgpuCmnrm2(spgpuHandle_t handle, + float *y, + int n, + __device cuFloatComplex *x, + int count, + int pitch); + + +/** +* \fn void spgpuCscal(spgpuHandle_t handle, __device cuFloatComplex *y, int n, cuFloatComplex alpha, __device cuFloatComplex *x) + * Computes the single precision complex y = alpha * x. y could be exactly x (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param y the resulting vector + * \param n the vectors' length + * \param alpha the alpha value + * \param x the input vector + */ +void spgpuCscal(spgpuHandle_t handle, + __device cuFloatComplex *y, + int n, + cuFloatComplex alpha, + __device cuFloatComplex *x); + +/** +* \fn void spgpuCaxpby(spgpuHandle_t handle, __device cuFloatComplex *z, int n, cuFloatComplex beta, __device cuFloatComplex *y, cuFloatComplex alpha, __device cuFloatComplex* x) + * Computes the single precision complex z = beta * y + alpha * x. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param y the first input vector + * \param alpha the alpha value + * \param x the second input vector + */ +void spgpuCaxpby(spgpuHandle_t handle, + __device cuFloatComplex *z, + int n, + cuFloatComplex beta, + __device cuFloatComplex *y, + cuFloatComplex alpha, + __device cuFloatComplex* x); + + +void spgpuCabgdxyz(spgpuHandle_t handle, + int n, + cuFloatComplex alpha, + cuFloatComplex beta, + cuFloatComplex gamma, + cuFloatComplex delta, + __device cuFloatComplex* x, + __device cuFloatComplex *y, + __device cuFloatComplex *z) +; + + void spgpuCxyzw(spgpuHandle_t handle, + int n, + cuFloatComplex a, cuFloatComplex b, + cuFloatComplex c, cuFloatComplex d, + cuFloatComplex e, cuFloatComplex f, + __device cuFloatComplex* x, + __device cuFloatComplex *y, + __device cuFloatComplex *z, + __device cuFloatComplex *w) +; + + +/** +* \fn void spgpuCmaxpby(spgpuHandle_t handle, __device cuFloatComplex *z, int n, cuFloatComplex beta, __device cuFloatComplex *y, cuFloatComplex alpha, __device cuFloatComplex* x, int count, int pitch) + * Computes the single precision complex z = beta * y + alpha * x of x and y multivectors. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param y the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param count the number of vectors in z,x and y multivectors + * \param pitch the multivectors pitch + */ + +void spgpuCmaxpby(spgpuHandle_t handle, + __device cuFloatComplex *z, + int n, + cuFloatComplex beta, + __device cuFloatComplex *y, + cuFloatComplex alpha, + __device cuFloatComplex* x, + int count, int pitch); + +/** +* \fn void spgpuCaxy(spgpuHandle_t handle, __device cuFloatComplex *z, int n, cuFloatComplex alpha, __device cuFloatComplex *x, __device cuFloatComplex* y) + * Computes the single precision complex z = alpha * x * y. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length + * \param alpha the alpha value + * \param x the first input vector + * \param y the second input vector + */ +void spgpuCaxy(spgpuHandle_t handle, + __device cuFloatComplex *z, + int n, + cuFloatComplex alpha, + __device cuFloatComplex *x, + __device cuFloatComplex *y); + +/** +* \fn void spgpuCaxypbz(spgpuHandle_t handle, __device cuFloatComplex *w, int n, cuFloatComplex beta, __device cuFloatComplex *z, cuFloatComplex alpha, __device cuFloatComplex* x, __device cuFloatComplex *y) + * Computes the single precision complex w = beta * z + alpha * x * y. w could be exactly x, y or z (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param w the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param z the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param y the third input vector + */ +void spgpuCaxypbz(spgpuHandle_t handle, + __device cuFloatComplex *w, + int n, + cuFloatComplex beta, + __device cuFloatComplex *z, + cuFloatComplex alpha, + __device cuFloatComplex* x, + __device cuFloatComplex *y); + +/** +* \fn void spgpuCmaxy(spgpuHandle_t handle, __device cuFloatComplex *z, int n, cuFloatComplex alpha, __device cuFloatComplex *x, __device cuFloatComplex* y, int count, int pitch) + * Computes the single precision complex z = alpha * x * y for z,x and y multivectors. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting multivector + * \param n the vectors' length in the multivectors + * \param alpha the alpha value + * \param x the first input multivector + * \param y the second input multivector + * \param count the number of vectors in z,x and y multivectors + * \param pitch the multivectors pitch + */ +void spgpuCmaxy(spgpuHandle_t handle, + __device cuFloatComplex *z, + int n, + cuFloatComplex alpha, + __device cuFloatComplex* x, + __device cuFloatComplex *y, + int count, + int pitch); + +/** +* \fn void spgpuCmaxypbz(spgpuHandle_t handle, __device cuFloatComplex *w, int n, cuFloatComplex beta, __device cuFloatComplex *z, cuFloatComplex alpha, __device cuFloatComplex* x, __device cuFloatComplex *y, int count, int pitch) + * Computes the single precision complex w = beta * z + alpha * x * y. w could be exactly x, y or z (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param w the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param z the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param y the third input vector + * \param count the number of vectors in w,z,x and y multivectors + * \param pitch the multivectors' pitch + */ + +void spgpuCmaxypbz(spgpuHandle_t handle, + __device cuFloatComplex *w, + int n, + cuFloatComplex beta, + __device cuFloatComplex *z, + cuFloatComplex alpha, + __device cuFloatComplex* x, + __device cuFloatComplex *y, + int count, + int pitch); + +/** +* \fn void spgpuCgath(spgpuHandle_t handle, __device cuFloatComplex *xValues, int xNnz, const __device int *xIndices, int xBaseIndex, const __device cuFloatComplex* y) + * Single precision complex gather from y to sparse(x). Computes the single precision complex gather from y to xValues (using xIndices). + * \param handle the spgpu handle used to call this routine + * \param xValues the destination array for gathered values + * \param xNnz the number of elements to gather + * \param xIndices the array of indices for the elements to be gathered + * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran). + * \param y the source vector (from which the elements will be gathered) + */ +void spgpuCgath(spgpuHandle_t handle, + __device cuFloatComplex *xValues, + int xNnz, + const __device int *xIndices, + int xBaseIndex, + const __device cuFloatComplex* y); + +/** +* \fn void spgpuCscat(spgpuHandle_t handle, __device cuFloatComplex* y, int xNnz, const __device cuFloatComplex *xValues, const __device int *xIndices, int xBaseIndex, cuFloatComplex beta) + * Single precision complex scatter from sparse(x) to y. Computes the single precision complex scatter from xValues to y (using xIndices). + * The scattered element will be, for i in [0,xNnz), y[xIndices[i]] = beta*y[xIndices[i]] + xValues[i] (to be noted that + * y values will be multiplied with beta just for scattered values). + * \param handle the spgpu handle used to call this routine + * \param y the destination vector (to which the elements will be scattered) + * \param xNnz the number of elements to scatter + * \param xValues the source array from which the values will be read + * \param xIndices the array of indices for the elements to be scattered + * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran). + * \param beta the beta value + */ +void spgpuCscat(spgpuHandle_t handle, + __device cuFloatComplex* y, + int xNnz, + const __device cuFloatComplex *xValues, + const __device int *xIndices, + int xBaseIndex, cuFloatComplex beta); + +float spgpuCasum(spgpuHandle_t handle, + int n, + cuFloatComplex* x); + +float spgpuCamax(spgpuHandle_t handle, + int n, + cuFloatComplex* x); + +void spgpuCmasum(spgpuHandle_t handle, + float* y, + int n, + cuFloatComplex* x, + int count, + int pitch); + +void spgpuCmamax(spgpuHandle_t handle, + float* y, + int n, + cuFloatComplex* x, + int count, + int pitch); + +/** +* \fn void spgpuZscal(spgpuHandle_t handle, __device cuDoubleComplex *y, int n, cuDoubleComplex alpha, __device cuDoubleComplex *x) + * Computes the double precision complex y = alpha * x. y could be exactly x (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param y the resulting vector + * \param n the vectors' length + * \param alpha the alpha value + * \param x the input vector + */ +void spgpuZscal(spgpuHandle_t handle, + __device cuDoubleComplex *y, + int n, + cuDoubleComplex alpha, + __device cuDoubleComplex *x); + +/** +* \fn cuFloatComplex spgpuZdot (spgpuHandle_t handle, int n, __device cuDoubleComplex* a, __device cuDoubleComplex* b) + * Computes double precision complex dot product of a and b vectors. + * \param handle The spgpu handle used to call this routine + * \param n the vectors length + * \param a the first input vector + * \param b the second input vector + * \return the dot product + */ + cuDoubleComplex spgpuZdot(spgpuHandle_t handle, + int n, + __device cuDoubleComplex* a, + __device cuDoubleComplex* b); + +/** +* \fn cuFloatComplex spgpuZmdot (spgpuHandle_t handle, cuDoubleComplex* y, int n, __device cuDoubleComplex* a, __device cuDoubleComplex* b, int count, int pitch) + * Computes double precision complex dot product of a and b multivectors. + * \param handle the spgpu handle used to call this routine + * \param y the result, made by dot products of every vector couples from the multivectors a and b + * \param n the vectors' length + * \param a the first input multivector + * \param b the second input multivector + * \param count the number of vectors in every multivector + * \param pitch the pitch, in number of elements, of every multivectors (so the second element of the first vector in a will be a[pitch], the third a[2*pitch], etc.). + */ +void spgpuZmdot(spgpuHandle_t handle, + cuDoubleComplex* y, + int n, + __device cuDoubleComplex* a, + __device cuDoubleComplex* b, + int count, + int pitch); + +/** +* \fn void spgpuZabs (spgpuHandle_t handle, __device double* y, int n, double alpha, __device cuDoubleComplex* x) + * Computes double precision complex (y = alpha * abs(x)) for each element in x. + * \param handle The spgpu handle used to call this routine + * \param y the resulting vector (could be x) + * \param n the vectors length + * \param alpha the alpha value + * \param x the input vector + */ +void spgpuZabs(spgpuHandle_t handle, + __device cuDoubleComplex* y, + int n, + cuDoubleComplex alpha, + __device cuDoubleComplex* x); + +/** +* \fn double spgpuZnrm2(spgpuHandle_t handle, int n, __device cuDoubleComplex* x) + * Computes the double precision complex Euclidean vector norm of x. + * \param handle the spgpu handle used to call this routine + * \param n the vector's length + * \param x the input vector + * \return the euclidean vector norm + */ + double spgpuZnrm2(spgpuHandle_t handle, + int n, + __device cuDoubleComplex* x); + +/** +* \fn void spgpuZmnrm2(spgpuHandle_t handle, double *y, int n, __device cuDoubleComplex *x, int count, int pitch) + * Computes the double precision complex Euclidean vector norm for every vector in the multivector x. + * \param handle the spgpu handle used to call this routine + * \param y the array of results + * \param n the vectors' length in the x multivector + * \param x the input multivector + * \param count the number of vectors in x + * \param pitch the multivector's pitch + */ + +void spgpuZmnrm2(spgpuHandle_t handle, + double *y, + int n, + __device cuDoubleComplex *x, + int count, + int pitch); + +/** +* \fn void spgpuZaxpby(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, cuDoubleComplex beta, __device cuDoubleComplex *y, cuDoubleComplex alpha, __device cuDoubleComplex* x) + * Computes the double precision complex z = beta * y + alpha * x. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param y the first input vector + * \param alpha the alpha value + * \param x the second input vector + */ +void spgpuZaxpby(spgpuHandle_t handle, + __device cuDoubleComplex *z, + int n, + cuDoubleComplex beta, + __device cuDoubleComplex *y, + cuDoubleComplex alpha, + __device cuDoubleComplex* x); + + +void spgpuZabgdxyz(spgpuHandle_t handle, + int n, + cuDoubleComplex alpha, + cuDoubleComplex beta, + cuDoubleComplex gamma, + cuDoubleComplex delta, + __device cuDoubleComplex* x, + __device cuDoubleComplex *y, + __device cuDoubleComplex *z) +; + + + void spgpuZxyzw(spgpuHandle_t handle, + int n, + cuDoubleComplex a, cuDoubleComplex b, + cuDoubleComplex c, cuDoubleComplex d, + cuDoubleComplex e, cuDoubleComplex f, + __device cuDoubleComplex* x, + __device cuDoubleComplex *y, + __device cuDoubleComplex *z, + __device cuDoubleComplex *w) +; + + +/** +* \fn void spgpuZmaxpby(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, cuDoubleComplex beta, __device cuDoubleComplex *y, cuDoubleComplex alpha, __device cuDoubleComplex* x, int count, int pitch) + * Computes the double precision complex z = beta * y + alpha * x of x and y multivectors. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param y the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param count the number of vectors in z,x and y multivectors + * \param pitch the multivector's pitch + */ + + void spgpuZmaxpby(spgpuHandle_t handle, + __device cuDoubleComplex *z, + int n, + cuDoubleComplex beta, + __device cuDoubleComplex *y, + cuDoubleComplex alpha, + __device cuDoubleComplex* x, + int count, int pitch); + +/** +* \fn void spgpuZaxy(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, cuDoubleComplex alpha, __device cuDoubleComplex *x, __device cuDoubleComplex* y) + * Computes the double precision complex z = alpha * x * y. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting vector + * \param n the vectors' length in the x multivector + * \param alpha the alpha value + * \param x the second input vector + * \param y the first input vector + */ +void spgpuZaxy(spgpuHandle_t handle, + __device cuDoubleComplex *z, + int n, + cuDoubleComplex alpha, + __device cuDoubleComplex *x, + __device cuDoubleComplex *y); + +/** +* \fn void spgpuZaxypbz(spgpuHandle_t handle, __device cuDoubleComplex *w, int n, cuDoubleComplex beta, __device cuDoubleComplex *z, cuDoubleComplex alpha, __device cuDoubleComplex* x, __device cuDoubleComplex *y) + * Computes the double precision complex w = beta * z + alpha * x * y. w could be exactly x, y or z (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param w the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param z the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param y the third input vector + */void spgpuZaxypbz(spgpuHandle_t handle, + __device cuDoubleComplex *w, + int n, + cuDoubleComplex beta, + __device cuDoubleComplex *z, + cuDoubleComplex alpha, + __device cuDoubleComplex* x, + __device cuDoubleComplex *y); + +/** +* \fn void spgpuZmaxy(spgpuHandle_t handle, __device cuDoubleComplex *z, int n, cuDoubleComplex alpha, __device cuDoubleComplex *x, __device cuDoubleComplex* y, int count, int pitch) + * Computes the double precision complex z = alpha * x * y for z,x and y multivectors. z could be exactly x or y (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param z the resulting multivector + * \param n the vectors' length in the multivectors + * \param alpha the alpha value + * \param x the first input multivector + * \param y the second input multivector + * \param count the number of vectors in z,x and y multivectors + * \param pitch the multivectors pitch + */ +void spgpuZmaxy(spgpuHandle_t handle, + __device cuDoubleComplex *z, + int n, + cuDoubleComplex alpha, + __device cuDoubleComplex* x, + __device cuDoubleComplex *y, + int count, + int pitch); + +/** +* \fn void spgpuZmaxypbz(spgpuHandle_t handle, __device cuDoubleComplex *w, int n, cuDoubleComplex beta, __device cuDoubleComplex *z, cuDoubleComplex alpha, __device cuDoubleComplex* x, __device cuDoubleComplex *y, int count, int pitch) + * Computes the double precision complex w = beta * z + alpha * x * y. w could be exactly x, y or z (without offset) or another vector. + * \param handle the spgpu handle used to call this routine + * \param w the resulting vector + * \param n the vectors' length + * \param beta the beta value + * \param z the first input vector + * \param alpha the alpha value + * \param x the second input vector + * \param y the third input vector + * \param count the number of vectors in w,z,x and y multivectors + * \param pitch the multivectors' pitch + */ + +void spgpuZmaxypbz(spgpuHandle_t handle, + __device cuDoubleComplex *w, + int n, + cuDoubleComplex beta, + __device cuDoubleComplex *z, + cuDoubleComplex alpha, + __device cuDoubleComplex* x, + __device cuDoubleComplex *y, + int count, + int pitch); + +/** +* \fn void spgpuZgath(spgpuHandle_t handle, __device cuDoubleComplex *xValues, int xNnz, const __device int *xIndices, int xBaseIndex, const __device cuDoubleComplex* y) + * Computes the double precision complex gather from y to xValues (using xIndices). + * \param handle the spgpu handle used to call this routine + * \param xValues the destination array for gathered values + * \param xNnz the number of elements to gather + * \param xIndices the array of indices for the elements to be gathered + * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran). + * \param y the source vector (from which the elements will be gathered) + */ +void spgpuZgath(spgpuHandle_t handle, + __device cuDoubleComplex *xValues, + int xNnz, + const __device int *xIndices, + int xBaseIndex, + const __device cuDoubleComplex* y); + +/** +* \fn void spgpuZscat(spgpuHandle_t handle, __device cuDoubleComplex* y, int xNnz, const __device cuDoubleComplex *xValues, const __device int *xIndices, int xBaseIndex, cuDoubleComplex beta) + * Computes the double precision complex scatter from xValues to y (using xIndices). + * The scattered element will be, for i in [0,xNnz), y[xIndices[i]] = beta*y[xIndices[i]] + xValues[i] (to be noted that + * y values will be multiplied with beta just for scattered values). + * \param handle the spgpu handle used to call this routine + * \param y the destination vector (to which the elements will be scattered) + * \param xNnz the number of elements to scatter + * \param xValues the source array from which the values will be read + * \param xIndices the array of indices for the elements to be scattered + * \param xBaseIndex the base index used in xIndices (i.e. 0 for C, 1 for Fortran). + * \param beta the beta value + */ +void spgpuZscat(spgpuHandle_t handle, + __device cuDoubleComplex* y, + int xNnz, + const __device cuDoubleComplex *xValues, + const __device int *xIndices, + int xBaseIndex, cuDoubleComplex beta); + + +double spgpuZasum(spgpuHandle_t handle, + int n, + cuDoubleComplex* x); + +double spgpuZamax(spgpuHandle_t handle, + int n, + cuDoubleComplex* x); + +void spgpuZmasum(spgpuHandle_t handle, + double* y, + int n, + cuDoubleComplex* x, + int count, + int pitch); + +void spgpuZmamax(spgpuHandle_t handle, + double* y, + int n, + cuDoubleComplex* x, + int count, + int pitch); + +/** @}*/ + +void spgpuIsetscal(spgpuHandle_t handle, + int first, + int last, + int baseIndex, + int val, + __device int *y); + +void spgpuSsetscal(spgpuHandle_t handle, + int first, + int last, + int baseIndex, + float val, + __device float *y); + +void spgpuDsetscal(spgpuHandle_t handle, + int first, + int last, + int baseIndex, + double val, + __device double *y); + +void spgpuCsetscal(spgpuHandle_t handle, + int first, + int last, + int baseIndex, + cuFloatComplex val, + __device cuFloatComplex* y); + +void spgpuZsetscal(spgpuHandle_t handle, + int first, + int last, + int baseIndex, + cuDoubleComplex val, + __device cuDoubleComplex* y); + + +#ifdef __cplusplus +} +#endif diff --git a/cuda/svectordev.c b/cuda/svectordev.c new file mode 100644 index 00000000..bf7545bb --- /dev/null +++ b/cuda/svectordev.c @@ -0,0 +1,339 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include +//#include "utils.h" +//#include "common.h" +#include "svectordev.h" + + +int registerMappedFloat(void *buff, void **d_p, int n, float dummy) +{ + return registerMappedMemory(buff,d_p,n*sizeof(float)); +} + +int writeMultiVecDeviceFloat(void* deviceVec, float* hostVec) +{ int i; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + // Ex updateFromHost vector function + i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, devVec->pitch_*devVec->count_*sizeof(float)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i); + } + return(i); +} + +int writeMultiVecDeviceFloatR2(void* deviceVec, float* hostVec, int ld) +{ int i; + i = writeMultiVecDeviceFloat(deviceVec, (void *) hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceFloatR2",i); + } + return(i); +} + +int readMultiVecDeviceFloat(void* deviceVec, float* hostVec) +{ int i,j; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(float)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloat",i); + } + return(i); +} + +int readMultiVecDeviceFloatR2(void* deviceVec, float* hostVec, int ld) +{ int i; + i = readMultiVecDeviceFloat(deviceVec, hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceFloatR2",i); + } + return(i); +} + +int setscalMultiVecDeviceFloat(float val, int first, int last, + int indexBase, void* devMultiVecX) +{ int i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + spgpuHandle_t handle=psb_cudaGetHandle(); + + spgpuSsetscal(handle, first, last, indexBase, val, (float *) devVecX->v_); + + return(i); +} + +int geinsMultiVecDeviceFloat(int n, void* devMultiVecIrl, void* devMultiVecVal, + int dupl, int indexBase, void* devMultiVecX) +{ int j=0, i=0,nmin=0,nmax=0; + int pitch = 0; + float beta; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; + struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecIrl->pitch_; + if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) + return SPGPU_UNSUPPORTED; + + //fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_); + + if (dupl == INS_OVERWRITE) + beta = 0.0; + else if (dupl == INS_ADD) + beta = 1.0; + else + beta = 0.0; + + spgpuSscat(handle, (float *) devVecX->v_, n, (float*)devVecVal->v_, + (int*)devVecIrl->v_, indexBase, beta); + + return(i); +} + + +int igathMultiVecDeviceFloatVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + + i= igathMultiVecDeviceFloat(deviceVec, vectorId, n, + first, (void*) devIdx->v_, hfirst, host_values, indexBase); + return(i); +} + +int igathMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, int indexBase) +{ + int i, *idx =(int *) indexes;; + float *hv = (float *) host_values;; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_cudaGetHandle(); + + i=0; + hv = &(hv[hfirst-indexBase]); + idx = &(idx[first-indexBase]); + spgpuSgath(handle,hv, n, idx,indexBase, (float *) devVec->v_+vectorId*devVec->pitch_); + return(i); +} + +int iscatMultiVecDeviceFloatVecIdx(void* deviceVec, int vectorId, int n, int first, void *deviceIdx, + int hfirst, void* host_values, int indexBase, float beta) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + i= iscatMultiVecDeviceFloat(deviceVec, vectorId, n, first, + (void*) devIdx->v_, hfirst,host_values, indexBase, beta); + return(i); +} + +int iscatMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, float beta) +{ int i=0; + float *hv = (float *) host_values; + int *idx=(int *) indexes; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_cudaGetHandle(); + + idx = &(idx[first-indexBase]); + hv = &(hv[hfirst-indexBase]); + spgpuSscat(handle, (float *) devVec->v_, n, hv, idx, indexBase, beta); + return SPGPU_SUCCESS; + +} + + +int nrm2MultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuSmnrm2(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); + return(i); +} + +int amaxMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuSmamax(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); + return(i); +} + +int asumMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuSmasum(handle, y_res, n,(float *)devVecA->v_, devVecA->count_, devVecA->pitch_); + + return(i); +} + +int scalMultiVecDeviceFloat(float alpha, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + // Note: inner kernel can handle aliased input/output + spgpuSscal(handle, (float *)devVecA->v_, devVecA->pitch_, + alpha, (float *)devVecA->v_); + return(i); +} + +int dotMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA, void* devMultiVecB) +{int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; + spgpuHandle_t handle=psb_cudaGetHandle(); + + spgpuSmdot(handle, y_res, n, (float*)devVecA->v_, (float*)devVecB->v_,devVecA->count_,devVecB->pitch_); + return(i); +} + +int axpbyMultiVecDeviceFloat(int n,float alpha, void* devMultiVecX, + float beta, void* devMultiVecY) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + for(j=0;jcount_;j++) + spgpuSaxpby(handle,(float*)devVecY->v_+pitch*j, n, beta, + (float*)devVecY->v_+pitch*j, alpha,(float*) devVecX->v_+pitch*j); + return(i); +} + +int abgdxyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuSabgdxyz(handle,n, alpha,beta,gamma,delta, + (float*)devVecX->v_,(float*) devVecY->v_,(float*) devVecZ->v_); + return(i); +} + + +int xyzwMultiVecDeviceFloat(int n,float a,float b, float c, float d, float e, float f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + struct MultiVectDevice *devVecW = (struct MultiVectDevice *) devMultiVecW; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuSxyzw(handle,n, a,b,c,d,e,f, + (float*)devVecX->v_,(float*) devVecY->v_, + (float*) devVecZ->v_,(float*) devVecW->v_); + return(i); +} + +int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + spgpuHandle_t handle=psb_cudaGetHandle(); + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuSmaxy(handle, (float*)devVecB->v_, n, alpha, (float*)devVecA->v_, + (float*)devVecB->v_, devVecA->count_, devVecA->pitch_); + + return(i); +} + +int axybzMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, + void *deviceVecB, float beta, void *deviceVecZ) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; + spgpuHandle_t handle=psb_cudaGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) + return SPGPU_UNSUPPORTED; + spgpuSmaxypbz(handle, (float*)devVecZ->v_, n, beta, (float*)devVecZ->v_, + alpha, (float*) devVecA->v_, (float*) devVecB->v_, + devVecB->count_, devVecB->pitch_); + return(i); +} + +int absMultiVecDeviceFloat2(int n, float alpha, void *deviceVecA, + void *deviceVecB) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + + spgpuHandle_t handle=psb_cudaGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuSabs(handle, (float*)devVecB->v_, n, alpha, (float*)devVecA->v_); + + return(i); +} + +int absMultiVecDeviceFloat(int n, float alpha, void *deviceVecA) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + spgpuHandle_t handle=psb_cudaGetHandle(); + if (n > devVecA->size_) + return SPGPU_UNSUPPORTED; + + spgpuSabs(handle, (float*)devVecA->v_, n, alpha, (float*)devVecA->v_); + + return(i); +} + diff --git a/cuda/svectordev.h b/cuda/svectordev.h new file mode 100644 index 00000000..363c0108 --- /dev/null +++ b/cuda/svectordev.h @@ -0,0 +1,81 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#pragma once +//#include "utils.h" +#include "vectordev.h" +#include "cuda_runtime.h" +#include "core.h" + +int registerMappedFloat(void *, void **, int, float); +int writeMultiVecDeviceFloat(void* deviceMultiVec, float* hostMultiVec); +int writeMultiVecDeviceFloatR2(void* deviceMultiVec, float* hostMultiVec, int ld); +int readMultiVecDeviceFloat(void* deviceMultiVec, float* hostMultiVec); +int readMultiVecDeviceFloatR2(void* deviceMultiVec, float* hostMultiVec, int ld); + +int setscalMultiVecDeviceFloat(float val, int first, int last, + int indexBase, void* devVecX); + +int geinsMultiVecDeviceFloat(int n, void* devVecIrl, void* devVecVal, + int dupl, int indexBase, void* devVecX); + +int igathMultiVecDeviceFloatVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase); +int igathMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, void* host_values, + int indexBase); +int iscatMultiVecDeviceFloatVecIdx(void* deviceVec, int vectorId, int n, int first, + void *deviceIdx, int hfirst, void* host_values, + int indexBase, float beta); +int iscatMultiVecDeviceFloat(void* deviceVec, int vectorId, int n, int first, void *indexes, + int hfirst, void* host_values, int indexBase, float beta); + +int scalMultiVecDeviceFloat(float alpha, void* devMultiVecA); +int nrm2MultiVecDeviceFloat(float* y_res, int n, void* devVecA); +int amaxMultiVecDeviceFloat(float* y_res, int n, void* devVecA); +int asumMultiVecDeviceFloat(float* y_res, int n, void* devVecA); +int dotMultiVecDeviceFloat(float* y_res, int n, void* devVecA, void* devVecB); + +int axpbyMultiVecDeviceFloat(int n, float alpha, void* devVecX, float beta, void* devVecY); +int abgdxyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); +int xyzwMultiVecDeviceFloat(int n,float a,float b, float c, float d, float e, float f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW); +int axyMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, void *deviceVecB); +int axybzMultiVecDeviceFloat(int n, float alpha, void *deviceVecA, + void *deviceVecB, float beta, void *deviceVecZ); +int absMultiVecDeviceFloat(int n, float alpha, void *deviceVecA); +int absMultiVecDeviceFloat2(int n, float alpha, void *deviceVecA, void *deviceVecB); + + diff --git a/cuda/vectordev.c b/cuda/vectordev.c new file mode 100644 index 00000000..65b4d533 --- /dev/null +++ b/cuda/vectordev.c @@ -0,0 +1,195 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include +#include "cuComplex.h" +#include "vectordev.h" +#include "cuda_runtime.h" +#include "core.h" + +//new +MultiVectorDeviceParams getMultiVectorDeviceParams(unsigned int count, unsigned int size, + unsigned int elementType) +{ + struct MultiVectorDeviceParams params; + + if (count == 1) + params.pitch = size; + else + if (elementType == SPGPU_TYPE_INT) + { + //fprintf(stderr,"Getting parms for a DOUBLE vector\n"); + params.pitch = (((size*sizeof(int) + 255)/256)*256)/sizeof(int); + } + else if (elementType == SPGPU_TYPE_DOUBLE) + { + //fprintf(stderr,"Getting parms for a DOUBLE vector\n"); + params.pitch = (((size*sizeof(double) + 255)/256)*256)/sizeof(double); + } + else if (elementType == SPGPU_TYPE_FLOAT) + { + params.pitch = (((size*sizeof(float) + 255)/256)*256)/sizeof(float); + } + else if (elementType == SPGPU_TYPE_COMPLEX_FLOAT) + { + params.pitch = (((size*sizeof(cuFloatComplex) + 255)/256)*256)/sizeof(cuFloatComplex); + } + else if (elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + { + params.pitch = (((size*sizeof(cuDoubleComplex) + 255)/256)*256)/sizeof(cuDoubleComplex); + } + else + params.pitch = 0; + + params.elementType = elementType; + + params.count = count; + params.size = size; + + return params; + +} +//new +int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams *params) +{ + if (params->pitch == 0) + return SPGPU_UNSUPPORTED; // Unsupported params + + struct MultiVectDevice *tmp = (struct MultiVectDevice *)malloc(sizeof(struct MultiVectDevice)); + *remoteMultiVec = (void *)tmp; + tmp->size_ = params->size; + tmp->count_ = params->count; + + if (params->elementType == SPGPU_TYPE_INT) + { + if (params->count == 1) + tmp->pitch_ = params->size; + else + tmp->pitch_ = (((params->size*sizeof(int) + 255)/256)*256)/sizeof(int); + //fprintf(stderr,"Allocating an INT vector %ld\n",tmp->pitch_*tmp->count_*sizeof(double)); + + return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*params->count*sizeof(int)); + } + else if (params->elementType == SPGPU_TYPE_FLOAT) + { + if (params->count == 1) + tmp->pitch_ = params->size; + else + tmp->pitch_ = (((params->size*sizeof(float) + 255)/256)*256)/sizeof(float); + + return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*params->count*sizeof(float)); + } + else if (params->elementType == SPGPU_TYPE_DOUBLE) + { + + if (params->count == 1) + tmp->pitch_ = params->size; + else + tmp->pitch_ = (int)(((params->size*sizeof(double) + 255)/256)*256)/sizeof(double); + //fprintf(stderr,"Allocating a DOUBLE vector %ld\n",tmp->pitch_*tmp->count_*sizeof(double)); + + return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*tmp->count_*sizeof(double)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_FLOAT) + { + if (params->count == 1) + tmp->pitch_ = params->size; + else + tmp->pitch_ = (int)(((params->size*sizeof(cuFloatComplex) + 255)/256)*256)/sizeof(cuFloatComplex); + return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*tmp->count_*sizeof(cuFloatComplex)); + } + else if (params->elementType == SPGPU_TYPE_COMPLEX_DOUBLE) + { + if (params->count == 1) + tmp->pitch_ = params->size; + else + tmp->pitch_ = (int)(((params->size*sizeof(cuDoubleComplex) + 255)/256)*256)/sizeof(cuDoubleComplex); + return allocRemoteBuffer((void **)&(tmp->v_), tmp->pitch_*tmp->count_*sizeof(cuDoubleComplex)); + } + else + return SPGPU_UNSUPPORTED; // Unsupported params + return SPGPU_SUCCESS; // Success +} + + +int unregisterMapped(void *buff) +{ + return unregisterMappedMemory(buff); +} + +void freeMultiVecDevice(void* deviceVec) +{ + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + // fprintf(stderr,"freeMultiVecDevice\n"); + if (devVec != NULL) { + //fprintf(stderr,"Before freeMultiVecDevice% ld\n",devVec->pitch_*devVec->count_*sizeof(double)); + freeRemoteBuffer(devVec->v_); + free(deviceVec); + } +} + +int FallocMultiVecDevice(void** deviceMultiVec, unsigned int count, + unsigned int size, unsigned int elementType) +{ int i; + struct MultiVectorDeviceParams p; + + p = getMultiVectorDeviceParams(count, size, elementType); + i = allocMultiVecDevice(deviceMultiVec, &p); + //cudaSync(); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d, %d %d \n","FallocMultiVecDevice",i, count, size); + } + return(i); +} + +int getMultiVecDeviceSize(void* deviceVec) +{ int i; + struct MultiVectDevice *dev = (struct MultiVectDevice *) deviceVec; + i = dev->size_; + return(i); +} + +int getMultiVecDeviceCount(void* deviceVec) +{ int i; + struct MultiVectDevice *dev = (struct MultiVectDevice *) deviceVec; + i = dev->count_; + return(i); +} + +int getMultiVecDevicePitch(void* deviceVec) +{ int i; + struct MultiVectDevice *dev = (struct MultiVectDevice *) deviceVec; + i = dev->pitch_; + return(i); +} + diff --git a/cuda/vectordev.h b/cuda/vectordev.h new file mode 100644 index 00000000..df5fbd82 --- /dev/null +++ b/cuda/vectordev.h @@ -0,0 +1,87 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + +#pragma once +//#include "utils.h" +#include "cuda_runtime.h" +//#include "common.h" +//#include "cintrf.h" +#include + +struct MultiVectDevice +{ + // number of vectors + int count_; + + //number of elements for a single vector + int size_; + + //pithc in number of elements + int pitch_; + + // Vectors in device memory (single allocation) + void *v_; +}; + +typedef struct MultiVectorDeviceParams +{ + // number on vectors + unsigned int count; //1 for a simple vector + + // The resulting allocation will be pitch*s*(size of the elementType) + unsigned int elementType; + + // Pitch (in number of elements) + unsigned int pitch; + + // Size of a single vector (in number of elements). + unsigned int size; +} MultiVectorDeviceParams; + + +#define INS_OVERWRITE 0 +#define INS_ADD 1 + + +int unregisterMapped(void *); + +MultiVectorDeviceParams getMultiVectorDeviceParams(unsigned int count, + unsigned int size, + unsigned int elementType); + +int FallocMultiVecDevice(void** deviceMultiVec, unsigned count, + unsigned int size, unsigned int elementType); +void freeMultiVecDevice(void* deviceVec); +int allocMultiVecDevice(void ** remoteMultiVec, struct MultiVectorDeviceParams *params); +int getMultiVecDeviceSize(void* deviceVec); +int getMultiVecDeviceCount(void* deviceVec); +int getMultiVecDevicePitch(void* deviceVec); + diff --git a/cuda/z_cusparse_mod.F90 b/cuda/z_cusparse_mod.F90 new file mode 100644 index 00000000..91a6fcbd --- /dev/null +++ b/cuda/z_cusparse_mod.F90 @@ -0,0 +1,313 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module z_cusparse_mod + use base_cusparse_mod + + type, bind(c) :: z_Cmat + type(c_ptr) :: Mat = c_null_ptr + end type z_Cmat + +#if CUDA_SHORT_VERSION <= 10 + type, bind(c) :: z_Hmat + type(c_ptr) :: Mat = c_null_ptr + end type z_Hmat +#endif + + interface CSRGDeviceFree + function z_CSRGDeviceFree(Mat) & + & bind(c,name="z_CSRGDeviceFree") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int) :: res + end function z_CSRGDeviceFree + end interface + + interface CSRGDeviceSetMatType + function z_CSRGDeviceSetMatType(Mat,type) & + & bind(c,name="z_CSRGDeviceSetMatType") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_CSRGDeviceSetMatType + end interface + + interface CSRGDeviceSetMatFillMode + function z_CSRGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="z_CSRGDeviceSetMatFillMode") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_CSRGDeviceSetMatFillMode + end interface + + interface CSRGDeviceSetMatDiagType + function z_CSRGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="z_CSRGDeviceSetMatDiagType") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_CSRGDeviceSetMatDiagType + end interface + + interface CSRGDeviceSetMatIndexBase + function z_CSRGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="z_CSRGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_CSRGDeviceSetMatIndexBase + end interface + +#if CUDA_SHORT_VERSION <= 10 + interface CSRGDeviceCsrsmAnalysis + function z_CSRGDeviceCsrsmAnalysis(Mat) & + & bind(c,name="z_CSRGDeviceCsrsmAnalysis") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int) :: res + end function z_CSRGDeviceCsrsmAnalysis + end interface +#else + interface CSRGIsNullSvBuffer + function z_CSRGIsNullSvBuffer(Mat) & + & bind(c,name="z_CSRGIsNullSvBuffer") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int) :: res + end function z_CSRGIsNullSvBuffer + end interface +#endif + + interface CSRGDeviceAlloc + function z_CSRGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="z_CSRGDeviceAlloc") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function z_CSRGDeviceAlloc + end interface + + interface CSRGDeviceGetParms + function z_CSRGDeviceGetParms(Mat,nr,nc,nz) & + & bind(c,name="z_CSRGDeviceGetParms") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int) :: nr, nc, nz + integer(c_int) :: res + end function z_CSRGDeviceGetParms + end interface + + interface spsvCSRGDevice + function z_spsvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="z_spsvCSRGDevice") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_double_complex), value :: alpha,beta + integer(c_int) :: res + end function z_spsvCSRGDevice + end interface + + interface spmvCSRGDevice + function z_spmvCSRGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="z_spmvCSRGDevice") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_double_complex), value :: alpha,beta + integer(c_int) :: res + end function z_spmvCSRGDevice + end interface + + interface CSRGHost2Device + function z_CSRGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="z_CSRGHost2Device") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + complex(c_double_complex) :: val(*) + integer(c_int) :: res + end function z_CSRGHost2Device + end interface + + interface CSRGDevice2Host + function z_CSRGDevice2Host(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="z_CSRGDevice2Host") result(res) + use iso_c_binding + import z_Cmat + type(z_Cmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + complex(c_double_complex) :: val(*) + integer(c_int) :: res + end function z_CSRGDevice2Host + end interface + +#if CUDA_SHORT_VERSION <= 10 + interface HYBGDeviceAlloc + function z_HYBGDeviceAlloc(Mat,nr,nc,nz) & + & bind(c,name="z_HYBGDeviceAlloc") result(res) + use iso_c_binding + import z_hmat + type(z_Hmat) :: Mat + integer(c_int), value :: nr, nc, nz + integer(c_int) :: res + end function z_HYBGDeviceAlloc + end interface + + interface HYBGDeviceFree + function z_HYBGDeviceFree(Mat) & + & bind(c,name="z_HYBGDeviceFree") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int) :: res + end function z_HYBGDeviceFree + end interface + + interface HYBGDeviceSetMatType + function z_HYBGDeviceSetMatType(Mat,type) & + & bind(c,name="z_HYBGDeviceSetMatType") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_HYBGDeviceSetMatType + end interface + + interface HYBGDeviceSetMatFillMode + function z_HYBGDeviceSetMatFillMode(Mat,type) & + & bind(c,name="z_HYBGDeviceSetMatFillMode") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_HYBGDeviceSetMatFillMode + end interface + + interface HYBGDeviceSetMatDiagType + function z_HYBGDeviceSetMatDiagType(Mat,type) & + & bind(c,name="z_HYBGDeviceSetMatDiagType") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_HYBGDeviceSetMatDiagType + end interface + + interface HYBGDeviceSetMatIndexBase + function z_HYBGDeviceSetMatIndexBase(Mat,type) & + & bind(c,name="z_HYBGDeviceSetMatIndexBase") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int),value :: type + integer(c_int) :: res + end function z_HYBGDeviceSetMatIndexBase + end interface + + interface HYBGDeviceHybsmAnalysis + function z_HYBGDeviceHybsmAnalysis(Mat) & + & bind(c,name="z_HYBGDeviceHybsmAnalysis") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int) :: res + end function z_HYBGDeviceHybsmAnalysis + end interface + + interface spsvHYBGDevice + function z_spsvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="z_spsvHYBGDevice") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_double_complex), value :: alpha,beta + integer(c_int) :: res + end function z_spsvHYBGDevice + end interface + + interface spmvHYBGDevice + function z_spmvHYBGDevice(Mat,alpha,x,beta,y) & + & bind(c,name="z_spmvHYBGDevice") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + type(c_ptr), value :: x + type(c_ptr), value :: y + complex(c_double_complex), value :: alpha,beta + integer(c_int) :: res + end function z_spmvHYBGDevice + end interface + + interface HYBGHost2Device + function z_HYBGHost2Device(Mat,m,n,nz,irp,ja,val) & + & bind(c,name="z_HYBGHost2Device") result(res) + use iso_c_binding + import z_Hmat + type(z_Hmat) :: Mat + integer(c_int), value :: m,n,nz + integer(c_int) :: irp(*), ja(*) + complex(c_double_complex) :: val(*) + integer(c_int) :: res + end function z_HYBGHost2Device + end interface + +#endif + +end module z_cusparse_mod diff --git a/cuda/zcusparse.c b/cuda/zcusparse.c new file mode 100644 index 00000000..93142d22 --- /dev/null +++ b/cuda/zcusparse.c @@ -0,0 +1,42 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include + +#include +#include +#include "fcusparse.h" + +#include "zcusparse.h" +#include "fcusparse_dat.h" +#include "fcusparse_fct.h" diff --git a/cuda/zcusparse.h b/cuda/zcusparse.h new file mode 100644 index 00000000..b63aa103 --- /dev/null +++ b/cuda/zcusparse.h @@ -0,0 +1,100 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#ifndef ZCUSPARSE_ +#define ZCUSPARSE_ + + +#include +#include + +#include +#include +#include "cintrf.h" + +/* Double precision real */ +#define TYPE double complex +#define CUSPARSE_BASE_TYPE CUDA_C_64F +#define T_CSRGDeviceMat z_CSRGDeviceMat +#define T_Cmat z_Cmat +#define T_spmvCSRGDevice z_spmvCSRGDevice +#define T_spsvCSRGDevice z_spsvCSRGDevice +#define T_CSRGDeviceAlloc z_CSRGDeviceAlloc +#define T_CSRGDeviceFree z_CSRGDeviceFree +#define T_CSRGHost2Device z_CSRGHost2Device +#define T_CSRGDevice2Host z_CSRGDevice2Host +#define T_CSRGDeviceSetMatFillMode z_CSRGDeviceSetMatFillMode +#define T_CSRGDeviceSetMatDiagType z_CSRGDeviceSetMatDiagType +#define T_CSRGDeviceGetParms z_CSRGDeviceGetParms + +#if CUDA_SHORT_VERSION <= 10 +#define T_CSRGDeviceSetMatType z_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase z_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsmAnalysis z_CSRGDeviceCsrsmAnalysis +#define cusparseTcsrmv cusparseZcsrmv +#define cusparseTcsrsv_solve cusparseZcsrsv_solve +#define cusparseTcsrsv_analysis cusparseZcsrsv_analysis +#define T_HYBGDeviceMat z_HYBGDeviceMat +#define T_Hmat z_Hmat +#define T_HYBGDeviceFree z_HYBGDeviceFree +#define T_spmvHYBGDevice z_spmvHYBGDevice +#define T_HYBGDeviceAlloc z_HYBGDeviceAlloc +#define T_HYBGDeviceSetMatDiagType z_HYBGDeviceSetMatDiagType +#define T_HYBGDeviceSetMatIndexBase z_HYBGDeviceSetMatIndexBase +#define T_HYBGDeviceSetMatType z_HYBGDeviceSetMatType +#define T_HYBGDeviceSetMatFillMode z_HYBGDeviceSetMatFillMode +#define T_HYBGDeviceHybsmAnalysis z_HYBGDeviceHybsmAnalysis +#define T_spsvHYBGDevice z_spsvHYBGDevice +#define T_HYBGHost2Device z_HYBGHost2Device +#define cusparseThybmv cusparseZhybmv +#define cusparseThybsv_solve cusparseZhybsv_solve +#define cusparseThybsv_analysis cusparseZhybsv_analysis +#define cusparseTcsr2hyb cusparseZcsr2hyb + +#elif CUDA_VERSION < 11030 + +#define T_CSRGDeviceSetMatType z_CSRGDeviceSetMatType +#define T_CSRGDeviceSetMatIndexBase z_CSRGDeviceSetMatIndexBase +#define T_CSRGDeviceCsrsv2Analysis z_CSRGDeviceCsrsv2Analysis +#define cusparseTcsrsv2_bufferSize cusparseZcsrsv2_bufferSize +#define cusparseTcsrsv2_analysis cusparseZcsrsv2_analysis +#define cusparseTcsrsv2_solve cusparseZcsrsv2_solve +#else + +#define T_CSRGIsNullSvBuffer z_CSRGIsNullSvBuffer +#define T_CSRGIsNullSvDescr z_CSRGIsNullSvDescr +#define T_CSRGIsNullMvDescr z_CSRGIsNullMvDescr +#define T_CSRGCreateSpMVDescr z_CSRGCreateSpMVDescr + +#endif + +#include "fcusparse.h" + +#endif diff --git a/cuda/zvectordev.c b/cuda/zvectordev.c new file mode 100644 index 00000000..e9f0cec7 --- /dev/null +++ b/cuda/zvectordev.c @@ -0,0 +1,358 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#include +#include +//#include "utils.h" +//#include "common.h" +#include "zvectordev.h" + + +int registerMappedDoubleComplex(void *buff, void **d_p, int n, cuDoubleComplex dummy) +{ + return registerMappedMemory(buff,d_p,n*sizeof(cuDoubleComplex)); +} + +int writeMultiVecDeviceDoubleComplex(void* deviceVec, cuDoubleComplex* hostVec) +{ int i; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + // Ex updateFromHost vector function + i = writeRemoteBuffer((void*) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(cuDoubleComplex)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","FallocMultiVecDevice",i); + } + return(i); +} + +int writeMultiVecDeviceDoubleComplexR2(void* deviceVec, cuDoubleComplex* hostVec, int ld) +{ int i; + i = writeMultiVecDeviceDoubleComplex(deviceVec, (void *) hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","writeMultiVecDeviceDoubleComplexR2",i); + } + return(i); +} + +int readMultiVecDeviceDoubleComplex(void* deviceVec, cuDoubleComplex* hostVec) +{ int i,j; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + i = readRemoteBuffer((void *) hostVec, (void *)devVec->v_, + devVec->pitch_*devVec->count_*sizeof(cuDoubleComplex)); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDoubleComplex",i); + } + return(i); +} + +int readMultiVecDeviceDoubleComplexR2(void* deviceVec, cuDoubleComplex* hostVec, int ld) +{ int i; + i = readMultiVecDeviceDoubleComplex(deviceVec, hostVec); + if (i != 0) { + fprintf(stderr,"From routine : %s : %d \n","readMultiVecDeviceDoubleComplexR2",i); + } + return(i); +} + +int setscalMultiVecDeviceDoubleComplex(cuDoubleComplex val, int first, int last, + int indexBase, void* devMultiVecX) +{ int i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + spgpuHandle_t handle=psb_cudaGetHandle(); + + spgpuZsetscal(handle, first, last, indexBase, val, (cuDoubleComplex *) devVecX->v_); + + return(i); +} + +int geinsMultiVecDeviceDoubleComplex(int n, void* devMultiVecIrl, void* devMultiVecVal, + int dupl, int indexBase, void* devMultiVecX) +{ int j=0, i=0,nmin=0,nmax=0; + int pitch = 0; + cuDoubleComplex beta; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecIrl = (struct MultiVectDevice *) devMultiVecIrl; + struct MultiVectDevice *devVecVal = (struct MultiVectDevice *) devMultiVecVal; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecIrl->pitch_; + if ((n > devVecIrl->size_) || (n>devVecVal->size_ )) + return SPGPU_UNSUPPORTED; + + //fprintf(stderr,"geins: %d %d %p %p %p\n",dupl,n,devVecIrl->v_,devVecVal->v_,devVecX->v_); + if (dupl == INS_OVERWRITE) + beta = make_cuDoubleComplex(0.0, 0.0); + else if (dupl == INS_ADD) + beta = make_cuDoubleComplex(1.0, 0.0); + else + beta = make_cuDoubleComplex(0.0, 0.0); + + spgpuZscat(handle, (cuDoubleComplex *) devVecX->v_, n, (cuDoubleComplex*)devVecVal->v_, + (int*)devVecIrl->v_, indexBase, beta); + + return(i); +} + + +int igathMultiVecDeviceDoubleComplexVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + + i= igathMultiVecDeviceDoubleComplex(deviceVec, vectorId, n, + first, (void*) devIdx->v_, + hfirst, host_values, indexBase); + return(i); +} + +int igathMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, + int first, void* indexes, int hfirst, + void* host_values, int indexBase) +{ + int i, *idx =(int *) indexes;; + cuDoubleComplex *hv = (cuDoubleComplex *) host_values;; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_cudaGetHandle(); + + i=0; + hv = &(hv[hfirst-indexBase]); + idx = &(idx[first-indexBase]); + spgpuZgath(handle,hv, n, idx,indexBase, + (cuDoubleComplex *) devVec->v_+vectorId*devVec->pitch_); + return(i); +} + +int iscatMultiVecDeviceDoubleComplexVecIdx(void* deviceVec, int vectorId, int n, + int first, void *deviceIdx, + int hfirst, void* host_values, + int indexBase, cuDoubleComplex beta) +{ + int i, *idx; + struct MultiVectDevice *devIdx = (struct MultiVectDevice *) deviceIdx; + i= iscatMultiVecDeviceDoubleComplex(deviceVec, vectorId, n, first, + (void*) devIdx->v_, hfirst,host_values, indexBase, beta); + return(i); +} + +int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, + int first, void *indexes, + int hfirst, void* host_values, + int indexBase, cuDoubleComplex beta) +{ int i=0; + cuDoubleComplex *hv = (cuDoubleComplex *) host_values; + int *idx=(int *) indexes; + struct MultiVectDevice *devVec = (struct MultiVectDevice *) deviceVec; + spgpuHandle_t handle=psb_cudaGetHandle(); + + idx = &(idx[first-indexBase]); + hv = &(hv[hfirst-indexBase]); + spgpuZscat(handle, (cuDoubleComplex *) devVec->v_, n, hv, idx, indexBase, beta); + return SPGPU_SUCCESS; + +} + + +int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuZmnrm2(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, devVecA->count_, devVecA->pitch_); + return(i); +} + +int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuZmamax(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, + devVecA->count_, devVecA->pitch_); + return(i); +} + +int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + + spgpuZmasum(handle, y_res, n,(cuDoubleComplex *)devVecA->v_, + devVecA->count_, devVecA->pitch_); + + return(i); +} + +int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA) +{ int i=0; + spgpuHandle_t handle=psb_cudaGetHandle(); + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + // Note: inner kernel can handle aliased input/output + spgpuZscal(handle, (cuDoubleComplex *)devVecA->v_, devVecA->pitch_, + alpha, (cuDoubleComplex *)devVecA->v_); + return(i); +} + +int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA, void* devMultiVecB) +{int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; + spgpuHandle_t handle=psb_cudaGetHandle(); + + spgpuZmdot(handle, y_res, n, (cuDoubleComplex*)devVecA->v_, + (cuDoubleComplex*)devVecB->v_,devVecA->count_,devVecB->pitch_); + return(i); +} + +int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, + cuDoubleComplex beta, cuDoubleComplex gamma, cuDoubleComplex delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuZabgdxyz(handle,n, alpha,beta,gamma,delta, + (cuDoubleComplex *)devVecX->v_,(cuDoubleComplex *) devVecY->v_,(cuDoubleComplex *) devVecZ->v_); + return(i); +} + +int xyzwMultiVecDeviceDoubleComplex(int n,cuDoubleComplex a, cuDoubleComplex b, + cuDoubleComplex c, cuDoubleComplex d, + cuDoubleComplex e, cuDoubleComplex f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) devMultiVecZ; + struct MultiVectDevice *devVecW = (struct MultiVectDevice *) devMultiVecW; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuZxyzw(handle,n, a,b,c,d,e,f, + (cuDoubleComplex *)devVecX->v_,(cuDoubleComplex *) devVecY->v_, + (cuDoubleComplex *) devVecZ->v_,(cuDoubleComplex *) devVecW->v_); + return(i); +} + +int axpbyMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, void* devMultiVecX, + cuDoubleComplex beta, void* devMultiVecY) +{ int j=0, i=0; + int pitch = 0; + struct MultiVectDevice *devVecX = (struct MultiVectDevice *) devMultiVecX; + struct MultiVectDevice *devVecY = (struct MultiVectDevice *) devMultiVecY; + spgpuHandle_t handle=psb_cudaGetHandle(); + pitch = devVecY->pitch_; + if ((n > devVecY->size_) || (n>devVecX->size_ )) + return SPGPU_UNSUPPORTED; + + for(j=0;jcount_;j++) + spgpuZaxpby(handle,(cuDoubleComplex*)devVecY->v_+pitch*j, n, beta, + (cuDoubleComplex*)devVecY->v_+pitch*j, alpha, + (cuDoubleComplex*) devVecX->v_+pitch*j); + return(i); +} + +int axyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, + void *deviceVecA, void *deviceVecB) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + spgpuHandle_t handle=psb_cudaGetHandle(); + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuZmaxy(handle, (cuDoubleComplex*)devVecB->v_, n, alpha, + (cuDoubleComplex*)devVecA->v_, + (cuDoubleComplex*)devVecB->v_, devVecA->count_, devVecA->pitch_); + + return(i); +} + +int axybzMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA, + void *deviceVecB, cuDoubleComplex beta, void *deviceVecZ) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + struct MultiVectDevice *devVecZ = (struct MultiVectDevice *) deviceVecZ; + spgpuHandle_t handle=psb_cudaGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ ) || (n>devVecZ->size_ )) + return SPGPU_UNSUPPORTED; + spgpuZmaxypbz(handle, (cuDoubleComplex*)devVecZ->v_, n, beta, + (cuDoubleComplex*)devVecZ->v_, + alpha, (cuDoubleComplex*) devVecA->v_, (cuDoubleComplex*) devVecB->v_, + devVecB->count_, devVecB->pitch_); + return(i); +} + + +int absMultiVecDeviceDoubleComplex2(int n, cuDoubleComplex alpha, void *deviceVecA, + void *deviceVecB) +{ int i=0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + struct MultiVectDevice *devVecB = (struct MultiVectDevice *) deviceVecB; + + spgpuHandle_t handle=psb_cudaGetHandle(); + + if ((n > devVecA->size_) || (n>devVecB->size_ )) + return SPGPU_UNSUPPORTED; + + spgpuZabs(handle, (cuDoubleComplex*)devVecB->v_, n, + alpha, (cuDoubleComplex*)devVecA->v_); + + return(i); +} + +int absMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA) +{ int i = 0; + struct MultiVectDevice *devVecA = (struct MultiVectDevice *) deviceVecA; + spgpuHandle_t handle=psb_cudaGetHandle(); + if (n > devVecA->size_) + return SPGPU_UNSUPPORTED; + + spgpuZabs(handle, (cuDoubleComplex*)devVecA->v_, n, + alpha, (cuDoubleComplex*)devVecA->v_); + + return(i); +} + diff --git a/cuda/zvectordev.h b/cuda/zvectordev.h new file mode 100644 index 00000000..ae623bdb --- /dev/null +++ b/cuda/zvectordev.h @@ -0,0 +1,96 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + + + +#pragma once +//#include "utils.h" +#include +#include "cuComplex.h" +#include "vectordev.h" +#include "cuda_runtime.h" +#include "core.h" + +int registerMappedDoubleComplex(void *, void **, int, cuDoubleComplex); +int writeMultiVecDeviceDoubleComplex(void* deviceMultiVec, cuDoubleComplex* hostMultiVec); +int writeMultiVecDeviceDoubleComplexR2(void* deviceMultiVec, + cuDoubleComplex* hostMultiVec, int ld); +int readMultiVecDeviceDoubleComplex(void* deviceMultiVec, cuDoubleComplex* hostMultiVec); +int readMultiVecDeviceDoubleComplexR2(void* deviceMultiVec, + cuDoubleComplex* hostMultiVec, int ld); +int setscalMultiVecDeviceDoubleComplex(cuDoubleComplex val, int first, int last, + int indexBase, void* devVecX); + +int geinsMultiVecDeviceDoubleComplex(int n, void* devVecIrl, void* devVecVal, + int dupl, int indexBase, void* devVecX); + +int igathMultiVecDeviceDoubleComplexVecIdx(void* deviceVec, int vectorId, int n, + int first, void* deviceIdx, int hfirst, + void* host_values, int indexBase); +int igathMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, + int first, void* indexes, + int hfirst, void* host_values, + int indexBase); +int iscatMultiVecDeviceDoubleComplexVecIdx(void* deviceVec, int vectorId, + int n, int first, + void *deviceIdx, int hfirst, + void* host_values, + int indexBase, cuDoubleComplex beta); +int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n, + int first, void *indexes, + int hfirst, void* host_values, + int indexBase, cuDoubleComplex beta); + +int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA); +int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA); +int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA); +int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA); +int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, + void* devVecA, void* devVecB); + +int axpbyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void* devVecX, + cuDoubleComplex beta, void* devVecY); +int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex alpha, + cuDoubleComplex beta, cuDoubleComplex gamma, cuDoubleComplex delta, + void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ); +int xyzwMultiVecDeviceDoubleComplex(int n,cuDoubleComplex a, cuDoubleComplex b, + cuDoubleComplex c, cuDoubleComplex d, + cuDoubleComplex e, cuDoubleComplex f, + void* devMultiVecX, void* devMultiVecY, + void* devMultiVecZ, void* devMultiVecW); +int axyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, + void *deviceVecA, void *deviceVecB); +int axybzMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA, + void *deviceVecB, cuDoubleComplex beta, + void *deviceVecZ); +int absMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void *deviceVecA); +int absMultiVecDeviceDoubleComplex2(int n, cuDoubleComplex alpha, + void *deviceVecA, void *deviceVecB); + diff --git a/docs/html/cmsy10-42.png b/docs/html/cmsy10-42.png index bd31f1f8..3ebb79e6 100644 Binary files a/docs/html/cmsy10-42.png and b/docs/html/cmsy10-42.png differ diff --git a/docs/html/cmsy10-48.png b/docs/html/cmsy10-48.png index df31f928..04f134fe 100644 Binary files a/docs/html/cmsy10-48.png and b/docs/html/cmsy10-48.png differ diff --git a/docs/html/cmsy10-49.png b/docs/html/cmsy10-49.png index cb292c09..37ce5dc4 100644 Binary files a/docs/html/cmsy10-49.png and b/docs/html/cmsy10-49.png differ diff --git a/docs/html/dia.png b/docs/html/dia.png new file mode 100644 index 00000000..de7db919 Binary files /dev/null and b/docs/html/dia.png differ diff --git a/docs/html/ell.png b/docs/html/ell.png new file mode 100644 index 00000000..31911882 Binary files /dev/null and b/docs/html/ell.png differ diff --git a/docs/html/hdia.png b/docs/html/hdia.png new file mode 100644 index 00000000..08bfb5ff Binary files /dev/null and b/docs/html/hdia.png differ diff --git a/docs/html/hll.png b/docs/html/hll.png new file mode 100644 index 00000000..219b751a Binary files /dev/null and b/docs/html/hll.png differ diff --git a/docs/html/index.html b/docs/html/index.html index c4f777e4..083bd90a 100644 --- a/docs/html/index.html +++ b/docs/html/index.html @@ -10,7 +10,7 @@ -

PSBLAS
User’s and Reference Guide
Salvatore Filippone
Alfredo Buttari
Software version: 3.8.0
May 1st, 2022 +class="newline" />Software version: 3.9.0
Aug 1st, 2024 @@ -29,219 +29,39 @@ class="newline" />May 1st, 2022

- Contents -
1  1 Introduction -
2  2 General overview -
 2.1 Basic Nomenclature -
 2.2 Library contents -
 2.3 Application structure -
 2.4 Programming model -
3 Data Structures and Classes -
 3.1 Descriptor data structure -
 3.2 Sparse Matrix class -
 3.3 Dense Vector Data Structure -
 3.4 Preconditioner data structure -
 3.5 Heap data structure -
4 Computational routines -
 4.1 psb_geaxpby — General Dense Matrix Sum -
 4.2 psb_gedot — Dot Product -
 4.3 psb_gedots — Generalized Dot Product -
 4.4 psb_normi — Infinity-Norm of Vector -
 4.5 psb_geamaxs — Generalized Infinity Norm -
 4.6 psb_norm1 — 1-Norm of Vector -
 4.7 psb_geasums — Generalized 1-Norm of Vector -
 4.8 psb_norm2 — 2-Norm of Vector -
 4.9 psb_genrm2s — Generalized 2-Norm of Vector -
 4.10 psb_norm1 — 1-Norm of Sparse Matrix -
 4.11 psb_normi — Infinity Norm of Sparse Matrix -
 4.12 psb_spmm — Sparse Matrix by Dense Matrix Product -
 4.13 psb_spsm — Triangular System Solve -
 4.14 psb_gemlt — Entrywise Product -
 4.15 psb_gediv — Entrywise Division -
 4.16 psb_geinv — Entrywise Inversion -
5 Communication routines -
 5.1 psb_halo — Halo Data Communication -
 5.2 psb_ovrl — Overlap Update -
 5.3 psb_gather — Gather Global Dense Matrix -
 5.4 psb_scatter — Scatter Global Dense Matrix -
6 Data management routines -
 6.1 psb_cdall — Allocates a communication descriptor -
 6.2 psb_cdins — Communication descriptor insert routine -
 6.3 psb_cdasb — Communication descriptor assembly routine -
 6.4 psb_cdcpy — Copies a communication descriptor -
 6.5 psb_cdfree — Frees a communication descriptor -
 6.6 psb_cdbldext — Build an extended communication descriptor -
 6.7 psb_spall — Allocates a sparse matrix -
 6.8 psb_spins — Insert a set of coefficients into a sparse matrix -
 6.9 psb_spasb — Sparse matrix assembly routine -
 6.10 psb_spfree — Frees a sparse matrix -
 6.11 psb_sprn — Reinit sparse matrix structure for psblas routines. -
 6.12 psb_geall — Allocates a dense matrix - +
 3 Data Structures and Classes +
 4 Computational routines +
 5 Communication routines +
 6 Data management routines +
 7 Parallel environment routines +
 8 Error handling +
 9 Utilities +
 10 Preconditioner routines +
 11 Iterative Methods +
 12 Extensions +
 13 CUDA Environment Routines +
 References +
- -
 6.13 psb_geins — Dense matrix insertion routine -
 6.14 psb_geasb — Assembly a dense matrix -
 6.15 psb_gefree — Frees a dense matrix -
 6.16 psb_gelp — Applies a left permutation to a dense matrix -
 6.17 psb_glob_to_loc — Global to local indices convertion -
 6.18 psb_loc_to_glob — Local to global indices conversion -
 6.19 psb_is_owned — -
 6.20 psb_owned_index — -
 6.21 psb_is_local — -
 6.22 psb_local_index — -
 6.23 psb_get_boundary — Extract list of boundary elements -
 6.24 psb_get_overlap — Extract list of overlap elements -
 6.25 psb_sp_getrow — Extract row(s) from a sparse matrix -
 6.26 psb_sizeof — Memory occupation -
 6.27 Sorting utilities — -
7 Parallel environment routines -
 7.1 psb_init — Initializes PSBLAS parallel environment -
 7.2 psb_info — Return information about PSBLAS parallel environment -
 7.3 psb_exit — Exit from PSBLAS parallel environment -
 7.4 psb_get_mpi_comm — Get the MPI communicator -
 7.5 psb_get_mpi_rank — Get the MPI rank -
 7.6 psb_wtime — Wall clock timing -
 7.7 psb_barrier — Sinchronization point parallel environment -
 7.8 psb_abort — Abort a computation -
 7.9 psb_bcast — Broadcast data -
 7.10 psb_sum — Global sum -
 7.11 psb_max — Global maximum -
 7.12 psb_min — Global minimum -
 7.13 psb_amx — Global maximum absolute value -
 7.14 psb_amn — Global minimum absolute value -
 7.15 psb_nrm2 — Global 2-norm reduction -
 7.16 psb_snd — Send data -
 7.17 psb_rcv — Receive data -
8 Error handling -
 8.1 psb_errpush — Pushes an error code onto the error stack -
 8.2 psb_error — Prints the error stack content and aborts execution -
 8.3 psb_set_errverbosity — Sets the verbosity of error messages -
 8.4 psb_set_erraction — Set the type of action to be taken upon error condition -
9 Utilities -
 9.1 hb_read — Read a sparse matrix from a file in the Harwell–Boeing format -
 9.2 hb_write — Write a sparse matrix to a file in the Harwell–Boeing format -
 9.3 mm_mat_read — Read a sparse matrix from a file in the MatrixMarket format -
 9.4 mm_array_read — Read a dense array from a file in the MatrixMarket format -
 9.5 mm_mat_write — Write a sparse matrix to a file in the MatrixMarket format -
 9.6 mm_array_write — Write a dense array from a file in the MatrixMarket format -
10 Preconditioner routines -
 10.1 init — Initialize a preconditioner -
 10.2 build — Builds a preconditioner -
 10.3 apply — Preconditioner application routine -
 10.4 descr — Prints a description of current preconditioner - - -
 10.5 clone — clone current preconditioner -
 10.6 free — Free a preconditioner -
11 Iterative Methods -
 11.1 psb_krylov — Krylov Methods Driver Routine -
References - diff --git a/docs/html/mat.png b/docs/html/mat.png new file mode 100644 index 00000000..d4f5c6f9 Binary files /dev/null and b/docs/html/mat.png differ diff --git a/docs/html/psblaslibraryext.png b/docs/html/psblaslibraryext.png new file mode 100644 index 00000000..da3b3a9c Binary files /dev/null and b/docs/html/psblaslibraryext.png differ diff --git a/docs/html/userhtml.css b/docs/html/userhtml.css index 31d7eb99..9dc17886 100644 --- a/docs/html/userhtml.css +++ b/docs/html/userhtml.css @@ -33,21 +33,26 @@ p.indent{text-indent:0;} p + p{margin-top:1em;} p + div, p + pre {margin-top:1em;} div + p, pre + p {margin-top:1em;} +a { overflow-wrap: break-word; word-wrap: break-word; word-break: break-word; hyphens: auto; } @media print {div.crosslinks {visibility:hidden;}} +table.tabular{border-collapse: collapse; border-spacing: 0;} a img { border-top: 0; border-left: 0; border-right: 0; } center { margin-top:1em; margin-bottom:1em; } td center { margin-top:0em; margin-bottom:0em; } .Canvas { position:relative; } img.math{vertical-align:middle;} +div.par-math-display, div.math-display{text-align:center;} li p.indent { text-indent: 0em } li p:first-child{ margin-top:0em; } li p:last-child, li div:last-child { margin-bottom:0.5em; } +li p:first-child{ margin-bottom:0; } li p~ul:last-child, li p~ol:last-child{ margin-bottom:0.5em; } .enumerate1 {list-style-type:decimal;} .enumerate2 {list-style-type:lower-alpha;} .enumerate3 {list-style-type:lower-roman;} .enumerate4 {list-style-type:upper-alpha;} div.newtheorem { margin-bottom: 2em; margin-top: 2em;} +div.newtheorem .head{font-weight: bold;} .obeylines-h,.obeylines-v {white-space: nowrap; } div.obeylines-v p { margin-top:0; margin-bottom:0; } .overline{ text-decoration:overline; } @@ -91,6 +96,9 @@ table[rules] {border-left:solid black 0.4pt; border-right:solid black 0.4pt; } .hline hr, .cline hr{ height : 0px; margin:0px; } .hline td, .cline td{ padding: 0; } .hline hr, .cline hr{border:none;border-top:1px solid black;} +.hline {border-top: 1px solid black;} +.hline + .vspace:last-child{display:none;} +.hline:first-child{border-bottom:1px solid black;border-top:none;} .tabbing-right {text-align:right;} div.float, div.figure {margin-left: auto; margin-right: auto;} div.float img {text-align:center;} @@ -115,15 +123,16 @@ table.pmatrix {width:100%;} span.bar-css {text-decoration:overline;} img.cdots{vertical-align:middle;} .partToc a, .partToc, .likepartToc a, .likepartToc {line-height: 200%; font-weight:bold; font-size:110%;} +.chapterToc a, .chapterToc, .likechapterToc a, .likechapterToc, .appendixToc a, .appendixToc {line-height: 200%; font-weight:bold;} .index-item, .index-subitem, .index-subsubitem {display:block} div.caption {text-indent:-2em; margin-left:3em; margin-right:1em; text-align:left;} div.caption span.id{font-weight: bold; white-space: nowrap; } h1.partHead{text-align: center} p.bibitem { text-indent: -2em; margin-left: 2em; margin-top:0.6em; margin-bottom:0.6em; } p.bibitem-p { text-indent: 0em; margin-left: 2em; margin-top:0.6em; margin-bottom:0.6em; } +.subsubsectionHead, .likesubsubsectionHead { font-size: 1em; } .paragraphHead, .likeparagraphHead { margin-top:2em; font-weight: bold;} .subparagraphHead, .likesubparagraphHead { font-weight: bold;} -.quote {margin-bottom:0.25em; margin-top:0.25em; margin-left:1em; margin-right:1em; text-align:justify;} .verse{white-space:nowrap; margin-left:2em} div.maketitle {text-align:center;} h2.titleHead{text-align:center;} @@ -131,19 +140,23 @@ div.maketitle{ margin-bottom: 2em; } div.author, div.date {text-align:center;} div.thanks{text-align:left; margin-left:10%; font-size:85%; font-style:italic; } div.author{white-space: nowrap;} -.quotation {margin-bottom:0.25em; margin-top:0.25em; margin-left:1em; } -.abstract p {margin-left:5%; margin-right:5%;} +div.abstract p {margin-left:5%; margin-right:5%;} div.abstract {width:100%;} +.abstracttitle{text-align:center;margin-bottom:1em;} .subsectionToc, .likesubsectionToc {margin-left:2em;} .subsubsectionToc, .likesubsubsectionToc {margin-left:4em;} +.paragraphToc, .likeparagraphToc {margin-left:6em;} +.subparagraphToc, .likesubparagraphToc {margin-left:8em;} .ovalbox { padding-left:3pt; padding-right:3pt; border:solid thin; } .Ovalbox-thick { padding-left:3pt; padding-right:3pt; border:solid thick; } .shadowbox { padding-left:3pt; padding-right:3pt; border:solid thin; border-right:solid thick; border-bottom:solid thick; } .doublebox { padding-left:3pt; padding-right:3pt; border-style:double; border:solid thick; } .rotatebox{display: inline-block;} +code.lstinline{font-family:monospace,monospace;} +pre.listings{font-family: monospace,monospace; white-space: pre-wrap; margin-top:0.5em; margin-bottom:0.5em; } .lstlisting .label{margin-right:0.5em; } -div.lstlisting{font-family: monospace,monospace; white-space: nowrap; margin-top:0.5em; margin-bottom:0.5em; } -div.lstinputlisting{ font-family: monospace,monospace; white-space: nowrap; } +pre.lstlisting{font-family: monospace,monospace; white-space: pre-wrap; margin-top:0.5em; margin-bottom:0.5em; } +pre.lstinputlisting{ font-family: monospace,monospace; white-space: pre-wrap; } .lstinputlisting .label{margin-right:0.5em;} /* end css.sty */ diff --git a/docs/html/userhtml.html b/docs/html/userhtml.html index c4f777e4..083bd90a 100644 --- a/docs/html/userhtml.html +++ b/docs/html/userhtml.html @@ -10,7 +10,7 @@ -

PSBLAS
User’s and Reference Guide
Salvatore Filippone
Alfredo Buttari
Software version: 3.8.0
May 1st, 2022 +class="newline" />Software version: 3.9.0
Aug 1st, 2024 @@ -29,219 +29,39 @@ class="newline" />May 1st, 2022

- Contents -
1  1 Introduction -
2  2 General overview -
 2.1 Basic Nomenclature -
 2.2 Library contents -
 2.3 Application structure -
 2.4 Programming model -
3 Data Structures and Classes -
 3.1 Descriptor data structure -
 3.2 Sparse Matrix class -
 3.3 Dense Vector Data Structure -
 3.4 Preconditioner data structure -
 3.5 Heap data structure -
4 Computational routines -
 4.1 psb_geaxpby — General Dense Matrix Sum -
 4.2 psb_gedot — Dot Product -
 4.3 psb_gedots — Generalized Dot Product -
 4.4 psb_normi — Infinity-Norm of Vector -
 4.5 psb_geamaxs — Generalized Infinity Norm -
 4.6 psb_norm1 — 1-Norm of Vector -
 4.7 psb_geasums — Generalized 1-Norm of Vector -
 4.8 psb_norm2 — 2-Norm of Vector -
 4.9 psb_genrm2s — Generalized 2-Norm of Vector -
 4.10 psb_norm1 — 1-Norm of Sparse Matrix -
 4.11 psb_normi — Infinity Norm of Sparse Matrix -
 4.12 psb_spmm — Sparse Matrix by Dense Matrix Product -
 4.13 psb_spsm — Triangular System Solve -
 4.14 psb_gemlt — Entrywise Product -
 4.15 psb_gediv — Entrywise Division -
 4.16 psb_geinv — Entrywise Inversion -
5 Communication routines -
 5.1 psb_halo — Halo Data Communication -
 5.2 psb_ovrl — Overlap Update -
 5.3 psb_gather — Gather Global Dense Matrix -
 5.4 psb_scatter — Scatter Global Dense Matrix -
6 Data management routines -
 6.1 psb_cdall — Allocates a communication descriptor -
 6.2 psb_cdins — Communication descriptor insert routine -
 6.3 psb_cdasb — Communication descriptor assembly routine -
 6.4 psb_cdcpy — Copies a communication descriptor -
 6.5 psb_cdfree — Frees a communication descriptor -
 6.6 psb_cdbldext — Build an extended communication descriptor -
 6.7 psb_spall — Allocates a sparse matrix -
 6.8 psb_spins — Insert a set of coefficients into a sparse matrix -
 6.9 psb_spasb — Sparse matrix assembly routine -
 6.10 psb_spfree — Frees a sparse matrix -
 6.11 psb_sprn — Reinit sparse matrix structure for psblas routines. -
 6.12 psb_geall — Allocates a dense matrix - +
 3 Data Structures and Classes +
 4 Computational routines +
 5 Communication routines +
 6 Data management routines +
 7 Parallel environment routines +
 8 Error handling +
 9 Utilities +
 10 Preconditioner routines +
 11 Iterative Methods +
 12 Extensions +
 13 CUDA Environment Routines +
 References +
- -
 6.13 psb_geins — Dense matrix insertion routine -
 6.14 psb_geasb — Assembly a dense matrix -
 6.15 psb_gefree — Frees a dense matrix -
 6.16 psb_gelp — Applies a left permutation to a dense matrix -
 6.17 psb_glob_to_loc — Global to local indices convertion -
 6.18 psb_loc_to_glob — Local to global indices conversion -
 6.19 psb_is_owned — -
 6.20 psb_owned_index — -
 6.21 psb_is_local — -
 6.22 psb_local_index — -
 6.23 psb_get_boundary — Extract list of boundary elements -
 6.24 psb_get_overlap — Extract list of overlap elements -
 6.25 psb_sp_getrow — Extract row(s) from a sparse matrix -
 6.26 psb_sizeof — Memory occupation -
 6.27 Sorting utilities — -
7 Parallel environment routines -
 7.1 psb_init — Initializes PSBLAS parallel environment -
 7.2 psb_info — Return information about PSBLAS parallel environment -
 7.3 psb_exit — Exit from PSBLAS parallel environment -
 7.4 psb_get_mpi_comm — Get the MPI communicator -
 7.5 psb_get_mpi_rank — Get the MPI rank -
 7.6 psb_wtime — Wall clock timing -
 7.7 psb_barrier — Sinchronization point parallel environment -
 7.8 psb_abort — Abort a computation -
 7.9 psb_bcast — Broadcast data -
 7.10 psb_sum — Global sum -
 7.11 psb_max — Global maximum -
 7.12 psb_min — Global minimum -
 7.13 psb_amx — Global maximum absolute value -
 7.14 psb_amn — Global minimum absolute value -
 7.15 psb_nrm2 — Global 2-norm reduction -
 7.16 psb_snd — Send data -
 7.17 psb_rcv — Receive data -
8 Error handling -
 8.1 psb_errpush — Pushes an error code onto the error stack -
 8.2 psb_error — Prints the error stack content and aborts execution -
 8.3 psb_set_errverbosity — Sets the verbosity of error messages -
 8.4 psb_set_erraction — Set the type of action to be taken upon error condition -
9 Utilities -
 9.1 hb_read — Read a sparse matrix from a file in the Harwell–Boeing format -
 9.2 hb_write — Write a sparse matrix to a file in the Harwell–Boeing format -
 9.3 mm_mat_read — Read a sparse matrix from a file in the MatrixMarket format -
 9.4 mm_array_read — Read a dense array from a file in the MatrixMarket format -
 9.5 mm_mat_write — Write a sparse matrix to a file in the MatrixMarket format -
 9.6 mm_array_write — Write a dense array from a file in the MatrixMarket format -
10 Preconditioner routines -
 10.1 init — Initialize a preconditioner -
 10.2 build — Builds a preconditioner -
 10.3 apply — Preconditioner application routine -
 10.4 descr — Prints a description of current preconditioner - - -
 10.5 clone — clone current preconditioner -
 10.6 free — Free a preconditioner -
11 Iterative Methods -
 11.1 psb_krylov — Krylov Methods Driver Routine -
References - diff --git a/docs/html/userhtml0x.png b/docs/html/userhtml0x.png index bed5b304..3e324fc8 100644 Binary files a/docs/html/userhtml0x.png and b/docs/html/userhtml0x.png differ diff --git a/docs/html/userhtml10x.png b/docs/html/userhtml10x.png index a0d4d0d4..5db77543 100644 Binary files a/docs/html/userhtml10x.png and b/docs/html/userhtml10x.png differ diff --git a/docs/html/userhtml11x.png b/docs/html/userhtml11x.png index 634594e1..e168a574 100644 Binary files a/docs/html/userhtml11x.png and b/docs/html/userhtml11x.png differ diff --git a/docs/html/userhtml12x.png b/docs/html/userhtml12x.png index 7257667a..df163814 100644 Binary files a/docs/html/userhtml12x.png and b/docs/html/userhtml12x.png differ diff --git a/docs/html/userhtml13x.png b/docs/html/userhtml13x.png index 4c70ecb3..108c35cd 100644 Binary files a/docs/html/userhtml13x.png and b/docs/html/userhtml13x.png differ diff --git a/docs/html/userhtml14x.png b/docs/html/userhtml14x.png index 711ce998..9a0f37dd 100644 Binary files a/docs/html/userhtml14x.png and b/docs/html/userhtml14x.png differ diff --git a/docs/html/userhtml15x.png b/docs/html/userhtml15x.png index 00e0d73d..feaec761 100644 Binary files a/docs/html/userhtml15x.png and b/docs/html/userhtml15x.png differ diff --git a/docs/html/userhtml16.html b/docs/html/userhtml16.html new file mode 100644 index 00000000..88e57e0e --- /dev/null +++ b/docs/html/userhtml16.html @@ -0,0 +1,19 @@ + + + + + + + + + + +
+

4The string is case-insensitive

+ + diff --git a/docs/html/userhtml16x.png b/docs/html/userhtml16x.png index 95794ed0..3f5b67d7 100644 Binary files a/docs/html/userhtml16x.png and b/docs/html/userhtml16x.png differ diff --git a/docs/html/userhtml17x.png b/docs/html/userhtml17x.png index 27a7387e..26e4a3a9 100644 Binary files a/docs/html/userhtml17x.png and b/docs/html/userhtml17x.png differ diff --git a/docs/html/userhtml18.html b/docs/html/userhtml18.html new file mode 100644 index 00000000..245d8cd5 --- /dev/null +++ b/docs/html/userhtml18.html @@ -0,0 +1,20 @@ + + + + + + + + + + +
+

5Note: the implementation is for FCG(1).

+ diff --git a/docs/html/userhtml18x.png b/docs/html/userhtml18x.png index ebcb935d..d33d825c 100644 Binary files a/docs/html/userhtml18x.png and b/docs/html/userhtml18x.png differ diff --git a/docs/html/userhtml19x.png b/docs/html/userhtml19x.png index 5ac0302a..a44a8a1a 100644 Binary files a/docs/html/userhtml19x.png and b/docs/html/userhtml19x.png differ diff --git a/docs/html/userhtml1x.png b/docs/html/userhtml1x.png index 465c255b..933f19a7 100644 Binary files a/docs/html/userhtml1x.png and b/docs/html/userhtml1x.png differ diff --git a/docs/html/userhtml20x.png b/docs/html/userhtml20x.png index f9f0de44..170b8f17 100644 Binary files a/docs/html/userhtml20x.png and b/docs/html/userhtml20x.png differ diff --git a/docs/html/userhtml21x.png b/docs/html/userhtml21x.png index 85201b8c..10ebc465 100644 Binary files a/docs/html/userhtml21x.png and b/docs/html/userhtml21x.png differ diff --git a/docs/html/userhtml22x.png b/docs/html/userhtml22x.png index 8c882bc2..700fc628 100644 Binary files a/docs/html/userhtml22x.png and b/docs/html/userhtml22x.png differ diff --git a/docs/html/userhtml23x.png b/docs/html/userhtml23x.png index 30c84d3d..eeeb8317 100644 Binary files a/docs/html/userhtml23x.png and b/docs/html/userhtml23x.png differ diff --git a/docs/html/userhtml24x.png b/docs/html/userhtml24x.png index 0910fe9c..194d79c7 100644 Binary files a/docs/html/userhtml24x.png and b/docs/html/userhtml24x.png differ diff --git a/docs/html/userhtml25x.png b/docs/html/userhtml25x.png index 88630c50..51f10fd7 100644 Binary files a/docs/html/userhtml25x.png and b/docs/html/userhtml25x.png differ diff --git a/docs/html/userhtml26x.png b/docs/html/userhtml26x.png index ac0518e5..37257ff8 100644 Binary files a/docs/html/userhtml26x.png and b/docs/html/userhtml26x.png differ diff --git a/docs/html/userhtml27x.png b/docs/html/userhtml27x.png index 57beeb01..bd9e8f4f 100644 Binary files a/docs/html/userhtml27x.png and b/docs/html/userhtml27x.png differ diff --git a/docs/html/userhtml28x.png b/docs/html/userhtml28x.png index b2106916..112718a9 100644 Binary files a/docs/html/userhtml28x.png and b/docs/html/userhtml28x.png differ diff --git a/docs/html/userhtml29x.png b/docs/html/userhtml29x.png index b4efccdb..46a83099 100644 Binary files a/docs/html/userhtml29x.png and b/docs/html/userhtml29x.png differ diff --git a/docs/html/userhtml2x.png b/docs/html/userhtml2x.png index 49dbf305..bc929ca7 100644 Binary files a/docs/html/userhtml2x.png and b/docs/html/userhtml2x.png differ diff --git a/docs/html/userhtml30x.png b/docs/html/userhtml30x.png index 509285f3..276f94cf 100644 Binary files a/docs/html/userhtml30x.png and b/docs/html/userhtml30x.png differ diff --git a/docs/html/userhtml31x.png b/docs/html/userhtml31x.png index 72203987..55f88e51 100644 Binary files a/docs/html/userhtml31x.png and b/docs/html/userhtml31x.png differ diff --git a/docs/html/userhtml32x.png b/docs/html/userhtml32x.png index a92c5455..f1fb77ea 100644 Binary files a/docs/html/userhtml32x.png and b/docs/html/userhtml32x.png differ diff --git a/docs/html/userhtml3x.png b/docs/html/userhtml3x.png index 3676aa53..29404472 100644 Binary files a/docs/html/userhtml3x.png and b/docs/html/userhtml3x.png differ diff --git a/docs/html/userhtml4x.png b/docs/html/userhtml4x.png index 1ce118eb..e17fca31 100644 Binary files a/docs/html/userhtml4x.png and b/docs/html/userhtml4x.png differ diff --git a/docs/html/userhtml5.html b/docs/html/userhtml5.html index 0664a74b..8231fcf4 100644 --- a/docs/html/userhtml5.html +++ b/docs/html/userhtml5.html @@ -12,7 +12,8 @@ >

1 1In our prototype implementation we provide sample scatter/gather routines.

diff --git a/docs/html/userhtml5x.png b/docs/html/userhtml5x.png index cabc66d4..324d7f45 100644 Binary files a/docs/html/userhtml5x.png and b/docs/html/userhtml5x.png differ diff --git a/docs/html/userhtml6.html b/docs/html/userhtml6.html new file mode 100644 index 00000000..d6dbd1dc --- /dev/null +++ b/docs/html/userhtml6.html @@ -0,0 +1,24 @@ + + + + + + + + + + +
+

2This is the normal situation when the pattern of the sparse matrix is symmetric, which is + equivalent to say that the interaction between two variables is reciprocal. If the matrix pattern is + non-symmetric we may have one-way interactions, and these could cause a situation in which a + boundary point is not a halo point for its neighbour.

+ diff --git a/docs/html/userhtml6x.png b/docs/html/userhtml6x.png index a0ef30ce..72a36e5d 100644 Binary files a/docs/html/userhtml6x.png and b/docs/html/userhtml6x.png differ diff --git a/docs/html/userhtml7.html b/docs/html/userhtml7.html index ae82b72f..f61a7dac 100644 --- a/docs/html/userhtml7.html +++ b/docs/html/userhtml7.html @@ -11,13 +11,16 @@
-

2This is the normal situation when the pattern of the sparse matrix is symmetric, which is +

3The subroutine style psb_precinit and psb_precbl are still supported for backward equivalent to say that the interaction between two variables is reciprocal. If the matrix pattern is - non-symmetric we may have one-way interactions, and these could cause a situation in which a - boundary point is not a halo point for its neighbour.

+class="cmr-8">compatibility diff --git a/docs/html/userhtml7x.png b/docs/html/userhtml7x.png index a0d4d0d4..5db77543 100644 Binary files a/docs/html/userhtml7x.png and b/docs/html/userhtml7x.png differ diff --git a/docs/html/userhtml8x.png b/docs/html/userhtml8x.png index 147d6b03..221e309e 100644 Binary files a/docs/html/userhtml8x.png and b/docs/html/userhtml8x.png differ diff --git a/docs/html/userhtml9x.png b/docs/html/userhtml9x.png index dcf8438f..72555397 100644 Binary files a/docs/html/userhtml9x.png and b/docs/html/userhtml9x.png differ diff --git a/docs/html/userhtmlli1.html b/docs/html/userhtmlli1.html index 2f706b63..e767ddcf 100644 --- a/docs/html/userhtmlli1.html +++ b/docs/html/userhtmlli1.html @@ -10,306 +10,346 @@ - -

10 Preconditioner routines

+ id="x15-13500010">Preconditioner routines

The base PSBLAS library contains the implementation of two simple preconditioning techniques:

    -
  • Diagonal Scaling +
  • +

    Diagonal Scaling

  • -
  • Block Jacobi with ILU(0) factorization
+
  • +

    Block Jacobi with ILU(0) factorization

  • The supporting data type and subroutine interfaces are defined in the module psb_prec_mod. The old interfaces -  10.1 init — Initialize a preconditioner -
     10.2 build — Builds a preconditioner -
     10.3 apply — Preconditioner application routine -
     10.4 descr — Prints a description of current preconditioner -
     10.5 clone — clone current preconditioner -
     10.6 free — Free a preconditioner - +

    10.1 init — Initialize a preconditioner

    + + + +
    +call prec%init(icontxt,ptype, info)
    +
    +

    +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +icontxt

    +

    the communication context.
    Scope:global.
    Type:required.
    Intent: in.
    Specified as: an integer value. +

    +

    +ptype

    +

    the type of preconditioner. Scope: global
    Type: required
    Intent: in.
    Specified as: a character string, see usage notes. +

    +

    +On Exit

    +

    +

    +

    +prec

    +

    Scope: local
    Type: required
    Intent: inout.
    Specified as: a preconditioner data structure psb_prec_type. + + + +

    +

    +info

    +

    Scope: global
    Type: required
    Intent: out.
    Error code: if no error, 0 is returned.

    +

    Notes Legal inputs to this subroutine are interpreted depending on the ptype string as +follows4 : +

    +

    +NONE

    +

    No preconditioning, i.e. the preconditioner is just a copy operator. +

    +

    +DIAG

    +

    Diagonal scaling; each entry of the input vector is multiplied by the + reciprocal of the sum of the absolute values of the coefficients in the + corresponding row of matrix A; +

    +

    +BJAC

    +

    Precondition by a factorization of the block-diagonal of matrix A, where + block boundaries are determined by the data allocation boundaries + for each process; requires no communication. Only the incomplete + factorization ILU(0) is currently implemented.

    + + + +

    10.2 build — Builds a preconditioner

    + + + +
    +call prec%build(a, desc_a, info[,amold,vmold,imold])
    +
    +

    +

    +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +a

    +

    the system sparse matrix. Scope: local
    Type: required
    Intent: in, target.
    Specified as: a sparse matrix data structure psb_Tspmat_type. +

    +

    +prec

    +

    the preconditioner.
    Scope: local
    Type: required
    Intent: inout.
    Specified as: an already initialized precondtioner data structure + psb_prec_type
    +

    +

    +desc_a

    +

    the problem communication descriptor. Scope: local
    Type: required
    Intent: in, target.
    Specified as: a communication descriptor data structure psb_desc_type. +

    +

    +amold

    + + + +

    The desired dynamic type for the internal matrix storage.
    Scope: local.
    Type: optional.
    Intent: in.
    Specified as: an object of a class derived from psb_T_base_sparse_mat. +

    +

    +vmold

    +

    The desired dynamic type for the internal vector storage.
    Scope: local.
    Type: optional.
    Intent: in.
    Specified as: an object of a class derived from psb_T_base_vect_type. +

    +

    +imold

    +

    The desired dynamic type for the internal integer vector storage.
    Scope: local.
    Type: optional.
    Intent: in.
    Specified as: an object of a class derived from (integer) + psb_T_base_vect_type.

    +

    +

    +

    +On Return

    +

    +

    +

    +prec

    +

    the preconditioner.
    Scope: local
    Type: required
    Intent: inout.
    Specified as: a precondtioner data structure psb_prec_type
    +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + + +

    The amold, vmold and imold arguments may be employed to interface with special +devices, such as GPUs and other accelerators. + + + +

    10.3 apply — Preconditioner application routine

    + + + +
    +call prec%apply(x,y,desc_a,info,trans,work)
    +call prec%apply(x,desc_a,info,trans)
    +
    +

    +

    +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +prec

    +

    the preconditioner. Scope: local
    Type: required
    Intent: in.
    Specified as: a preconditioner data structure psb_prec_type. +

    +

    +x

    +

    the source vector. Scope: local
    Type: required
    Intent: inout.
    Specified as: a rank one array or an object of type psb_T_vect_type. +

    +

    +desc_a

    +

    the problem communication descriptor. Scope: local
    Type: required
    Intent: in.
    Specified as: a communication data structure psb_desc_type. +

    +

    +trans

    +

    Scope:
    Type: optional
    Intent: in.
    Specified as: a character. + + + +

    +

    +work

    +

    an optional work space Scope: local
    Type: optional
    Intent: inout.
    Specified as: a double precision array.

    +

    +

    +

    +On Return

    +

    +

    +

    +y

    +

    the destination vector. Scope: local
    Type: required
    Intent: inout.
    Specified as: a rank one array or an object of type psb_T_vect_type. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + + +

    10.4 descr — Prints a description of current preconditioner

    + + + +
    +call prec%descr(info)
    +call prec%descr(info,iout, root)
    +
    +

    +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +prec

    +

    the preconditioner. Scope: local
    Type: required
    Intent: in.
    Specified as: a preconditioner data structure psb_prec_type. +

    +

    +iout

    +

    output unit. Scope: local
    Type: optional
    Intent: in.
    Specified as: an integer number. Default: default output unit. +

    +

    +root

    +

    Process from which to print Scope: local
    Type: optional
    Intent: in.
    Specified as: an integer number between 0 and np - 1, in which case + the specified process will print the description, or -1, in which case all + processes will print. Default: 0. +

    +

    +On Return

    +

    + + + +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + + +

    10.5 clone — clone current preconditioner

    + + +
    +call  prec%clone(precout,info)
    +
    +

    +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +prec

    +

    the preconditioner.
    Scope: local.

    +

    +

    +

    +On Return

    +

    +

    +

    +precout

    +

    A copy of the input object. +

    +

    +info

    +

    Return code.

    + + +

    10.6 free — Free a preconditioner

    + + +
    +call prec%free(info)
    +
    +

    +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +prec

    +

    the preconditioner.
    Scope: local.
    Type: required
    Intent: inout.
    Specified as: a preconditioner data structure psb_prec_type. +

    +

    +On Exit

    +

    +

    +

    +prec

    +

    Scope: local
    Type: required
    Intent: inout.
    Specified as: a preconditioner data structure psb_prec_type. +

    +

    +info

    +

    Scope: global
    Type: required
    Intent: out.
    Error code: if no error, 0 is returned.

    +

    Notes Releases all internal storage. + + + + + + +

    +href="userhtml.html#userhtmlse13.html" >up]

    + id="tailuserhtmlse10.html"> diff --git a/docs/html/userhtmlse11.html b/docs/html/userhtmlse11.html index a9cc2529..41a4cd65 100644 --- a/docs/html/userhtmlse11.html +++ b/docs/html/userhtmlse11.html @@ -13,10 +13,10 @@

    11 Iterative Methods

    + id="x17-14200011">Iterative Methods

    In this chapter we provide routines for preconditioners and iterative methods. The interfaces for Krylov subspace methods are available in the module psb_krylov_mod. -

    +

    11.1 psb_krylov — Krylov Methods Driver Routine

    +

    This subroutine is a driver that provides a general interface for all the Krylov-Subspace +family methods implemented in PSBLAS version 2. +

    The stopping criterion can take the following values: +

    +

    +1

    +

    normwise backward error in the infinity norm; the iteration is stopped + when +

    +      -----∥ri∥------
+err = (∥A∥∥xi∥+ ∥b∥) < eps
+
    +

    +

    +

    +2

    +

    Relative residual in the 2-norm; the iteration is stopped when +

    +      ∥ri∥-
+err = ∥b∥2 < eps
+
    +

    +

    +

    +3

    +

    Relative residual reduction in the 2-norm; the iteration is stopped when +

    +      ∥ri∥
+err = ∥r0∥2 < eps
+
    +

    +

    The behaviour is controlled by the istop argument (see later). In the above formulae, xi +is the tentative solution and ri = b - Axi the corresponding residual at the i-th +iteration. + -

    4 Computational routines

    + id="x9-550004">Computational routines -
    -  4.1 psb_geaxpby — General Dense Matrix Sum -
     4.2 psb_gedot — Dot Product -
     4.3 psb_gedots — Generalized Dot Product -
     4.4 psb_normi — Infinity-Norm of Vector -
     4.5 psb_geamaxs — Generalized Infinity Norm -
     4.6 psb_norm1 — 1-Norm of Vector -
     4.7 psb_geasums — Generalized 1-Norm of Vector -
     4.8 psb_norm2 — 2-Norm of Vector -
     4.9 psb_genrm2s — Generalized 2-Norm of Vector -
     4.10 psb_norm1 — 1-Norm of Sparse Matrix -
     4.11 psb_normi — Infinity Norm of Sparse Matrix -
     4.12 psb_spmm — Sparse Matrix by Dense Matrix Product -
     4.13 psb_spsm — Triangular System Solve -
     4.14 psb_gemlt — Entrywise Product -
     4.15 psb_gediv — Entrywise Division -
     4.16 psb_geinv — Entrywise Inversion +

    4.1 psb_geaxpby — General Dense Matrix Sum

    +

    This subroutine is an interface to the computational kernel for dense matrix +sum: +

    +y ← α x+ βy
+
    +

    +

    call psb_geaxpby(alpha, x, beta, y, desc_a, info) +

    + + + +


    + + + +
    +

    +

    + + + + +


    x, y, α, β Subroutine


    Short Precision Real psb_geaxpby
    Long Precision Real psb_geaxpby
    Short Precision Complexpsb_geaxpby
    Long Precision Complex psb_geaxpby


    +
    Table 1: Data types
    + + + +

    +
    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +alpha

    +

    the scalar α.
    Scope: global
    Type: required
    Intent: in.
    Specified as: a number of the data type indicated in Table 1. +

    +

    +x

    +

    the local portion of global dense matrix x.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 1. The + rank of x must be the same of y. +

    +

    +beta

    +

    the scalar β.
    Scope: global
    Type: required
    Intent: in.
    Specified as: a number of the data type indicated in Table 1. +

    +

    +y

    +

    the local portion of the global dense matrix y.
    Scope: local
    Type: required
    Intent: inout.
    Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of the type indicated in Table 1. + The rank of y must be the same of x. +

    +

    +desc_a

    +

    contains data structures for communications.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_desc_type. +

    +
    +

    +On Return

    +

    +

    +

    +y

    +

    the local portion of result submatrix y.
    Scope: local
    Type: required
    Intent: inout.
    Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of the type indicated in Table 1. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + +

    4.2 psb_gedot — Dot Product

    +

    This function computes dot product between two vectors x and y.
    If x and y are real vectors it computes dot-product as: +

    +dot ← xTy
+
    +

    Else if x and y are complex vectors then it computes dot-product as: +

    +dot ← xHy
+
    +

    +

    psb_gedot(x, y, desc_a, info [,global])

    + + +


    + + +
    +

    +

    + + + + +


    dot, x, y Function


    Short Precision Real psb_gedot
    Long Precision Real psb_gedot
    Short Precision Complexpsb_gedot
    Long Precision Complex psb_gedot


    +
    Table 2: Data types
    + + +

    +
    +
    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +x

    +

    the local portion of global dense matrix x.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 2. The + rank of x must be the same of y. +

    +

    +y

    +

    the local portion of global dense matrix y.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 2. The + rank of y must be the same of x. +

    +

    +desc_a

    +

    contains data structures for communications.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_desc_type. +

    +

    +global

    + + +

    Specifies whether the computation should include the global reduction + across all processes.
    Scope: global
    Type: optional.
    Intent: in.
    Specified as: a logical scalar. Default: global=.true.
    +

    +

    +On Return

    +

    +

    +

    +Function value

    +

    is the dot product of vectors x and y.
    Scope: global unless the optional variable global=.false. has been + specified
    Specified as: a number of the data type indicated in Table 2. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    +

    Notes +

      +
    1. +

      The computation of a global result requires a global communication, which + entails a significant overhead. It may be necessary and/or advisable to + compute multiple dot products at the same time; in this case, it is + possible to improve the runtime efficiency by using the following scheme: + +

         vres(1) = psb_gedot(x1,y1,desc_a,info,global=.false.) 
      +   vres(2) = psb_gedot(x2,y2,desc_a,info,global=.false.) 
      +   vres(3) = psb_gedot(x3,y3,desc_a,info,global=.false.) 
      +   call psb_sum(ctxt,vres(1:3))
      + +

      In this way the global communication, which for small sizes is a latency-bound + operation, is invoked only once.

    + + +

    4.3 psb_gedots — Generalized Dot Product

    +

    This subroutine computes a series of dot products among the columns of two dense +matrices x and y: +

    +res(i) ← x(:,i)Ty(:,i)
+
    +

    If the matrices are complex, then the usual convention applies, i.e. the conjugate +transpose of x is used. If x and y are of rank one, then res is a scalar, else it is a rank +one array. +

    call psb_gedots(res, x, y, desc_a, info) +

    + + +


    + + +
    +

    +

    + + + + +


    res, x, y Subroutine


    Short Precision Real psb_gedots
    Long Precision Real psb_gedots
    Short Precision Complexpsb_gedots
    Long Precision Complex psb_gedots


    +
    Table 3: Data types
    + + +

    +
    +
    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +x

    +

    the local portion of global dense matrix x.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 3. The + rank of x must be the same of y. +

    +

    +y

    +

    the local portion of global dense matrix y.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 3. The + rank of y must be the same of x. +

    +

    +desc_a

    +

    contains data structures for communications.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_desc_type. +

    +

    +On Return

    +

    + + +

    +

    +res

    +

    is the dot product of vectors x and y.
    Scope: global
    Intent: out.
    Specified as: a number or a rank-one array of the data type indicated in + Table 2. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + +

    4.4 psb_normi — Infinity-Norm of Vector

    +

    This function computes the infinity-norm of a vector x.
    If x is a real vector it computes infinity norm as: +

    +amax  ← max |xi|
+          i
+
    +

    else if x is a complex vector then it computes the infinity-norm as: +

    +amax ← maxi (|re(xi)|+ |im(xi)|)
+
    +

    +

    psb_geamax(x, desc_a, info [,global])
    psb_normi(x, desc_a, info [,global]) +

    + + +


    + + +
    +

    +

    + + + +



    amax x Function



    Short Precision RealShort Precision Real psb_geamax
    Long Precision Real Long Precision Real psb_geamax
    Short Precision RealShort Precision Complexpsb_geamax
    Long Precision Real Long Precision Complex psb_geamax



    +
    Table 4: Data types
    + + +

    +
    +
    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +x

    +

    the local portion of global dense matrix x.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 4. +

    +

    +desc_a

    +

    contains data structures for communications.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_desc_type. +

    +

    +global

    +

    Specifies whether the computation should include the global reduction + across all processes.
    Scope: global
    Type: optional.
    Intent: in.
    Specified as: a logical scalar. Default: global=.true.
    +

    +

    +On Return

    +

    + + + +

    +

    +Function value

    +

    is the infinity norm of vector x.
    Scope: global unless the optional variable global=.false. has been + specified
    Specified as: a long precision real number. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    +

    Notes +

      +
    1. +

      The computation of a global result requires a global communication, which + entails a significant overhead. It may be necessary and/or advisable to compute + multiple norms at the same time; in this case, it is possible to improve the + runtime efficiency by using the following scheme: +

         vres(1) = psb_geamax(x1,desc_a,info,global=.false.) 
      +   vres(2) = psb_geamax(x2,desc_a,info,global=.false.) 
      +   vres(3) = psb_geamax(x3,desc_a,info,global=.false.) 
      +   call psb_amx(ctxt,vres(1:3))
      + +

      In this way the global communication, which for small sizes is a latency-bound + operation, is invoked only once.

    + + + +

    4.5 psb_geamaxs — Generalized Infinity Norm

    +

    This subroutine computes a series of infinity norms on the columns of a dense matrix +x: +

    +res(i) ← max|x(k,i)|
+         k
+
    +

    +

    call psb_geamaxs(res, x, desc_a, info) +

    + + + +


    + + + +
    +

    +

    + + + +



    res x Subroutine



    Short Precision RealShort Precision Real psb_geamaxs
    Long Precision Real Long Precision Real psb_geamaxs
    Short Precision RealShort Precision Complexpsb_geamaxs
    Long Precision Real Long Precision Complex psb_geamaxs



    +
    Table 5: Data types
    + + + +

    +
    +
    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +x

    +

    the local portion of global dense matrix x.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 5. +

    +

    +desc_a

    +

    contains data structures for communications.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_desc_type. +

    +

    +On Return

    +

    +

    +

    +res

    +

    is the infinity norm of the columns of x.
    Scope: global
    Intent: out.
    Specified as: a number or a rank-one array of long precision real numbers. +

    +

    +info

    + + + +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + + +

    4.6 psb_norm1 — 1-Norm of Vector

    +

    This function computes the 1-norm of a vector x.
    If x is a real vector it computes 1-norm as: +

    +asum ←  ∥xi∥
+
    +

    else if x is a complex vector then it computes 1-norm as: +

    +asum ←  ∥re(x)∥1 + ∥im (x)∥1
+
    +

    +

    psb_geasum(x, desc_a, info [,global]) psb_norm1(x, desc_a, info [,global]) +

    + + + +


    + + + +
    +

    +

    + + + +



    asum x Function



    Short Precision RealShort Precision Real psb_geasum
    Long Precision Real Long Precision Real psb_geasum
    Short Precision RealShort Precision Complexpsb_geasum
    Long Precision Real Long Precision Complex psb_geasum



    +
    Table 6: Data types
    + + + +

    +
    +
    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +x

    +

    the local portion of global dense matrix x.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 6. +

    +

    +desc_a

    +

    contains data structures for communications.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_desc_type. +

    +

    +global

    +

    Specifies whether the computation should include the global reduction + across all processes.
    Scope: global
    Type: optional.
    Intent: in.
    Specified as: a logical scalar. Default: global=.true.
    +

    +

    +On Return

    +

    + + + +

    +

    +Function value

    +

    is the 1-norm of vector x.
    Scope: global unless the optional variable global=.false. has been + specified
    Specified as: a long precision real number. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    +

    Notes +

      +
    1. +

      The computation of a global result requires a global communication, which + entails a significant overhead. It may be necessary and/or advisable to compute + multiple norms at the same time; in this case, it is possible to improve the + runtime efficiency by using the following scheme: +

         vres(1) = psb_geasum(x1,desc_a,info,global=.false.) 
      +   vres(2) = psb_geasum(x2,desc_a,info,global=.false.) 
      +   vres(3) = psb_geasum(x3,desc_a,info,global=.false.) 
      +   call psb_sum(ctxt,vres(1:3))
      + +

      In this way the global communication, which for small sizes is a latency-bound + operation, is invoked only once.

    + + + +

    4.7 psb_geasums — Generalized 1-Norm of Vector

    +

    This subroutine computes a series of 1-norms on the columns of a dense matrix +x: +

    +res(i) ← max|x(k,i)|
+         k
+
    +

    This function computes the 1-norm of a vector x.
    If x is a real vector it computes 1-norm as: +

    +res(i) ← ∥xi∥
+
    +

    else if x is a complex vector then it computes 1-norm as: +

    +res(i) ← ∥re(x )∥1 +∥im (x )∥1
+
    +

    +

    call psb_geasums(res, x, desc_a, info) +

    + + + +


    + + + +
    +

    +

    + + + +



    res x Subroutine



    Short Precision RealShort Precision Real psb_geasums
    Long Precision Real Long Precision Real psb_geasums
    Short Precision RealShort Precision Complexpsb_geasums
    Long Precision Real Long Precision Complex psb_geasums



    +
    Table 7: Data types
    + + + +

    +
    +
    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +x

    +

    the local portion of global dense matrix x.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 7. +

    +

    +desc_a

    +

    contains data structures for communications.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_desc_type. +

    +

    +On Return

    +

    +

    +

    +res

    +

    contains the 1-norm of (the columns of) x.
    Scope: global
    Intent: out.
    Short as: a long precision real number. Specified as: a long precision real + number. + + + +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + + +

    4.8 psb_norm2 — 2-Norm of Vector

    +

    This function computes the 2-norm of a vector x.
    If x is a real vector it computes 2-norm as: +

    +nrm2  ← √xT-x-
+
    +

    else if x is a complex vector then it computes 2-norm as: +

    +        √----
+nrm2 ←   xHx
+
    +

    +

    + + + +


    + + + +
    +

    +

    + + + +



    nrm2 x Function



    Short Precision RealShort Precision Real psb_genrm2
    Long Precision Real Long Precision Real psb_genrm2
    Short Precision RealShort Precision Complexpsb_genrm2
    Long Precision Real Long Precision Complex psb_genrm2



    +
    Table 8: Data types
    + + + +

    +
    +

    psb_genrm2(x, desc_a, info [,global])
    psb_norm2(x, desc_a, info [,global])
    +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +x

    +

    the local portion of global dense matrix x.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 8. +

    +

    +desc_a

    +

    contains data structures for communications.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_desc_type. +

    +

    +global

    +

    Specifies whether the computation should include the global reduction + across all processes.
    Scope: global
    Type: optional.
    Intent: in.
    Specified as: a logical scalar. Default: global=.true.
    +

    +

    +On Return

    + + + +

    +

    +

    +Function Value

    +

    is the 2-norm of vector x.
    Scope: global unless the optional variable global=.false. has been + specified
    Type: required
    Specified as: a long precision real number. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    +

    Notes +

      +
    1. +

      The computation of a global result requires a global communication, which + entails a significant overhead. It may be necessary and/or advisable to compute + multiple norms at the same time; in this case, it is possible to improve the + runtime efficiency by using the following scheme: +

         vres(1) = psb_genrm2(x1,desc_a,info,global=.false.) 
      +   vres(2) = psb_genrm2(x2,desc_a,info,global=.false.) 
      +   vres(3) = psb_genrm2(x3,desc_a,info,global=.false.) 
      +   call psb_nrm2(ctxt,vres(1:3))
      + +

      In this way the global communication, which for small sizes is a latency-bound + operation, is invoked only once.

    + + + +

    4.9 psb_genrm2s — Generalized 2-Norm of Vector

    +

    This subroutine computes a series of 2-norms on the columns of a dense matrix +x: +

    +res(i) ← ∥x(:,i)∥2
+
    +

    +

    call psb_genrm2s(res, x, desc_a, info) +

    + + + +


    + + + +
    +

    +

    + + + +



    res x Subroutine



    Short Precision RealShort Precision Real psb_genrm2s
    Long Precision Real Long Precision Real psb_genrm2s
    Short Precision RealShort Precision Complexpsb_genrm2s
    Long Precision Real Long Precision Complex psb_genrm2s



    +
    Table 9: Data types
    + + + +

    +
    +
    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +x

    +

    the local portion of global dense matrix x.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 9. +

    +

    +desc_a

    +

    contains data structures for communications.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_desc_type. +

    +

    +On Return

    +

    +

    +

    +res

    +

    contains the 1-norm of (the columns of) x.
    Scope: global
    Intent: out.
    Specified as: a long precision real number. +

    +

    +info

    + + + +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + + +

    4.10 psb_norm1 — 1-Norm of Sparse Matrix

    +

    This function computes the 1-norm of a matrix A:
    +

    +nrm1 ← ∥A ∥1
+
    +

    where: +

    +

    +A

    +

    represents the global matrix A

    +
    + + + +


    + +
    +

    +

    + + + +


    A Function


    Short Precision Real psb_spnrm1
    Long Precision Real psb_spnrm1
    Short Precision Complexpsb_spnrm1
    Long Precision Complex psb_spnrm1


    +
    Table 10: Data types
    + + + +

    +
    + + + +
    +psb_spnrm1(A, desc_a, info)
    +psb_norm1(A, desc_a, info)
    +
    +

    +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +a

    +

    the local portion of the global sparse matrix A.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_Tspmat_type. +

    +

    +desc_a

    +

    contains data structures for communications.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_desc_type. +

    +

    +On Return

    +

    +

    +

    +Function value

    +

    is the 1-norm of sparse submatrix A.
    Scope: global
    Specified as: a long precision real number. + + + +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + + +

    4.11 psb_normi — Infinity Norm of Sparse Matrix

    +

    This function computes the infinity-norm of a matrix A:
    +

    +nrmi ←  ∥A∥∞
+
    +

    where: +

    +

    +A

    +

    represents the global matrix A

    +
    + + + +


    + + + +
    +

    +

    + + + +


    A Function


    Short Precision Real psb_spnrmi
    Long Precision Real psb_spnrmi
    Short Precision Complexpsb_spnrmi
    Long Precision Complex psb_spnrmi


    +
    Table 11: Data types
    + + + +

    +
    + + + +
    +psb_spnrmi(A, desc_a, info)
    +psb_normi(A, desc_a, info)
    +
    +

    +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +a

    +

    the local portion of the global sparse matrix A.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_Tspmat_type. +

    +

    +desc_a

    +

    contains data structures for communications.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_desc_type. +

    +

    +On Return

    +

    +

    +

    +Function value

    +

    is the infinity-norm of sparse submatrix A.
    Scope: global
    Specified as: a long precision real number. + + + +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + + +

    4.12 psb_spmm — Sparse Matrix by Dense Matrix Product

    +

    This subroutine computes the Sparse Matrix by Dense Matrix Product: +
    +
    +y ← αAx + βy
+
    +
    (1)
    +

    +
    +
    +       T
+y ← αA  x+ βy
+
    +
    (2)
    +

    +
    +
    +y ← αAHx + βy
+
    +
    (3)
    +

    +

    where: + + + +

    +

    +x

    +

    is the global dense matrix x:,: +

    +

    +y

    +

    is the global dense matrix y:,: +

    +

    +A

    +

    is the global sparse matrix A

    +
    + + + +


    + + + +
    +

    +

    + + + + +


    A, x, y, α, β Subroutine


    Short Precision Real psb_spmm
    Long Precision Real psb_spmm
    Short Precision Complexpsb_spmm
    Long Precision Complex psb_spmm


    +
    Table 12: Data types
    + + + +

    +
    +

    call psb_spmm(alpha, a, x, beta, y, desc_a, info)
    call psb_spmm(alpha, a, x, beta, y,desc_a, info, trans, work) +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +alpha

    +

    the scalar α.
    Scope: global
    Type: required
    Intent: in.
    Specified as: a number of the data type indicated in Table 12. +

    +

    +a

    +

    the local portion of the sparse matrix A.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_Tspmat_type. +

    +

    +x

    +

    the local portion of global dense matrix x.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 12. The + rank of x must be the same of y. +

    +

    +beta

    + + + +

    the scalar β.
    Scope: global
    Type: required
    Intent: in.
    Specified as: a number of the data type indicated in Table 12. +

    +

    +y

    +

    the local portion of global dense matrix y.
    Scope: local
    Type: required
    Intent: inout.
    Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 12. The + rank of y must be the same of x. +

    +

    +desc_a

    +

    contains data structures for communications.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_desc_type. +

    +

    +trans

    +

    indicates what kind of operation to perform. +

    +

    + trans = N

    +

    the operation is specified by equation 1 +

    +

    + trans = T

    +

    the operation is specified by equation 2 +

    +

    + trans = C

    +

    the operation is specified by equation 3

    +

    Scope: global
    Type: optional
    Intent: in.
    Default: trans = N
    Specified as: a character variable. + + + +

    +

    +work

    +

    work array.
    Scope: local
    Type: optional
    Intent: inout.
    Specified as: a rank one array of the same type of x and y with the TARGET + attribute. +

    +

    +On Return

    +

    +

    +

    +y

    +

    the local portion of result matrix y.
    Scope: local
    Type: required
    Intent: inout.
    Specified as: an array of rank one or two containing numbers of type specified + in Table 12. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + + +

    4.13 psb_spsm — Triangular System Solve

    +

    This subroutine computes the Triangular System Solve: +

    +
    +y  ←   αT- 1x + βy
+y  ←   αDT -1x +βy
+         - 1
+y  ←   αT  Dx  +βy
+y  ←   αT- Tx+ βy
+y  ←   αDT -Tx + βy
+y  ←   αT- TDx + βy
+         - H
+y  ←   αT   x +βy
+y  ←   αDT -Hx + βy
+y  ←   αT- HDx + βy
+
    +
    +

    where: +

    +

    +x

    +

    is the global dense matrix x:,: +

    +

    +y

    +

    is the global dense matrix y:,: +

    +

    +T

    +

    is the global sparse block triangular submatrix T +

    +

    +D

    +

    is the scaling diagonal matrix.

    + + + +

    call psb_spsm(alpha, t, x, beta, y, desc_a, info)
    call psb_spsm(alpha, t, x, beta, y, desc_a, info, trans, unit, choice, diag, work)
    +

    + + + +


    + + + +
    +

    +

    + + + + +


    T, x, y, D, α, β Subroutine


    Short Precision Real psb_spsm
    Long Precision Real psb_spsm
    Short Precision Complexpsb_spsm
    Long Precision Complex psb_spsm


    +
    Table 13: Data types
    + + + +

    +
    +
    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +alpha

    +

    the scalar α.
    Scope: global
    Type: required
    Intent: in.
    Specified as: a number of the data type indicated in Table 13. +

    +

    +t

    +

    the global portion of the sparse matrix T.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object type specified in § 3. +

    +

    +x

    +

    the local portion of global dense matrix x.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 13. The + rank of x must be the same of y. +

    +

    +beta

    +

    the scalar β.
    Scope: global
    Type: required
    Intent: in.
    Specified as: a number of the data type indicated in Table 13. + + + +

    +

    +y

    +

    the local portion of global dense matrix y.
    Scope: local
    Type: required
    Intent: inout.
    Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 13. The + rank of y must be the same of x. +

    +

    +desc_a

    +

    contains data structures for communications.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_desc_type. +

    +

    +trans

    +

    specify with unitd the operation to perform. +

    +

    + trans = ’N’

    +

    the operation is with no transposed matrix +

    +

    + trans = ’T’

    +

    the operation is with transposed matrix. +

    +

    + trans = ’C’

    +

    the operation is with conjugate transposed matrix.

    +

    Scope: global
    Type: optional
    Intent: in.
    Default: trans = N
    Specified as: a character variable. +

    +

    +unitd

    +

    specify with trans the operation to perform. + + + +

    +

    + unitd = ’U’

    +

    the operation is with no scaling +

    +

    + unitd = ’L’

    +

    the operation is with left scaling +

    +

    + unitd = ’R’

    +

    the operation is with right scaling.

    +

    Scope: global
    Type: optional
    Intent: in.
    Default: unitd = U
    Specified as: a character variable. +

    +

    +choice

    +

    specifies the update of overlap elements to be performed on exit: +

    +

    +

    +

    psb_none_ +

    +

    +

    +

    psb_sum_ +

    +

    +

    +

    psb_avg_ +

    +

    +

    +

    psb_square_root_

    +

    Scope: global
    Type: optional
    Intent: in.
    Default: psb_avg_
    Specified as: an integer variable. + + + +

    +

    +diag

    +

    the diagonal scaling matrix.
    Scope: local
    Type: optional
    Intent: in.
    Default: diag(1) = 1(noscaling)
    Specified as: a rank one array containing numbers of the type indicated in + Table 13. +

    +

    +work

    +

    a work array.
    Scope: local
    Type: optional
    Intent: inout.
    Specified as: a rank one array of the same type of x with the TARGET + attribute. +

    +

    +On Return

    +

    +

    +

    +y

    +

    the local portion of global dense matrix y.
    Scope: local
    Type: required
    Intent: inout.
    Specified as: an array of rank one or two containing numbers of type specified + in Table 13. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + + +

    4.14 psb_gemlt — Entrywise Product

    +

    This function computes the entrywise product between two vectors x and +y +

    +dot ← x(i)y(i).
+
    +

    +

    psb_gemlt(x, y, desc_a, info) +

    + + + +


    + + + +
    +

    +

    + + + + +


    dot, x, y Function


    Short Precision Real psb_gemlt
    Long Precision Real psb_gemlt
    Short Precision Complexpsb_gemlt
    Long Precision Complex psb_gemlt


    +
    Table 14: Data types
    + + + +

    +
    +
    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +x

    +

    the local portion of global dense vector x.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_T_vect_type containing numbers of + type specified in Table 2. +

    +

    +y

    +

    the local portion of global dense vector y.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_T_vect_type containing numbers of + type specified in Table 2. +

    +

    +desc_a

    +

    contains data structures for communications.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_desc_type. +

    +

    +On Return

    +

    + + + +

    +

    +y

    +

    the local portion of result submatrix y.
    Scope: local
    Type: required
    Intent: inout.
    Specified as: an object of type psb_T_vect_type containing numbers of + the type indicated in Table 14. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + + +

    4.15 psb_gediv — Entrywise Division

    +

    This function computes the entrywise division between two vectors x and +y +

    +∕ ← x(i)∕y(i).
+
    +

    +

    psb_gediv(x, y, desc_a, info, [flag) +

    + + + +


    + + + +
    +

    +

    + + + + +


    , x, y Function


    Short Precision Real psb_gediv
    Long Precision Real psb_gediv
    Short Precision Complexpsb_gediv
    Long Precision Complex psb_gediv


    +
    Table 15: Data types
    + + + +

    +
    +
    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +x

    +

    the local portion of global dense vector x.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_T_vect_type containing numbers of + type specified in Table 2. +

    +

    +y

    +

    the local portion of global dense vector y.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_T_vect_type containing numbers of + type specified in Table 2. +

    +

    +desc_a

    +

    contains data structures for communications.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_desc_type. +

    +

    +flag

    +

    check if any of the y(i) = 0, and in case returns error halting the + computation.
    Scope: local
    Type: optional Intent: in.
    Specified as: the logical value flag=.true. + + + +

    +

    +On Return

    +

    +

    +

    +x

    +

    the local portion of result submatrix x.
    Scope: local
    Type: required
    Intent: inout.
    Specified as: an object of type psb_T_vect_type containing numbers of + the type indicated in Table 14. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + + +

    4.16 psb_geinv — Entrywise Inversion

    +

    This function computes the entrywise inverse of a vector x and puts it into +y +

    +∕ ← 1∕x(i).
+
    +

    +

    psb_geinv(x, y, desc_a, info, [flag) +

    + + + +


    + + + +
    +

    +

    + + + + +


    , x, y Function


    Short Precision Real psb_geinv
    Long Precision Real psb_geinv
    Short Precision Complexpsb_geinv
    Long Precision Complex psb_geinv


    +
    Table 16: Data types
    + + + +

    +
    +
    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +x

    +

    the local portion of global dense vector x.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_T_vect_type containing numbers of + type specified in Table 2. +

    +

    +desc_a

    +

    contains data structures for communications.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an object of type psb_desc_type. +

    +

    +flag

    +

    check if any of the x(i) = 0, and in case returns error halting the + computation.
    Scope: local
    Type: optional Intent: in.
    Specified as: the logical value flag=.true. +

    +

    +On Return

    +

    + + + +

    +

    +y

    +

    the local portion of result submatrix x.
    Scope: local
    Type: required
    Intent: out.
    Specified as: an object of type psb_T_vect_type containing numbers of + the type indicated in Table 16. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + + + + + + + + + +href="userhtml.html#userhtmlse7.html" >up]

    + id="tailuserhtmlse4.html"> diff --git a/docs/html/userhtmlse5.html b/docs/html/userhtmlse5.html index c897c0eb..6b87e4b3 100644 --- a/docs/html/userhtmlse5.html +++ b/docs/html/userhtmlse5.html @@ -11,42 +11,2309 @@

    +href="#tailuserhtmlse5.html">tail] [up]

    5 Communication routines

    + id="x10-720005">Communication routines

    The routines in this chapter implement various global communication operators on vectors associated with a discretization mesh. For auxiliary communication routines not tied to a discretization space see 6. +href="userhtmlse6.html#x11-770006">6. -

    -  5.1 psb_halo — Halo Data Communication -
     5.2 psb_ovrl — Overlap Update -
     5.3 psb_gather — Gather Global Dense Matrix -
     5.4 psb_scatter — Scatter Global Dense Matrix +

    5.1 psb_halo — Halo Data Communication

    +

    These subroutines gathers the values of the halo elements: +

    +x ← x
+
    +

    where: +

    +

    +x

    +

    is a global dense submatrix.

    +
    + + + +


    + + + +
    +

    +

    + + + + + +


    α, x Subroutine


    Integer psb_halo
    Short Precision Real psb_halo
    Long Precision Real psb_halo
    Short Precision Complexpsb_halo
    Long Precision Complex psb_halo


    +
    Table 17: Data types
    + + + +

    +

    call psb_halo(x, desc_a, info)
    call psb_halo(x, desc_a, info, work, data) +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +x

    +

    global dense matrix x.
    Scope: local
    Type: required
    Intent: inout.
    Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 17. +

    +

    +desc_a

    +

    contains data structures for communications.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a structured data of type psb_desc_type. +

    +

    +work

    +

    the work array.
    Scope: local
    Type: optional
    Intent: inout.
    Specified as: a rank one array of the same type of x. +

    +

    +data

    + + +

    index list selector.
    Scope: global
    Type: optional
    Specified + as: an integer. Values:psb_comm_halo_,psb_comm_mov_, psb_comm_ext_, + default: psb_comm_halo_. Chooses the index list on which to base the data + exchange. +

    +

    +On Return

    +

    +

    +

    +x

    +

    global dense result matrix x.
    Scope: local
    Type: required
    Intent: inout.
    Returned as: a rank one or two array containing numbers of type specified + in Table 17. +

    +

    +info

    +

    the local portion of result submatrix y.
    Scope: local
    Type: required
    Intent: out.
    An integer value that contains an error code.

    +


    +
    +

    +

    PIC

    +
    Figure 3: Sample discretization mesh.
    +


    +

    Usage Example Consider the discretization mesh depicted in fig. 3, partitioned +among two processes as shown by the dashed line; the data distribution is such that +each process will own 32 entries in the index space, with a halo made of 8 entries +placed at local indices 33 through 40. If process 0 assigns an initial value of 1 to +its entries in the x vector, and process 1 assigns a value of 2, then after +a call to psb_halo the contents of the local vectors will be the following: +

    + + +


    + + + +
    +

    +

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Process 0
    Process 1
    +
    IGLOB(I)X(I) IGLOB(I)X(I)
    1 1 1.0 1 33 2.0
    2 2 1.0 2 34 2.0
    3 3 1.0 3 35 2.0
    4 4 1.0 4 36 2.0
    5 5 1.0 5 37 2.0
    6 6 1.0 6 38 2.0
    7 7 1.0 7 39 2.0
    8 8 1.0 8 40 2.0
    9 9 1.0 9 41 2.0
    10 10 1.010 42 2.0
    11 11 1.011 43 2.0
    12 12 1.012 44 2.0
    13 13 1.013 45 2.0
    14 14 1.014 46 2.0
    15 15 1.015 47 2.0
    16 16 1.016 48 2.0
    17 17 1.017 49 2.0
    18 18 1.018 50 2.0
    19 19 1.019 51 2.0
    20 20 1.020 52 2.0
    21 21 1.021 53 2.0
    22 22 1.022 54 2.0
    23 23 1.023 55 2.0
    24 24 1.024 56 2.0
    25 25 1.025 57 2.0
    26 26 1.026 58 2.0
    27 27 1.027 59 2.0
    28 28 1.028 60 2.0
    29 29 1.029 61 2.0
    30 30 1.030 62 2.0
    31 31 1.031 63 2.0
    32 32 1.032 64 2.0
    33 33 2.033 25 1.0
    34 34 2.034 26 1.0
    35 35 2.035 27 1.0
    36 36 2.036 28 1.0
    37 37 2.037 29 1.0
    38 38 2.038 30 1.0
    39 39 2.039 31 1.0
    40 40 2.040 32 1.0
    + + + +

    +
    + + + +

    5.2 psb_ovrl — Overlap Update

    +

    These subroutines applies an overlap operator to the input vector: +

    +x ← Qx
+
    +

    where: +

    +

    +x

    +

    is the global dense submatrix x +

    +

    +Q

    +

    is the overlap operator; it is the composition of two operators Pa and PT.

    +
    + +


    + + + +
    +

    +

    + + + +


    x Subroutine


    Short Precision Real psb_ovrl
    Long Precision Real psb_ovrl
    Short Precision Complexpsb_ovrl
    Long Precision Complex psb_ovrl


    +
    Table 18: Data types
    + + + +

    +
    +

    call psb_ovrl(x, desc_a, info)
    call psb_ovrl(x, desc_a, info, update=update_type, work=work) +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +x

    +

    global dense matrix x.
    Scope: local
    Type: required
    Intent: inout.
    Specified as: a rank one or two array or an object of type + psb_T_vect_type containing numbers of type specified in Table 18. +

    +

    +desc_a

    +

    contains data structures for communications.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a structured data of type psb_desc_type. +

    +

    +update

    +

    Update operator.
    +

    +

    + update = psb_none_

    +

    Do nothing; +

    +

    + update = psb_add_

    +

    Sum overlap entries, i.e. apply PT; + + + +

    +

    + update = psb_avg_

    +

    Average overlap entries, i.e. apply PaPT;

    +

    Scope: global
    Intent: in.
    Default: update_type = psb_avg_
    Scope: global
    Specified as: a integer variable. +

    +

    +work

    +

    the work array.
    Scope: local
    Type: optional
    Intent: inout.
    Specified as: a one dimensional array of the same type of x. +

    +

    +On Return

    +

    +

    +

    +x

    +

    global dense result matrix x.
    Scope: local
    Type: required
    Intent: inout.
    Specified as: an array of rank one or two containing numbers of type specified + in Table 18. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    +

    Notes +

      +
    1. + + + +

      If there is no overlap in the data distribution associated with the + descriptor, no operations are performed; +

    2. +
    3. +

      The operator PT performs the reduction sum of overlap elements; it is a + “prolongation” operator PT that replicates overlap elements, accounting + for the physical replication of data; +

    4. +
    5. +

      The operator Pa performs a scaling on the overlap elements by the + amount of replication; thus, when combined with the reduction operator, + it implements the average of replicated elements over all of their instances.

    +


    + + + + + + + +
    +

    +

    PIC

    +
    Figure 4: Sample discretization mesh.
    + + + +


    +

    Example of use Consider the discretization mesh depicted in fig. 4, partitioned +among two processes as shown by the dashed lines, with an overlap of 1 extra layer +with respect to the partition of fig. 3; the data distribution is such that +each process will own 40 entries in the index space, with an overlap of 16 +entries placed at local indices 25 through 40; the halo will run from local +index 41 through local index 48.. If process 0 assigns an initial value of 1 to +its entries in the x vector, and process 1 assigns a value of 2, then after a +call to psb_ovrl with psb_avg_ and a call to psb_halo_ the contents of +the local vectors will be the following (showing a transition among the two +subdomains) +

    + + + +


    + + + +
    +

    +

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Process 0
    Process 1
    +
    IGLOB(I)X(I) IGLOB(I)X(I)
    1 1 1.0 1 33 1.5
    2 2 1.0 2 34 1.5
    3 3 1.0 3 35 1.5
    4 4 1.0 4 36 1.5
    5 5 1.0 5 37 1.5
    6 6 1.0 6 38 1.5
    7 7 1.0 7 39 1.5
    8 8 1.0 8 40 1.5
    9 9 1.0 9 41 2.0
    10 10 1.010 42 2.0
    11 11 1.011 43 2.0
    12 12 1.012 44 2.0
    13 13 1.013 45 2.0
    14 14 1.014 46 2.0
    15 15 1.015 47 2.0
    16 16 1.016 48 2.0
    17 17 1.017 49 2.0
    18 18 1.018 50 2.0
    19 19 1.019 51 2.0
    20 20 1.020 52 2.0
    21 21 1.021 53 2.0
    22 22 1.022 54 2.0
    23 23 1.023 55 2.0
    24 24 1.024 56 2.0
    25 25 1.525 57 2.0
    26 26 1.526 58 2.0
    27 27 1.527 59 2.0
    28 28 1.528 60 2.0
    29 29 1.529 61 2.0
    30 30 1.530 62 2.0
    31 31 1.531 63 2.0
    32 32 1.532 64 2.0
    33 33 1.533 25 1.5
    34 34 1.534 26 1.5
    35 35 1.535 27 1.5
    36 36 1.536 28 1.5
    37 37 1.537 29 1.5
    38 38 1.538 30 1.5
    39 39 1.539 31 1.5
    40 40 1.540 32 1.5
    41 41 2.041 17 1.0
    42 42 2.042 18 1.0
    43 43 2.043 19 1.0
    44 44 2.044 20 1.0
    45 45 2.045 21 1.0
    46 46 2.046 22 1.0
    47 47 2.047 23 1.0
    48 48 2.048 24 1.0
    + + + +

    +
    + + + +

    5.3 psb_gather — Gather Global Dense Matrix

    +

    These subroutines collect the portions of global dense matrix distributed over all +process into one single array stored on one process. +

    +glob-x ← collect(loc-xi)
+
    +

    where: +

    +

    +glob_x

    +

    is the global submatrix glob_x1:m,1:n +

    +

    +loc_xi

    +

    is the local portion of global dense matrix on process i. +

    +

    +collect

    +

    is the collect function.

    +
    + + + +


    + + + +
    +

    +

    + + + + + +


    xi,y Subroutine


    Integer psb_gather
    Short Precision Real psb_gather
    Long Precision Real psb_gather
    Short Precision Complexpsb_gather
    Long Precision Complex psb_gather


    +
    Table 19: Data types
    + + + +

    +
    +

    call psb_gather(glob_x, loc_x, desc_a, info, root) +call psb_gather(glob_x, loc_x, desc_a, info, root) +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +loc_x

    +

    the local portion of global dense matrix glob_x.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a rank one or two array or an object of type + psb_T_vect_type indicated in Table 19. +

    +

    +desc_a

    +

    contains data structures for communications.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a structured data of type psb_desc_type. +

    +

    +root

    +

    The process that holds the global copy. If root = -1 all the processes will + have a copy of the global vector.
    Scope: global
    Type: optional
    Intent: in.
    Specified as: an integer variable -1 root np - 1, default -1. +

    +

    +On Return

    +

    + + + +

    +

    +glob_x

    +

    The array where the local parts must be gathered.
    Scope: global
    Type: required
    Intent: out.
    Specified as: a rank one or two array with the ALLOCATABLE attribute. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + + +

    5.4 psb_scatter — Scatter Global Dense Matrix

    +

    These subroutines scatters the portions of global dense matrix owned by a process to +all the processes in the processes grid. +

    +loc-xi ← scatter(glob-x)
+
    +

    where: +

    +

    +glob_x

    +

    is the global matrix glob_x1:m,1:n +

    +

    +loc_xi

    +

    is the local portion of global dense matrix on process i. +

    +

    +scatter

    +

    is the scatter function.

    +
    + + + +


    + + + +
    +

    +

    + + + + + +


    xi,y Subroutine


    Integer psb_scatter
    Short Precision Real psb_scatter
    Long Precision Real psb_scatter
    Short Precision Complexpsb_scatter
    Long Precision Complex psb_scatter


    +
    Table 20: Data types
    + + + +

    +
    +

    call psb_scatter(glob_x, loc_x, desc_a, info, root, mold) +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +glob_x

    +

    The array that must be scattered into local pieces.
    Scope: global
    Type: required
    Intent: in.
    Specified as: a rank one or two array. +

    +

    +desc_a

    +

    contains data structures for communications.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a structured data of type psb_desc_type. +

    +

    +root

    +

    The process that holds the global copy. If root = -1 all the processes have + a copy of the global vector.
    Scope: global
    Type: optional
    Intent: in.
    Specified as: an integer variable -1 root np - 1, default psb_root_, + i.e. process 0. +

    +

    +mold

    + + + +

    The desired dynamic type for the internal vector storage.
    Scope: local.
    Type: optional.
    Intent: in.
    Specified as: an object of a class derived from psb_T_base_vect_type; + this is only allowed when loc_x is of type psb_T_vect_type. +

    +

    +On Return

    +

    +

    +

    +loc_x

    +

    the local portion of global dense matrix glob_x.
    Scope: local
    Type: required
    Intent: out.
    Specified as: a rank one or two ALLOCATABLE array or an object of type + psb_T_vect_type containing numbers of the type indicated in Table 20. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + + + + + + + + + +href="userhtml.html#userhtmlse8.html" >up]

    + id="tailuserhtmlse5.html"> diff --git a/docs/html/userhtmlse6.html b/docs/html/userhtmlse6.html index 78d2fa1e..19fb0f5d 100644 --- a/docs/html/userhtmlse6.html +++ b/docs/html/userhtmlse6.html @@ -11,105 +11,4060 @@

    +href="userhtmlse3.html#tailuserhtmlse6.html">tail] [up]

    6 Data management routines

    + id="x11-770006">Data management routines

    -

    +

    6.1 psb_cdall — Allocates a communication descriptor

    + + + +
    +call psb_cdall(icontxt, desc_a, info,mg=mg,parts=parts)
    +call psb_cdall(icontxt, desc_a, info,vg=vg,[mg=mg,flag=flag])
    +call psb_cdall(icontxt, desc_a, info,vl=vl,[nl=nl,globalcheck=.false.,lidx=lidx])
    +call psb_cdall(icontxt, desc_a, info,nl=nl)
    +call psb_cdall(icontxt, desc_a, info,mg=mg,repl=.true.)
    +
    +

    +

    This subroutine initializes the communication descriptor associated with an index +space. One of the optional arguments parts, vg, vl, nl or repl must be specified, +thereby choosing the specific initialization strategy. +

    +

    +On Entry

    +

    +

    +

    +Type:

    +

    Synchronous. +

    +

    +icontxt

    +

    the communication context.
    Scope:global.
    Type:required.
    Intent: in.
    Specified as: an integer value. +

    +

    +vg

    +

    Data allocation: each index i ∈{1mg} is allocated to process vg(i).
    Scope:global.
    Type:optional.
    Intent: in.
    Specified as: an integer array. +

    +

    +flag

    +

    Specifies whether entries in vg are zero- or one-based.
    Scope:global.
    Type:optional.
    Intent: in.
    Specified as: an integer value 0,1, default 0. + + + +

    +

    +mg

    +

    the (global) number of rows of the problem.
    Scope:global.
    Type:optional.
    Intent: in.
    Specified as: an integer value. It is required if parts or repl is specified, + it is optional if vg is specified. +

    +

    +parts

    +

    the subroutine that defines the partitioning scheme.
    Scope:global.
    Type:required.
    Specified as: a subroutine. +

    +

    +vl

    +

    Data allocation: the set of global indices vl(1 : nl) belonging to the calling + process.
    Scope:local.
    Type:optional.
    Intent: in.
    Specified as: an integer array. +

    +

    +nl

    +

    Data allocation: in a generalized block-row distribution the number of + indices belonging to the current process.
    Scope:local.
    Type:optional.
    Intent: in.
    Specified as: an integer value. May be specified together with vl. +

    +

    +repl

    +

    Data allocation: build a replicated index space (i.e. all processes own all + indices).
    Scope:global.
    Type:optional.
    Intent: in.
    Specified as: the logical value .true. + + + +

    +

    +globalcheck

    +

    Data allocation: do global checks on the local index lists vl
    Scope:global.
    Type:optional.
    Intent: in.
    Specified as: a logical value, default: .false. +

    +

    +lidx

    +

    Data allocation: the set of local indices lidx(1 : nl) to be assigned to the + global indices vl.
    Scope:local.
    Type:optional.
    Intent: in.
    Specified as: an integer array.

    +

    +

    +

    +On Return

    +

    +

    +

    +desc_a

    +

    the communication descriptor.
    Scope:local.
    Type:required.
    Intent: out.
    Specified as: a structured data of type psb_desc_type. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    +

    Notes +

      +
    1. + + + +

      One of the optional arguments parts, vg, vl, nl or repl must be specified, + thereby choosing the initialization strategy as follows: +

      +

      + parts

      +

      In this case we have a subroutine specifying the mapping between global + indices and process/local index pairs. If this optional argument is + specified, then it is mandatory to specify the argument mg as well. The + subroutine must conform to the following interface: + + + +

      +           interface
      +              subroutine psb_parts(glob_index,mg,np,pv,nv)
      +                integer, intent (in)  :: glob_index,np,mg
      +                integer, intent (out) :: nv, pv(*)
      +              end subroutine psb_parts
      +           end interface
      +
      +

      The input arguments are: +

      +

      + glob_index

      +

      The global index to be mapped; +

      +

      + np

      +

      The number of processes in the mapping; +

      +

      + mg

      +

      The total number of global rows in the mapping;

      +

      The output arguments are: +

      +

      + nv

      +

      The number of entries in pv; +

      +

      + pv

      +

      A vector containing the indices of the processes to which the + global index should be assigend; each entry must satisfy 0 + pv(i) < np; if nv > 1 we have an index assigned to multiple + processes, i.e. we have an overlap among the subdomains.

      +
      +

      + vg

      +

      In this case the association between an index and a process is specified via + an integer vector vg(1:mg); each index i ∈{1mg} is assigned to process + vg(i). The vector vg must be identical on all calling processes; its + entries may have the ranges (0np - 1) or (1np) according to the + value of flag. The size mg may be specified via the optional + argument mg; the default is to use the entire vector vg, thus having + mg=size(vg). +

      +

      + vl

      + + + +

      In this case we are specifying the list of indices vl(1:nl) assigned to the + current process; thus, the global problem size mg is given by the range of + the aggregate of the individual vectors vl specified in the calling + processes. The size may be specified via the optional argument nl; the + default is to use the entire vector vl, thus having nl=size(vl). If + globalcheck=.true. the subroutine will check how many times each + entry in the global index space (1mg) is specified in the input lists vl, + thus allowing for the presence of overlap in the input, and checking for + “orphan” indices. If globalcheck=.false., the subroutine will not + check for overlap, and may be significantly faster, but the user is + implicitly guaranteeing that there are neither orphan nor overlap + indices. +

      +

      + lidx

      +

      The optional argument lidx is available for those cases in which the user + has already established a global-to-local mapping; if it is specified, each + index in vl(i) will be mapped to the corresponding local index lidx(i). + When specifying the argument lidx the user would also likely employ + lidx in calls to psb_cdins and local in calls to psb_spins and + psb_geins; see also sec. 2.3.1. +

      +

      + nl

      +

      If this argument is specified alone (i.e. without vl) the result is a + generalized row-block distribution in which each process I gets assigned a + consecutive chunk of NI = nl global indices. +

      +

      + repl

      +

      This arguments specifies to replicate all indices on all processes. This is a + special purpose data allocation that is useful in the construction of some + multilevel preconditioners.

      +
    2. +
    3. +

      On exit from this routine the descriptor is in the build state. +

    4. +
    5. +

      Calling the routine with vg or parts implies that every process will scan the + entire index space to figure out the local indices. +

    6. +
    7. +

      Overlapped indices are possible with both parts and vl invocations. + + + +

    8. +
    9. +

      When the subroutine is invoked with vl in conjunction with globalcheck=.true., + it will perform a scan of the index space to search for overlap or orphan + indices. +

    10. +
    11. +

      When the subroutine is invoked with vl in conjunction with globalcheck=.false., + no index space scan will take place. Thus it is the responsibility of the user to + make sure that the indices specified in vl have neither orphans nor overlaps; if + this assumption fails, results will be unpredictable. +

    12. +
    13. +

      Orphan and overlap indices are impossible by construction when the subroutine + is invoked with nl (alone), or vg.

    + + + +

    6.2 psb_cdins — Communication descriptor insert routine

    + + + +
    +call psb_cdins(nz, ia, ja, desc_a, info [,ila,jla])
    +call psb_cdins(nz,ja,desc,info[,jla,mask,lidx])
    +
    +

    +

    This subroutine examines the edges of the graph associated with the +discretization mesh (and isomorphic to the sparsity pattern of a linear system +coefficient matrix), storing them as necessary into the communication descriptor. In +the first form the edges are specified as pairs of indices ia(i),ja(i); the starting index +ia(i) should belong to the current process. In the second form only the remote indices +ja(i) are specified. +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +nz

    +

    the number of points being inserted.
    Scope: local.
    Type: required.
    Intent: in.
    Specified as: an integer value. +

    +

    +ia

    +

    the indices of the starting vertex of the edges being inserted.
    Scope: local.
    Type: required.
    Intent: in.
    Specified as: an integer array of length nz. +

    +

    +ja

    + + + +

    the indices of the end vertex of the edges being inserted.
    Scope: local.
    Type: required.
    Intent: in.
    Specified as: an integer array of length nz. +

    +

    +mask

    +

    Mask entries in ja, they are inserted only when the corresponding mask + entries are .true.
    Scope: local.
    Type: optional.
    Intent: in.
    Specified as: a logical array of length nz, default .true.. +

    +

    +lidx

    +

    User defined local indices for ja.
    Scope: local.
    Type: optional.
    Intent: in.
    Specified as: an integer array of length nz.

    +

    +

    +

    +On Return

    +

    +

    +

    +desc_a

    +

    the updated communication descriptor.
    Scope:local.
    Type:required.
    Intent: inout.
    Specified as: a structured data of type psb_desc_type. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected. + + + +

    +

    +ila

    +

    the local indices of the starting vertex of the edges being inserted.
    Scope: local.
    Type: optional.
    Intent: out.
    Specified as: an integer array of length nz. +

    +

    +jla

    +

    the local indices of the end vertex of the edges being inserted.
    Scope: local.
    Type: optional.
    Intent: out.
    Specified as: an integer array of length nz. +

    +

    Notes +

      +
    1. +

      This routine may only be called if the descriptor is in the build state; +

    2. +
    3. +

      This routine automatically ignores edges that do not insist on the current + process, i.e. edges for which neither the starting nor the end vertex belong + to the current process. +

    4. +
    5. +

      The second form of this routine will be useful when dealing with + user-specified index mappings; see also 2.3.1.

    + + + +

    6.3 psb_cdasb — Communication descriptor assembly routine

    + + + +
    +call psb_cdasb(desc_a, info [, mold])
    +
    +

    +

    +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +desc_a

    +

    the communication descriptor.
    Scope:local.
    Type:required.
    Intent: inout.
    Specified as: a structured data of type psb_desc_type. +

    +

    +mold

    +

    The desired dynamic type for the internal index storage.
    Scope: local.
    Type: optional.
    Intent: in.
    Specified as: a object of type derived from (integer) + psb_T_base_vect_type.

    +

    +

    +

    +On Return

    +

    +

    +

    +desc_a

    + + + +

    the communication descriptor.
    Scope:local.
    Type:required.
    Intent: inout.
    Specified as: a structured data of type psb_desc_type. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    +

    Notes +

      +
    1. +

      On exit from this routine the descriptor is in the assembled state.

    +

    This call will set up all the necessary information for the halo data exchanges. In doing +so, the library will need to identify the set of processes owning the halo indices +through the use of the desc%fnd_owner() method; the owning processes +are the topological neighbours of the calling process. If the user has some +background information on the processes that are neighbours of the current one, +it is possible to specify explicitly the list of adjacent processes with a call +to desc%set_p_adjcncy(list); this will speed up the subsequent call to +psb_cdasb. + + + +

    6.4 psb_cdcpy — Copies a communication descriptor

    + + + +
    +call psb_cdcpy(desc_in, desc_out, info)
    +
    +

    +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +desc_in

    +

    the communication descriptor.
    Scope:local.
    Type:required.
    Intent: in.
    Specified as: a structured data of type psb_desc_type. +

    +

    +

    +

    +On Return

    +

    +

    +

    +desc_out

    +

    the communication descriptor copy.
    Scope:local.
    Type:required.
    Intent: out.
    Specified as: a structured data of type psb_desc_type. +

    +

    +info

    + + + +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + + +

    6.5 psb_cdfree — Frees a communication descriptor

    + + + +
    +call psb_cdfree(desc_a, info)
    +
    +

    +

    +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +desc_a

    +

    the communication descriptor to be freed.
    Scope:local.
    Type:required.
    Intent: inout.
    Specified as: a structured data of type psb_desc_type.

    +

    +

    +

    +On Return

    +

    +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + + +

    6.6 psb_cdbldext — Build an extended communication descriptor

    + + + +
    +call psb_cdbldext(a,desc_a,nl,desc_out, info, extype)
    +
    +

    +

    This subroutine builds an extended communication descriptor, based on the input +descriptor desc_a and on the stencil specified through the input sparse matrix +a. +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +a

    +

    A sparse matrix Scope:local.
    Type:required.
    Intent: in.
    Specified as: a structured data type. +

    +

    +desc_a

    +

    the communication descriptor.
    Scope:local.
    Type:required.
    Intent: in.
    Specified as: a structured data of type psb_Tspmat_type. +

    +

    +nl

    +

    the number of additional layers desired.
    Scope:global.
    Type:required.
    Intent: in.
    Specified as: an integer value nl 0. + + + +

    +

    +extype

    +

    the kind of estension required.
    Scope:global.
    Type:optional .
    Intent: in.
    Specified as: an integer value psb_ovt_xhal_, psb_ovt_asov_, default: + psb_ovt_xhal_ +

    +

    +

    +

    +On Return

    +

    +

    +

    +desc_out

    +

    the extended communication descriptor.
    Scope:local.
    Type:required.
    Intent: inout.
    Specified as: a structured data of type psb_desc_type. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    +

    Notes +

      +
    1. +

      Specifying psb_ovt_xhal_ for the extype argument the user will obtain a + descriptor for a domain partition in which the additional layers are fetched + as part of an (extended) halo; however the index-to-process mapping is + identical to that of the base descriptor; +

    2. +
    3. + + + +

      Specifying psb_ovt_asov_ for the extype argument the user will obtain + a descriptor with an overlapped decomposition: the additional layer is + aggregated to the local subdomain (and thus is an overlap), and a new + halo extending beyond the last additional layer is formed.

    + + + +

    6.7 psb_spall — Allocates a sparse matrix

    + + + +
    +call psb_spall(a, desc_a, info [, nnz, dupl, bldmode])
    +
    +

    +

    +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +desc_a

    +

    the communication descriptor.
    Scope:local.
    Type:required.
    Intent: in.
    Specified as: a structured data of type psb_desc_type. +

    +

    +nnz

    +

    An estimate of the number of nonzeroes in the local part of the assembled + matrix.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: an integer value. +

    +

    +dupl

    +

    How to handle duplicate coefficients.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: integer, possible values: psb_dupl_ovwrt_, psb_dupl_add_, + psb_dupl_err_. + + + +

    +

    +bldmode

    +

    Whether to keep track of matrix entries that do not belong to the current + process.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: + an integer value psb_matbld_noremote_, psb_matbld_remote_. Default: + psb_matbld_noremote_.

    +

    +

    +

    +On Return

    +

    +

    +

    +a

    +

    the matrix to be allocated.
    Scope:local
    Type:required
    Intent: out.
    Specified as: a structured data of type psb_Tspmat_type. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    +

    Notes +

      +
    1. +

      On exit from this routine the sparse matrix is in the build state. +

    2. +
    3. +

      The descriptor may be in either the build or assembled state. + + + +

    4. +
    5. +

      Providing a good estimate for the number of nonzeroes nnz in the + assembled matrix may substantially improve performance in the matrix + build phase, as it will reduce or eliminate the need for (potentially + multiple) data reallocations; +

    6. +
    7. +

      Using psb_matbld_remote_ is likely to cause a runtime overhead at + assembly time;

    + + + +

    6.8 psb_spins — Insert a set of coefficients into a sparse matrix

    + + + +
    +call psb_spins(nz, ia, ja, val, a, desc_a, info [,local])
    +call psb_spins(nr, irw, irp, ja, val, a, desc_a, info [,local])
    +
    +

    +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +nz

    +

    the number of coefficients to be inserted.
    Scope:local.
    Type:required.
    Intent: in.
    Specified as: an integer scalar. +

    +

    +nr

    +

    the number of rows to be inserted.
    Scope:local.
    Type:required.
    Intent: in.
    Specified as: an integer scalar. +

    +

    +irw

    +

    the first row to be inserted.
    Scope:local.
    Type:required.
    Intent: in.
    Specified as: an integer scalar. + + + +

    +

    +ia

    +

    the row indices of the coefficients to be inserted.
    Scope:local.
    Type:required.
    Intent: in.
    Specified as: an integer array of size nz. +

    +

    +irp

    +

    the row pointers of the coefficients to be inserted.
    Scope:local.
    Type:required.
    Intent: in.
    Specified as: an integer array of size nr + 1. +

    +

    +ja

    +

    the column indices of the coefficients to be inserted.
    Scope:local.
    Type:required.
    Intent: in.
    Specified as: an integer array of size nz. +

    +

    +val

    +

    the coefficients to be inserted.
    Scope:local.
    Type:required.
    Intent: in.
    Specified as: an array of size nz. Must be of the same type and kind of + the coefficients of the sparse matrix a. +

    +

    +desc_a

    +

    The communication descriptor.
    Scope: local.
    Type: required.
    Intent: inout.
    Specified as: a variable of type psb_desc_type.
    + + + +

    +

    +local

    +

    Whether the entries in the indices vectors ia, ja are already in local + numbering.
    Scope:local.
    Type:optional.
    Specified as: a logical value; default: .false.. +

    +

    +

    +

    +On Return

    +

    +

    +

    +a

    +

    the matrix into which coefficients will be inserted.
    Scope:local
    Type:required
    Intent: inout.
    Specified as: a structured data of type psb_Tspmat_type. +

    +

    +desc_a

    +

    The communication descriptor.
    Scope: local.
    Type: required.
    Intent: inout.
    Specified as: a variable of type psb_desc_type.
    +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    +

    Notes + + + +

      +
    1. +

      On entry to this routine the descriptor may be in either the build or + assembled state. +

    2. +
    3. +

      On entry to this routine the sparse matrix may be in either the build or + update state. +

    4. +
    5. +

      If the descriptor is in the build state, then the sparse matrix must also be + in the build state; the action of the routine is to (implicitly) call psb_cdins + to add entries to the sparsity pattern; each sparse matrix entry implicitly + defines a graph edge, that is passed to the descriptor routine for the + appropriate processing; +

    6. +
    7. +

      The input data can be passed in either COO or CSR formats; +

    8. +
    9. +

      In COO format the coefficients to be inserted are represented by the + ordered triples ia(i),ja(i),val(i), for i = 1,,nz; these triples are + arbitrary; +

    10. +
    11. +

      In CSR format the coefficients to be inserted for each input row i = 1,nr + are represented by the ordered triples (i + irw - 1),ja(j),val(j), for + j = irp(i),,irp(i + 1) - 1; these triples should belong to the current + process, i.e. i+irw-1 should be one of the local indices, but are otherwise + arbitrary; +

    12. +
    13. +

      There is no requirement that a given row must be passed in its entirety + to a single call to this routine: the buildup of a row may be split into as + many calls as desired (even in the CSR format); + + + +

    14. +
    15. +

      Coefficients from different rows may also be mixed up freely in a single + call, according to the application needs; +

    16. +
    17. +

      Coefficients from matrix rows not owned by the calling process are treated + according to the value of bldmode specified at allocation time; if bldmode + was chosen as psb_matbld_remote_ the library will keep track of them, + otherwise they are silently ignored; +

    18. +
    19. +

      If the descriptor is in the assembled state, then any entries in the sparse + matrix that would generate additional communication requirements are + ignored; +

    20. +
    21. +

      If the matrix is in the update state, any entries in positions that were not + present in the original matrix are ignored.

    + + + +

    6.9 psb_spasb — Sparse matrix assembly routine

    + + + +
    +call psb_spasb(a, desc_a, info [, afmt, upd,  mold])
    +
    +

    +

    +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +desc_a

    +

    the communication descriptor.
    Scope:local.
    Type:required.
    Intent: in/out.
    Specified as: a structured data of type psb_desc_type. +

    +

    +afmt

    +

    the storage format for the sparse matrix.
    Scope: local.
    Type: optional.
    Intent: in.
    Specified as: an array of characters. Defalt: ’CSR’. +

    +

    +upd

    +

    Provide for updates to the matrix coefficients.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: integer, possible values: psb_upd_srch_, psb_upd_perm_ +

    +

    +mold

    + + + +

    The desired dynamic type for the internal matrix storage.
    Scope: local.
    Type: optional.
    Intent: in.
    Specified as: an object of a class derived from psb_T_base_sparse_mat.

    +

    +

    +

    +On Return

    +

    +

    +

    +a

    +

    the matrix to be assembled.
    Scope:local
    Type:required
    Intent: inout.
    Specified as: a structured data of type psb_Tspmat_type. +

    +

    +desc_a

    +

    the communication descriptor.
    Scope:local.
    Type:required.
    Intent: in/out.
    Specified as: a structured data of type psb_desc_type. If the matrix was + allocated with bldmode=psb_matbld_remote_, then the descriptor will be + reassembled. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    +

    Notes +

      +
    1. +

      On entry to this routine the descriptor must be in the assembled state, + i.e. psb_cdasb must already have been called. + + + +

    2. +
    3. +

      The sparse matrix may be in either the build or update state; +

    4. +
    5. +

      Duplicate entries are detected and handled in both build and update state, + with the exception of the error action that is only taken in the build state, + i.e. on the first assembly; +

    6. +
    7. +

      If the update choice is psb_upd_perm_, then subsequent calls to psb_spins + to update the matrix must be arranged in such a way as to produce exactly + the same sequence of coefficient values as encountered at the first assembly; +

    8. +
    9. +

      The output storage format need not be the same on all processes; +

    10. +
    11. +

      On exit from this routine the matrix is in the assembled state, and thus + is suitable for the computational routines; +

    12. +
    13. +

      If the bldmode=psb_matbld_remote_ value was specified at allocation + time, contributions defined on the current process but belonging to a + remote process will be handled accordingly. This is most likely to occur in + finite element applications, with dupl=psb_dupl_add_; it is necessary to + check for possible updates needed in the descriptor, hence there will be a + runtime overhead.

    + + + +

    6.10 psb_spfree — Frees a sparse matrix

    + + + +
    +call psb_spfree(a, desc_a, info)
    +
    +

    +

    +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +a

    +

    the matrix to be freed.
    Scope:local
    Type:required
    Intent: inout.
    Specified as: a structured data of type psb_Tspmat_type. +

    +

    +desc_a

    +

    the communication descriptor.
    Scope:local.
    Type:required.
    Intent: in.
    Specified as: a structured data of type psb_desc_type.

    +

    +

    +

    +On Return

    +

    +

    +

    +info

    + + + +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + + +

    6.11 psb_sprn — Reinit sparse matrix structure for psblas routines.

    + + + +
    +call psb_sprn(a, decsc_a, info, clear)
    +
    +

    +

    +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +a

    +

    the matrix to be reinitialized.
    Scope:local
    Type:required
    Intent: inout.
    Specified as: a structured data of type psb_Tspmat_type. +

    +

    +desc_a

    +

    the communication descriptor.
    Scope:local.
    Type:required.
    Intent: in.
    Specified as: a structured data of type psb_desc_type. +

    +

    +clear

    +

    Choose whether to zero out matrix coefficients
    Scope:local.
    Type:optional.
    Intent: in.
    Default: true.

    +

    + + + +

    +

    +On Return

    +

    +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    +

    Notes +

      +
    1. +

      On exit from this routine the sparse matrix is in the update state.

    + + + +

    6.12 psb_geall — Allocates a dense matrix

    + + + +
    +call psb_geall(x, desc_a, info[, dupl, bldmode, n, lb])
    +
    +

    +

    +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +desc_a

    +

    The communication descriptor.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a variable of type psb_desc_type.
    +

    +

    +n

    +

    The number of columns of the dense matrix to be allocated.
    Scope: local
    Type: optional
    Intent: in.
    Specified as: Integer scalar, default 1. It is not a valid argument if x is a + rank-1 array. +

    +

    +lb

    +

    The lower bound for the column index range of the dense matrix to be + allocated.
    Scope: local
    Type: optional
    Intent: in.
    Specified as: Integer scalar, default 1. It is not a valid argument if x is a + rank-1 array. + + + +

    +

    +dupl

    +

    How to handle duplicate coefficients.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: integer, possible values: psb_dupl_ovwrt_, psb_dupl_add_; + psb_dupl_err_ has no effect. +

    +

    +bldmode

    +

    Whether to keep track of matrix entries that do not belong to the current + process.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: + an integer value psb_matbld_noremote_, psb_matbld_remote_. Default: + psb_matbld_noremote_.

    +

    +

    +

    +On Return

    +

    +

    +

    +x

    +

    The dense matrix to be allocated.
    Scope: local
    Type: required
    Intent: out.
    Specified as: a rank one or two array with the ALLOCATABLE attribute + or an object of type psb_T_vect_type, of type real, complex or integer.
    +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + +

    Notes +

      +
    1. +

      Using psb_matbld_remote_ is likely to cause a runtime overhead at + assembly time;

    + + +

    6.13 psb_geins — Dense matrix insertion routine

    + + +
    +call psb_geins(m, irw, val, x, desc_a, info [,local])
    +
    +

    +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +m

    +

    Number of rows in val to be inserted.
    Scope:local.
    Type:required.
    Intent: in.
    Specified as: an integer value. +

    +

    +irw

    +

    Indices of the rows to be inserted. Specifically, row i of val will be + inserted into the local row corresponding to the global row index irw(i). + Scope:local.
    Type:required.
    Intent: in.
    Specified as: an integer array. +

    +

    +val

    +

    the dense submatrix to be inserted.
    Scope:local.
    Type:required.
    Intent: in.
    Specified as: a rank 1 or 2 array. Specified as: an integer value. + + +

    +

    +desc_a

    +

    the communication descriptor.
    Scope:local.
    Type:required.
    Intent: in.
    Specified as: a structured data of type psb_desc_type. +

    +

    +local

    +

    Whether the entries in the index vector irw, are already in local + numbering.
    Scope:local.
    Type:optional.
    Specified as: a logical value; default: .false.. +

    +

    +

    +

    +On Return

    +

    +

    +

    +x

    +

    the output dense matrix.
    Scope: local
    Type: required
    Intent: inout.
    Specified as: a rank one or two array or an object of type + psb_T_vect_type, of type real, complex or integer.
    +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    +

    Notes + + +

      +
    1. +

      Dense vectors/matrices do not have an associated state; +

    2. +
    3. +

      Duplicate entries are either overwritten or added, there is no provision for + raising an error condition.

    + + +

    6.14 psb_geasb — Assembly a dense matrix

    + + +
    +call psb_geasb(x, desc_a, info, mold)
    +
    +

    +

    +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +desc_a

    +

    The communication descriptor.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a variable of type psb_desc_type.
    +

    +

    +mold

    +

    The desired dynamic type for the internal vector storage.
    Scope: local.
    Type: optional.
    Intent: in.
    Specified as: an object of a class derived from psb_T_base_vect_type; + this is only allowed when x is of type psb_T_vect_type.

    +

    +

    +

    +On Return

    +

    +

    +

    +x

    + + +

    The dense matrix to be assembled.
    Scope: local
    Type: required
    Intent: inout.
    Specified as: a rank one or two array with the ALLOCATABLE or an + object of type psb_T_vect_type, of type real, complex or integer.
    +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    +

    Notes +

      +
    1. +

      On entry to this routine the descriptor must be in the assembled state, + i.e. psb_cdasb must already have been called. +

    2. +
    3. +

      If the bldmode=psb_matbld_remote_ value was specified at allocation + time, contributions defined on the current process but belonging to a + remote process will be handled accordingly. This is most likely to occur in + finite element applications, with dupl=psb_dupl_add_.

    + + +

    6.15 psb_gefree — Frees a dense matrix

    + + +
    +call psb_gefree(x, desc_a, info)
    +
    +

    +

    +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +x

    +

    The dense matrix to be freed.
    Scope: local
    Type: required
    Intent: inout.
    Specified as: a rank one or two array with the ALLOCATABLE or an + object of type psb_T_vect_type, of type real, complex or integer.
    +

    +

    +desc_a

    +

    The communication descriptor.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a variable of type psb_desc_type.

    +

    +

    +

    +On Return

    +

    + + +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + +

    6.16 psb_gelp — Applies a left permutation to a dense matrix

    + + +
    +call psb_gelp(trans, iperm, x, info)
    +
    +

    +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +trans

    +

    A character that specifies whether to permute A or AT.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a single character with value ’N’ for A or ’T’ for AT.
    +

    +

    +iperm

    +

    An integer array containing permutation information.
    Scope: local
    Type: required
    Intent: in.
    Specified as: an integer one-dimensional array.
    +

    +

    +x

    +

    The dense matrix to be permuted.
    Scope: local
    Type: required
    Intent: inout.
    Specified as: a one or two dimensional array.

    +

    + + +

    +

    +On Return

    +

    +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + +

    6.17 psb_glob_to_loc — Global to local indices convertion

    + + +
    +call psb_glob_to_loc(x, y, desc_a, info, iact,owned)
    +call psb_glob_to_loc(x, desc_a, info, iact,owned)
    +
    +

    +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +x

    +

    An integer vector of indices to be converted.
    Scope: local
    Type: required
    Intent: in, inout.
    Specified as: a rank one integer array.
    +

    +

    +desc_a

    +

    the communication descriptor.
    Scope:local.
    Type:required.
    Intent: in.
    Specified as: a structured data of type psb_desc_type. +

    +

    +iact

    +

    specifies action to be taken in case of range errors. Scope: global
    Type: optional
    Intent: in.
    Specified as: a character variable Ignore, Warning or Abort, default Ignore. + + +

    +

    +owned

    +

    Specfies valid range of input Scope: global
    Type: optional
    Intent: in.
    If true, then only indices strictly owned by the current process are + considered valid, if false then halo indices are also accepted. Default: false.

    +

    +

    +

    +On Return

    +

    +

    +

    +x

    +

    If y is not present, then x is overwritten with the translated integer indices. + Scope: global
    Type: required
    Intent: inout.
    Specified as: a rank one integer array. +

    +

    +y

    +

    If y is present, then y is overwritten with the translated integer indices, + and x is left unchanged. Scope: global
    Type: optional
    Intent: out.
    Specified as: a rank one integer array. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    +

    Notes +

      +
    1. +

      If an input index is out of range, then the corresponding output index is + set to a negative number; + + +

    2. +
    3. +

      The default Ignore means that the negative output is the only action + taken on an out-of-range input.

    + + + +

    6.18 psb_loc_to_glob — Local to global indices conversion

    + + + +
    +call psb_loc_to_glob(x, y, desc_a, info, iact)
    +call psb_loc_to_glob(x, desc_a, info, iact)
    +
    +

    +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +x

    +

    An integer vector of indices to be converted.
    Scope: local
    Type: required
    Intent: in, inout.
    Specified as: a rank one integer array.
    +

    +

    +desc_a

    +

    the communication descriptor.
    Scope:local.
    Type:required.
    Intent: in.
    Specified as: a structured data of type psb_desc_type. +

    +

    +iact

    +

    specifies action to be taken in case of range errors. Scope: global
    Type: optional
    Intent: in.
    Specified as: a character variable Ignore, Warning or Abort, default Ignore.

    +

    + + + +

    +

    +On Return

    +

    +

    +

    +x

    +

    If y is not present, then x is overwritten with the translated integer indices. + Scope: global
    Type: required
    Intent: inout.
    Specified as: a rank one integer array. +

    +

    +y

    +

    If y is not present, then y is overwritten with the translated integer indices, + and x is left unchanged. Scope: global
    Type: optional
    Intent: out.
    Specified as: a rank one integer array. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    + + + +

    6.19 psb_is_owned —

    + + + +
    +call psb_is_owned(x, desc_a)
    +
    +

    +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +x

    +

    Integer index.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a scalar integer.
    +

    +

    +desc_a

    +

    the communication descriptor.
    Scope:local.
    Type:required.
    Intent: in.
    Specified as: a structured data of type psb_desc_type.

    +

    +

    +

    +On Return

    +

    +

    +

    +Function value

    + + +

    A logical mask which is true if x is owned by the current process Scope: + local
    Type: required
    Intent: out.

    +

    Notes +

      +
    1. +

      This routine returns a .true. value for an index that is strictly owned by + the current process, excluding the halo indices

    + + +

    6.20 psb_owned_index —

    + + +
    +call psb_owned_index(y, x, desc_a, info)
    +
    +

    +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +x

    +

    Integer indices.
    Scope: local
    Type: required
    Intent: in, inout.
    Specified as: a scalar or a rank one integer array.
    +

    +

    +desc_a

    +

    the communication descriptor.
    Scope:local.
    Type:required.
    Intent: in.
    Specified as: a structured data of type psb_desc_type. +

    +

    +iact

    +

    specifies action to be taken in case of range errors. Scope: global
    Type: optional
    Intent: in.
    Specified as: a character variable Ignore, Warning or Abort, default Ignore.

    +

    + + +

    +

    +On Return

    +

    +

    +

    +y

    +

    A logical mask which is true for all corresponding entries of x that are + owned by the current process Scope: local
    Type: required
    Intent: out.
    Specified as: a scalar or rank one logical array. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    +

    Notes +

      +
    1. +

      This routine returns a .true. value for those indices that are strictly + owned by the current process, excluding the halo indices

    + + +

    6.21 psb_is_local —

    + + +
    +call psb_is_local(x, desc_a)
    +
    +

    +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +x

    +

    Integer index.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a scalar integer.
    +

    +

    +desc_a

    +

    the communication descriptor.
    Scope:local.
    Type:required.
    Intent: in.
    Specified as: a structured data of type psb_desc_type.

    +

    +

    +

    +On Return

    +

    +

    +

    +Function value

    + + +

    A logical mask which is true if x is local to the current process Scope: + local
    Type: required
    Intent: out.

    +

    Notes +

      +
    1. +

      This routine returns a .true. value for an index that is local to the current + process, including the halo indices

    + +

    6.22 psb_local_index —

    + + + +
    +call psb_local_index(y, x, desc_a, info)
    +
    +

    +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +x

    +

    Integer indices.
    Scope: local
    Type: required
    Intent: in, inout.
    Specified as: a scalar or a rank one integer array.
    +

    +

    +desc_a

    +

    the communication descriptor.
    Scope:local.
    Type:required.
    Intent: in.
    Specified as: a structured data of type psb_desc_type. +

    +

    +iact

    +

    specifies action to be taken in case of range errors. Scope: global
    Type: optional
    Intent: in.
    Specified as: a character variable Ignore, Warning or Abort, default Ignore.

    +

    + + + +

    +

    +On Return

    +

    +

    +

    +y

    +

    A logical mask which is true for all corresponding entries of x that are + local to the current process Scope: local
    Type: required
    Intent: out.
    Specified as: a scalar or rank one logical array. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    +

    Notes +

      +
    1. +

      This routine returns a .true. value for those indices that are local to the + current process, including the halo indices.

    + + + +

    6.23 psb_get_boundary — Extract list of boundary elements

    + + + +
    +call psb_get_boundary(bndel, desc, info)
    +
    +

    +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +desc

    +

    the communication descriptor.
    Scope:local.
    Type:required.
    Intent: in.
    Specified as: a structured data of type psb_desc_type.

    +

    +

    +

    +On Return

    +

    +

    +

    +bndel

    +

    The list of boundary elements on the calling process, in local numbering.
    Scope: local
    Type: required
    Intent: out.
    Specified as: a rank one array with the ALLOCATABLE attribute, of type + integer.
    +

    +

    +info

    + + + +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    +

    Notes +

      +
    1. +

      If there are no boundary elements (i.e., if the local part of the connectivity + graph is self-contained) the output vector is set to the “not allocated” + state. +

    2. +
    3. +

      Otherwise the size of bndel will be exactly equal to the number of + boundary elements.

    + + + +

    6.24 psb_get_overlap — Extract list of overlap elements

    + + + +
    +call psb_get_overlap(ovrel, desc, info)
    +
    +

    +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +desc

    +

    the communication descriptor.
    Scope:local.
    Type:required.
    Intent: in.
    Specified as: a structured data of type psb_desc_type.

    +

    +

    +

    +On Return

    +

    +

    +

    +ovrel

    +

    The list of overlap elements on the calling process, in local numbering.
    Scope: local
    Type: required
    Intent: out.
    Specified as: a rank one array with the ALLOCATABLE attribute, of type + integer.
    +

    +

    +info

    + + + +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    +

    Notes +

      +
    1. +

      If there are no overlap elements the output vector is set to the “not + allocated” state. +

    2. +
    3. +

      Otherwise the size of ovrel will be exactly equal to the number of overlap + elements.

    + + + +

    6.25 psb_sp_getrow — Extract row(s) from a sparse matrix

    + + + +
    +call psb_sp_getrow(row, a, nz, ia, ja, val, info, &
    +              & append, nzin, lrw)
    +
    +

    +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +row

    +

    The (first) row to be extracted.
    Scope:local
    Type:required
    Intent: in.
    Specified as: an integer > 0. +

    +

    +a

    +

    the matrix from which to get rows.
    Scope:local
    Type:required
    Intent: in.
    Specified as: a structured data of type psb_Tspmat_type. +

    +

    +append

    +

    Whether to append or overwrite existing output.
    Scope:local
    Type:optional
    Intent: in.
    Specified as: a logical value default: false (overwrite). + + + +

    +

    +nzin

    +

    Input size to be appended to.
    Scope:local
    Type:optional
    Intent: in.
    Specified as: an integer > 0. When append is true, specifies how many + entries in the output vectors are already filled. +

    +

    +lrw

    +

    The last row to be extracted.
    Scope:local
    Type:optional
    Intent: in.
    Specified as: an integer > 0, default: row. +

    +

    +

    +

    +On Return

    +

    +

    +

    +nz

    +

    the number of elements returned by this call.
    Scope:local.
    Type:required.
    Intent: out.
    Returned as: an integer scalar. +

    +

    +ia

    +

    the row indices.
    Scope:local.
    Type:required.
    Intent: inout.
    Specified as: an integer array with the ALLOCATABLE attribute. +

    +

    +ja

    + + + +

    the column indices of the elements to be inserted.
    Scope:local.
    Type:required.
    Intent: inout.
    Specified as: an integer array with the ALLOCATABLE attribute. +

    +

    +val

    +

    the elements to be inserted.
    Scope:local.
    Type:required.
    Intent: inout.
    Specified as: a real array with the ALLOCATABLE attribute. +

    +

    +info

    +

    Error code.
    Scope: local
    Type: required
    Intent: out.
    An integer value; 0 means no error has been detected.

    +

    Notes +

      +
    1. +

      The output nz is always the size of the output generated by the current + call; thus, if append=.true., the total output size will be nzin + nz, with + the newly extracted coefficients stored in entries nzin+1:nzin+nz of the + array arguments; +

    2. +
    3. +

      When append=.true. the output arrays are reallocated as necessary; +

    4. +
    5. +

      The row and column indices are returned in the local numbering + scheme; if the global numbering is desired, the user may employ the + psb_loc_to_glob routine on the output.

    + + + +

    6.26 psb_sizeof — Memory occupation

    +

    This function computes the memory occupation of a PSBLAS object. + + + +

    +isz = psb_sizeof(a)
    +isz = psb_sizeof(desc_a)
    +isz = psb_sizeof(prec)
    +
    +

    +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +a

    +

    A sparse matrix A.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a structured data of type psb_Tspmat_type. +

    +

    +desc_a

    +

    Communication descriptor.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a structured data of type psb_desc_type. +

    +

    +prec

    +

    Scope: local
    Type: required
    Intent: in.
    Specified as: a preconditioner data structure psb_prec_type. + + + +

    +

    +On Return

    +

    +

    +

    +Function value

    +

    The memory occupation of the object specified in the calling sequence, in + bytes.
    Scope: local
    Returned as: an integer(psb_long_int_k_) number.

    + + + +

    6.27 Sorting utilities —

    +

    psb_msort — Sorting by the Merge-sort algorithm +

    psb_qsort — Sorting by the Quicksort algorithm +

    psb_hsort — Sorting by the Heapsort algorithm + + + +

    +call psb_msort(x,ix,dir,flag)
    +call psb_qsort(x,ix,dir,flag)
    +call psb_hsort(x,ix,dir,flag)
    +
    +

    +

    These serial routines sort a sequence X into ascending or descending order. The +argument meaning is identical for the three calls; the only difference is the algorithm +used to accomplish the task (see Usage Notes below). +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +x

    +

    The sequence to be sorted.
    Type:required.
    Specified as: an integer, real or complex array of rank 1. +

    +

    +ix

    +

    A vector of indices.
    Type:optional.
    Specified as: an integer array of (at least) the same size as X. +

    +

    +dir

    +

    The desired ordering.
    Type:optional.
    Specified as: an integer value: +

    +

    + Integer and real data:

    +

    psb_sort_up_, psb_sort_down_, psb_asort_up_, + psb_asort_down_; default psb_sort_up_. + + + +

    +

    + Complex data:

    +

    psb_lsort_up_, psb_lsort_down_, psb_asort_up_, + psb_asort_down_; default psb_lsort_up_.

    +
    +

    +flag

    +

    Whether to keep the original values in IX.
    Type:optional.
    Specified as: an integer value psb_sort_ovw_idx_ or psb_sort_keep_idx_; + default psb_sort_ovw_idx_. +

    +

    +

    +

    +On Return

    +

    +

    +

    +x

    +

    The sequence of values, in the chosen ordering.
    Type:required.
    Specified as: an integer, real or complex array of rank 1. +

    +

    +ix

    +

    A vector of indices.
    Type: Optional
    An integer array of rank 1, whose entries are moved to the same position + as the corresponding entries in x.

    +

    Notes +

      +
    1. +

      For integer or real data the sorting can be performed in the up/down + direction, on the natural or absolute values; +

    2. +
    3. +

      For complex data the sorting can be done in a lexicographic order (i.e.: + sort on the real part with ties broken according to the imaginary part) or + on the absolute values; + + + +

    4. +
    5. +

      The routines return the items in the chosen ordering; the output difference + is the handling of ties (i.e. items with an equal value) in the original input. + With the merge-sort algorithm ties are preserved in the same relative + order as they had in the original sequence, while this is not guaranteed for + quicksort or heapsort; +

    6. +
    7. +

      If flag = psb_sort_ovw_idx_ then the entries in ix(1 : n) where n is the size + of x are initialized to ix(i) i; thus, upon return from the subroutine, + for each index i we have in ix(i) the position that the item x(i) occupied + in the original data sequence; +

    8. +
    9. +

      If flag = psb_sort_keep_idx_ the routine will assume that the entries in + ix(:) have already been initialized by the user; +

    10. +
    11. +

      The three sorting algorithms have a similar O(nlog n) expected running time; + in the average case quicksort will be the fastest and merge-sort the slowest. + However note that: +

        +
      1. +

        The worst case running time for quicksort is O(n2); the algorithm + implemented here follows the well-known median-of-three heuristics, + but the worst case may still apply; +

      2. +
      3. +

        The worst case running time for merge-sort and heap-sort is + O(nlog n) as the average case; +

      4. +
      5. +

        The merge-sort algorithm is implemented to take advantage of + subsequences that may be already in the desired ordering prior to + the subroutine call; this situation is relatively common when dealing + with groups of indices of sparse matrix entries, thus merge-sort is the + preferred choice when a sorting is needed by other routines in the + library.

      + + + +
    + + + + + + + + + + +href="userhtml.html#userhtmlse9.html" >up]

    + id="tailuserhtmlse6.html"> diff --git a/docs/html/userhtmlse7.html b/docs/html/userhtmlse7.html index 16277374..6e245159 100644 --- a/docs/html/userhtmlse7.html +++ b/docs/html/userhtmlse7.html @@ -11,77 +11,2174 @@

    +href="userhtmlse4.html#tailuserhtmlse7.html">tail] [up]

    7 Parallel environment routines

    + id="x12-1050007">Parallel environment routines - +

    7.1 psb_init — Initializes PSBLAS parallel environment

    + + + +
    +call psb_init(ctxt, np, basectxt, ids)
    +
    +

    +

    This subroutine initializes the PSBLAS parallel environment, defining a virtual +parallel machine. +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +np

    +

    Number of processes in the PSBLAS virtual parallel machine.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: an integer value.  Default: use all available processes. +

    +

    +basectxt

    +

    the initial communication context. The new context will be defined from + the processes participating in the initial one.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: an integer value.  Default: use MPI_COMM_WORLD. +

    +

    +ids

    +

    Identities of the processes to use for the new context; the argument is + ignored when np is not specified. This allows the processes in the new + environment to be in an order different from the original one.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: an integer array.  Default: use the indices (0np - 1).

    + + + +

    +

    +

    +On Return

    +

    +

    +

    +ctxt

    +

    the communication context identifying the virtual parallel machine, type + psb_ctxt_type. Note that this is always a duplicate of basectxt, + so that library communications are completely separated from other + communication operations.
    Scope: global.
    Type: required.
    Intent: out.
    Specified as: an integer variable.

    +

    Notes +

      +
    1. +

      A call to this routine must precede any other PSBLAS call. +

    2. +
    3. +

      It is an error to specify a value for np greater than the number of processes + available in the underlying base parallel environment.

    + + + +

    7.2 psb_info — Return information about PSBLAS parallel environment

    + + + +
    +call psb_info(ctxt, iam, np)
    +
    +

    +

    This subroutine returns information about the PSBLAS parallel environment, +defining a virtual parallel machine. +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +ctxt

    +

    the communication context identifying the virtual parallel machine.
    Scope: global.
    Type: required.
    Intent: in.
    Specified as: an integer variable.

    +

    +

    +

    +On Return

    +

    +

    +

    +iam

    +

    Identifier of current process in the PSBLAS virtual parallel machine.
    Scope: local.
    Type: required.
    Intent: out.
    Specified as: an integer value. -1 iam np - 1  +

    +

    +np

    + + + +

    Number of processes in the PSBLAS virtual parallel machine.
    Scope: global.
    Type: required.
    Intent: out.
    Specified as: an integer variable.  

    +

    Notes +

      +
    1. +

      For processes in the virtual parallel machine the identifier will satisfy + 0 iam np - 1; +

    2. +
    3. +

      If the user has requested on psb_init a number of processes less than + the total available in the parallel execution environment, the remaining + processes will have on return iam = -1; the only call involving ctxt that + any such process may execute is to psb_exit.

    + + + +

    7.3 psb_exit — Exit from PSBLAS parallel environment

    + + + +
    +call psb_exit(ctxt)
    +call psb_exit(ctxt,close)
    +
    +

    +

    This subroutine exits from the PSBLAS parallel virtual machine. +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +ctxt

    +

    the communication context identifying the virtual parallel machine.
    Scope: global.
    Type: required.
    Intent: in.
    Specified as: an integer variable. +

    +

    +close

    +

    Whether to close all data structures related to the virtual parallel machine, + besides those associated with ctxt.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: a logical variable, default value: true.

    +

    Notes +

      +
    1. +

      This routine may be called even if a previous call to psb_info has returned + with iam = -1; indeed, it it is the only routine that may be called with + argument ctxt in this situation. + + + +

    2. +
    3. +

      A call to this routine with close=.true. implies a call to MPI_Finalize, + after which no parallel routine may be called. +

    4. +
    5. +

      If the user whishes to use multiple communication contexts in the + same program, or to enter and exit multiple times into the parallel + environment, this routine may be called to selectively close the contexts + with close=.false., while on the last call it should be called with + close=.true. to shutdown in a clean way the entire parallel environment.

    + + + +

    7.4 psb_get_mpi_comm — Get the MPI communicator

    + + + +
    +icomm = psb_get_mpi_comm(ctxt)
    +
    +

    +

    This function returns the MPI communicator associated with a PSBLAS +context +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +ctxt

    +

    the communication context identifying the virtual parallel machine.
    Scope: global.
    Type: required.
    Intent: in.
    Specified as: an integer variable.

    +

    +

    +

    +On Return

    +

    +

    +

    +Function value

    +

    The MPI communicator associated with the PSBLAS virtual parallel + machine.
    Scope: global.
    Type: required.
    Intent: out.

    +

    Notes The subroutine version psb_get_mpicomm is still available but is +deprecated. + + + +

    7.5 psb_get_mpi_rank — Get the MPI rank

    + + + +
    +rank = psb_get_mpi_rank(ctxt, id)
    +
    +

    +

    This function returns the MPI rank of the PSBLAS process id +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +ctxt

    +

    the communication context identifying the virtual parallel machine.
    Scope: global.
    Type: required.
    Intent: in.
    Specified as: an integer variable. +

    +

    +id

    +

    Identifier of a process in the PSBLAS virtual parallel machine.
    Scope: local.
    Type: required.
    Intent: in.
    Specified as: an integer value. 0 id np -

    +

    +

    +

    +On Return

    +

    +

    +

    +Funciton value

    + + + +

    The MPI rank associated with the PSBLAS process id.
    Scope: local.
    Type: required.
    Intent: out.

    +

    Notes The subroutine version psb_get_rank is still available but is deprecated. + + +

    7.6 psb_wtime — Wall clock timing

    + + +
    +time = psb_wtime()
    +
    +

    +

    This function returns a wall clock timer. The resolution of the timer is dependent +on the underlying parallel environment implementation. +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Exit

    +

    +

    +

    +Function value

    +

    the elapsed time in seconds.
    Returned as: a real(psb_dpk_) variable.

    + + +

    7.7 psb_barrier — Sinchronization point parallel environment

    + + +
    +call psb_barrier(ctxt)
    +
    +

    +

    This subroutine acts as an explicit synchronization point for the PSBLAS parallel +virtual machine. +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +ctxt

    +

    the communication context identifying the virtual parallel machine.
    Scope: global.
    Type: required.
    Intent: in.
    Specified as: an integer variable.

    + + +

    7.8 psb_abort — Abort a computation

    + + +
    +call psb_abort(ctxt)
    +
    +

    +

    This subroutine aborts computation on the parallel virtual machine. +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +ctxt

    +

    the communication context identifying the virtual parallel machine.
    Scope: global.
    Type: required.
    Intent: in.
    Specified as: an integer variable.

    + + +

    7.9 psb_bcast — Broadcast data

    + + +
    +call psb_bcast(ctxt, dat [, root, mode, request])
    +
    +

    +

    This subroutine implements a broadcast operation based on the underlying +communication library. +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +ctxt

    +

    the communication context identifying the virtual parallel machine.
    Scope: global.
    Type: required.
    Intent: in.
    Specified as: an integer variable. +

    +

    +dat

    +

    On the root process, the data to be broadcast.
    Scope: global.
    Type: required.
    Intent: inout.
    Specified as: an integer, real or complex variable, which may be a scalar, + or a rank 1 or 2 array, or a character or logical variable, which may be + a scalar or rank 1 array.  Type, kind, rank and size must agree on all + processes. +

    +

    +root

    +

    Root process holding data to be broadcast.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: an integer value 0 <= root <= np - 1, default 0   + + +

    +

    +mode

    +

    Whether the call is started in non-blocking mode and completed later, or + is executed synchronously.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: an integer value. The action to be taken is determined by + its bit fields, which can be set with bitwise OR. Basic action values are + psb_collective_start_, psb_collective_end_. Default: both fields are + selected (i.e. require synchronous completion).
    +

    +

    +request

    +

    A request variable to check for operation completion.
    Scope: local.
    Type: optional.
    Intent: inout.
    If mode does not specify synchronous completion, then this variable must + be present.

    +

    +

    +

    +On Return

    +

    +

    +

    +dat

    +

    On all processes other than root, the broadcasted data.
    Scope: global.
    Type: required.
    Intent: inout.
    Specified as: an integer, real or complex variable, which may be a scalar, + or a rank 1 or 2 array, or a character or logical scalar.  Type, kind, rank + and size must agree on all processes. +

    +

    +request

    +

    A request variable to check for operation completion.
    Scope: local.
    Type: optional.
    Intent: inout.
    If mode does not specify synchronous completion, then this variable must + be present.

    +

    Notes +

      +
    1. +

      The dat argument is both input and output, and its value may be changed + even on processes different from the final result destination. +

    2. +
    3. +

      The mode argument can be built with the bitwise IOR() operator; in the + following example, the argument is forcing immediate completion, hence the + request argument needs not be specified: +

      +

      +

      +  call psb_bcast(ctxt,dat,mode=ior(psb_collective_start_,psb_collective_end_))
      +
      +

      +
    4. +
    5. +

      When splitting the operation in two calls, the dat argument must not be + accessed between calls: +

      +

      +

      +  call psb_bcast(ctxt,dat,mode=psb_collective_start_,request=bcast_request)
      +  ....... ! Do not access dat
      +  call psb_bcast(ctxt,dat,mode=psb_collective_end_, request=bcast_request)
      +
      +

      +
    + + +

    7.10 psb_sum — Global sum

    + + +
    +call psb_sum(ctxt, dat [, root, mode, request])
    +
    +

    +

    This subroutine implements a sum reduction operation based on the underlying +communication library. +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +ctxt

    +

    the communication context identifying the virtual parallel machine.
    Scope: global.
    Type: required.
    Intent: in.
    Specified as: an integer variable. +

    +

    +dat

    +

    The local contribution to the global sum.
    Scope: global.
    Type: required.
    Intent: inout.
    Specified as: an integer, real or complex variable, which may be a scalar, or + a rank 1 or 2 array.  Type, kind, rank and size must agree on all processes. +

    +

    +root

    +

    Process to hold the final sum, or -1 to make it available on all processes.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: an integer value -1 <= root <= np - 1, default -1.   + + +

    +

    +mode

    +

    Whether the call is started in non-blocking mode and completed later, or + is executed synchronously.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: an integer value. The action to be taken is determined by + its bit fields, which can be set with bitwise OR. Basic action values are + psb_collective_start_, psb_collective_end_. Default: both fields are + selected (i.e. require synchronous completion).
    +

    +

    +request

    +

    A request variable to check for operation completion.
    Scope: local.
    Type: optional.
    Intent: inout.
    If mode does not specify synchronous completion, then this variable must + be present.

    +

    +

    +

    +On Return

    +

    +

    +

    +dat

    +

    On destination process(es), the result of the sum operation.
    Scope: global.
    Type: required.
    Intent: inout.
    Specified as: an integer, real or complex variable, which may be a scalar, + or a rank 1 or 2 array.
    Type, kind, rank and size must agree on all processes. +

    +

    +request

    +

    A request variable to check for operation completion.
    Scope: local.
    Type: optional.
    Intent: inout.
    If mode does not specify synchronous completion, then this variable must + be present.

    +

    Notes +

      +
    1. +

      The dat argument is both input and output, and its value may be changed + even on processes different from the final result destination. +

    2. +
    3. +

      The mode argument can be built with the bitwise IOR() operator; in the + following example, the argument is forcing immediate completion, hence the + request argument needs not be specified: +

      +

      +

      +  call psb_sum(ctxt,dat,mode=ior(psb_collective_start_,psb_collective_end_))
      +
      +

      +
    4. +
    5. +

      When splitting the operation in two calls, the dat argument must not be + accessed between calls: +

      +

      +

      +  call psb_sum(ctxt,dat,mode=psb_collective_start_,request=sum_request)
      +  ....... ! Do not access dat
      +  call psb_sum(ctxt,dat,mode=psb_collective_end_,request=sum_request)
      +
      +

      +
    + + +

    7.11 psb_max — Global maximum

    + + +
    +call psb_max(ctxt, dat [, root, mode, request])
    +
    +

    +

    This subroutine implements a maximum valuereduction operation based on the +underlying communication library. +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +ctxt

    +

    the communication context identifying the virtual parallel machine.
    Scope: global.
    Type: required.
    Intent: in.
    Specified as: an integer variable. +

    +

    +dat

    +

    The local contribution to the global maximum.
    Scope: local.
    Type: required.
    Intent: inout.
    Specified as: an integer or real variable, which may be a scalar, or a rank + 1 or 2 array.  Type, kind, rank and size must agree on all processes. +

    +

    +root

    +

    Process to hold the final maximum, or -1 to make it available on all + processes.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: an integer value -1 <= root <= np - 1, default -1.
    + + +

    +

    +mode

    +

    Whether the call is started in non-blocking mode and completed later, or + is executed synchronously.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: an integer value. The action to be taken is determined by + its bit fields, which can be set with bitwise OR. Basic action values are + psb_collective_start_, psb_collective_end_. Default: both fields are + selected (i.e. require synchronous completion).
    +

    +

    +request

    +

    A request variable to check for operation completion.
    Scope: local.
    Type: optional.
    Intent: inout.
    If mode does not specify synchronous completion, then this variable must + be present.

    +

    +

    +

    +On Return

    +

    +

    +

    +dat

    +

    On destination process(es), the result of the maximum operation.
    Scope: global.
    Type: required.
    Intent: in.
    Specified as: an integer or real variable, which may be a scalar, or a rank + 1 or 2 array.  Type, kind, rank and size must agree on all processes. +

    +

    +request

    +

    A request variable to check for operation completion.
    Scope: local.
    Type: optional.
    Intent: inout.
    If mode does not specify synchronous completion, then this variable must + be present.

    + +

    Notes +

      +
    1. +

      The dat argument is both input and output, and its value may be changed + even on processes different from the final result destination. +

    2. +
    3. +

      The mode argument can be built with the bitwise IOR() operator; in the + following example, the argument is forcing immediate completion, hence the + request argument needs not be specified: +

      +

      +

      +  call psb_max(ctxt,dat,mode=ior(psb_collective_start_,psb_collective_end_))
      +
      +

      +
    4. +
    5. +

      When splitting the operation in two calls, the dat argument must not be + accessed between calls: +

      +

      +

      +  call psb_max(ctxt,dat,mode=psb_collective_start_,request=max_request)
      +  ....... ! Do not access dat
      +  call psb_max(ctxt,dat,mode=psb_collective_end_,request=max_request)
      +
      +

      +
    + + + +

    7.12 psb_min — Global minimum

    + + + +
    +call psb_min(ctxt, dat [, root, mode, request])
    +
    +

    +

    This subroutine implements a minimum value reduction operation based on the +underlying communication library. +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +ctxt

    +

    the communication context identifying the virtual parallel machine.
    Scope: global.
    Type: required.
    Intent: in.
    Specified as: an integer variable. +

    +

    +dat

    +

    The local contribution to the global minimum.
    Scope: local.
    Type: required.
    Intent: inout.
    Specified as: an integer or real variable, which may be a scalar, or a rank + 1 or 2 array.  Type, kind, rank and size must agree on all processes. +

    +

    +root

    +

    Process to hold the final value, or -1 to make it available on all processes.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: an integer value -1 <= root <= np - 1, default -1.
    + + + +

    +

    +mode

    +

    Whether the call is started in non-blocking mode and completed later, or + is executed synchronously.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: an integer value. The action to be taken is determined by + its bit fields, which can be set with bitwise OR. Basic action values are + psb_collective_start_, psb_collective_end_. Default: both fields are + selected (i.e. require synchronous completion).
    +

    +

    +request

    +

    A request variable to check for operation completion.
    Scope: local.
    Type: optional.
    Intent: inout.
    If mode does not specify synchronous completion, then this variable must + be present.

    +

    +

    +

    +On Return

    +

    +

    +

    +dat

    +

    On destination process(es), the result of the minimum operation.
    Scope: global.
    Type: required.
    Intent: inout.
    Specified as: an integer or real variable, which may be a scalar, or a rank + 1 or 2 array.
    Type, kind, rank and size must agree on all processes. +

    +

    +request

    +

    A request variable to check for operation completion.
    Scope: local.
    Type: optional.
    Intent: inout.
    If mode does not specify synchronous completion, then this variable must + be present.

    +

    Notes +

      +
    1. +

      The dat argument is both input and output, and its value may be changed + even on processes different from the final result destination. +

    2. +
    3. +

      The mode argument can be built with the bitwise IOR() operator; in the + following example, the argument is forcing immediate completion, hence the + request argument needs not be specified: +

      +

      +

      +  call psb_min(ctxt,dat,mode=ior(psb_collective_start_,psb_collective_end_))
      +
      +

      +
    4. +
    5. +

      When splitting the operation in two calls, the dat argument must not be + accessed between calls: +

      +

      +

      +  call psb_min(ctxt,dat,mode=psb_collective_start_,request=min_request)
      +  ....... ! Do not access dat
      +  call psb_min(ctxt,dat,mode=psb_collective_end_,request=min_request)
      +
      +

      +
    + + + +

    7.13 psb_amx — Global maximum absolute value

    + + + +
    +call psb_amx(ctxt, dat [, root, mode, request])
    +
    +

    +

    This subroutine implements a maximum absolute value reduction operation based +on the underlying communication library. +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +ctxt

    +

    the communication context identifying the virtual parallel machine.
    Scope: global.
    Type: required.
    Intent: in.
    Specified as: an integer variable. +

    +

    +dat

    +

    The local contribution to the global maximum.
    Scope: local.
    Type: required.
    Intent: inout.
    Specified as: an integer, real or complex variable, which may be a scalar, or + a rank 1 or 2 array.  Type, kind, rank and size must agree on all processes. +

    +

    +root

    +

    Process to hold the final value, or -1 to make it available on all processes.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: an integer value -1 <= root <= np - 1, default -1.
    + + + +

    +

    +mode

    +

    Whether the call is started in non-blocking mode and completed later, or + is executed synchronously.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: an integer value. The action to be taken is determined by + its bit fields, which can be set with bitwise OR. Basic action values are + psb_collective_start_, psb_collective_end_. Default: both fields are + selected (i.e. require synchronous completion).
    +

    +

    +request

    +

    A request variable to check for operation completion.
    Scope: local.
    Type: optional.
    Intent: inout.
    If mode does not specify synchronous completion, then this variable must + be present.

    +

    +

    +

    +On Return

    +

    +

    +

    +dat

    +

    On destination process(es), the result of the maximum operation.
    Scope: global.
    Type: required.
    Intent: inout.
    Specified as: an integer, real or complex variable, which may be a scalar, or + a rank 1 or 2 array.  Type, kind, rank and size must agree on all processes. +

    +

    +request

    +

    A request variable to check for operation completion.
    Scope: local.
    Type: optional.
    Intent: inout.
    If mode does not specify synchronous completion, then this variable must + be present.

    + + + +

    Notes +

      +
    1. +

      The dat argument is both input and output, and its value may be changed + even on processes different from the final result destination. +

    2. +
    3. +

      The mode argument can be built with the bitwise IOR() operator; in the + following example, the argument is forcing immediate completion, hence the + request argument needs not be specified: +

      +

      +

      +  call psb_amx(ctxt,dat,mode=ior(psb_collective_start_,psb_collective_end_))
      +
      +

      +
    4. +
    5. +

      When splitting the operation in two calls, the dat argument must not be + accessed between calls: +

      +

      +

      +  call psb_amx(ctxt,dat,mode=psb_collective_start_,request=amx_request)
      +  ....... ! Do not access dat
      +  call psb_amx(ctxt,dat,mode=psb_collective_end_,request=amx_request)
      +
      +

      +
    + + + +

    7.14 psb_amn — Global minimum absolute value

    + + + +
    +call psb_amn(ctxt, dat [, root, mode, request])
    +
    +

    +

    This subroutine implements a minimum absolute value reduction operation based +on the underlying communication library. +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +ctxt

    +

    the communication context identifying the virtual parallel machine.
    Scope: global.
    Type: required.
    Intent: in.
    Specified as: an integer variable. +

    +

    +dat

    +

    The local contribution to the global minimum.
    Scope: local.
    Type: required.
    Intent: inout.
    Specified as: an integer, real or complex variable, which may be a scalar, or + a rank 1 or 2 array.  Type, kind, rank and size must agree on all processes. +

    +

    +root

    +

    Process to hold the final value, or -1 to make it available on all processes.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: an integer value -1 <= root <= np - 1, default -1.
    + + + +

    +

    +mode

    +

    Whether the call is started in non-blocking mode and completed later, or + is executed synchronously.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: an integer value. The action to be taken is determined by + its bit fields, which can be set with bitwise OR. Basic action values are + psb_collective_start_, psb_collective_end_. Default: both fields are + selected (i.e. require synchronous completion).
    +

    +

    +request

    +

    A request variable to check for operation completion.
    Scope: local.
    Type: optional.
    Intent: inout.
    If mode does not specify synchronous completion, then this variable must + be present.

    +

    +

    +

    +On Return

    +

    +

    +

    +dat

    +

    On destination process(es), the result of the minimum operation.
    Scope: global.
    Type: required.
    Intent: inout.
    Specified as: an integer, real or complex variable, which may be a scalar, + or a rank 1 or 2 array.
    Type, kind, rank and size must agree on all processes. +

    +

    +request

    +

    A request variable to check for operation completion.
    Scope: local.
    Type: optional.
    Intent: inout.
    If mode does not specify synchronous completion, then this variable must + be present.

    +

    Notes +

      +
    1. +

      The dat argument is both input and output, and its value may be changed + even on processes different from the final result destination. +

    2. +
    3. +

      The mode argument can be built with the bitwise IOR() operator; in the + following example, the argument is forcing immediate completion, hence the + request argument needs not be specified: +

      +

      +

      +  call psb_amn(ctxt,dat,mode=ior(psb_collective_start_,psb_collective_end_))
      +
      +

      +
    4. +
    5. +

      When splitting the operation in two calls, the dat argument must not be + accessed between calls: +

      +

      +

      +  call psb_amn(ctxt,dat,mode=psb_collective_start_,request=amn_request)
      +  ....... ! Do not access dat
      +  call psb_amn(ctxt,dat,mode=psb_collective_end_,request=amn_request)
      +
      +

      +
    + + + +

    7.15 psb_nrm2 — Global 2-norm reduction

    + + + +
    +call psb_nrm2(ctxt, dat [, root, mode, request])
    +
    +

    +

    This subroutine implements a 2-norm value reduction operation based on the +underlying communication library. +

    +

    +Type:

    +

    Synchronous. +

    +

    +On Entry

    +

    +

    +

    +ctxt

    +

    the communication context identifying the virtual parallel machine.
    Scope: global.
    Type: required.
    Intent: in.
    Specified as: an integer variable. +

    +

    +dat

    +

    The local contribution to the global minimum.
    Scope: local.
    Type: required.
    Intent: inout.
    Specified as: a real variable, which may be a scalar, or a rank 1 array. +  Kind, rank and size must agree on all processes. +

    +

    +root

    +

    Process to hold the final value, or -1 to make it available on all processes.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: an integer value -1 <= root <= np - 1, default -1.
    + + + +

    +

    +mode

    +

    Whether the call is started in non-blocking mode and completed later, or + is executed synchronously.
    Scope: global.
    Type: optional.
    Intent: in.
    Specified as: an integer value. The action to be taken is determined by + its bit fields, which can be set with bitwise OR. Basic action values are + psb_collective_start_, psb_collective_end_. Default: both fields are + selected (i.e. require synchronous completion).
    +

    +

    +request

    +

    A request variable to check for operation completion.
    Scope: local.
    Type: optional.
    Intent: inout.
    If mode does not specify synchronous completion, then this variable must + be present.

    +

    +

    +

    +On Return

    +

    +

    +

    +dat

    +

    On destination process(es), the result of the 2-norm reduction.
    Scope: global.
    Type: required.
    Intent: inout.
    Specified as: a real variable, which may be a scalar, or a rank 1 array.
    Kind, rank and size must agree on all processes. +

    +

    +request

    +

    A request variable to check for operation completion.
    Scope: local.
    Type: optional.
    Intent: inout.
    If mode does not specify synchronous completion, then this variable must + be present.

    + + + +

    Notes +

      +
    1. +

      This reduction is appropriate to compute the results of multiple (local) + NRM2 operations at the same time. +

    2. +
    3. +

      Denoting by dati the value of the variable dat on process i, the output res + is equivalent to the computation of +

      +      ∘ ∑------
+res =      dat2i,
+         i
+
      +

      with care taken to avoid unnecessary overflow. +

    4. +
    5. +

      The dat argument is both input and output, and its value may be changed + even on processes different from the final result destination. +

    6. +
    7. +

      The mode argument can be built with the bitwise IOR() operator; in the + following example, the argument is forcing immediate completion, hence the + request argument needs not be specified: +

      +

      +

      +  call psb_nrm2(ctxt,dat,mode=ior(psb_collective_start_,psb_collective_end_))
      +
      +

      +
    8. +
    9. +

      When splitting the operation in two calls, the dat argument must not be + accessed between calls: + + + +

      +

      +

      +  call psb_nrm2(ctxt,dat,mode=psb_collective_start_,request=nrm2_request)
      +  ....... ! Do not access dat
      +  call psb_nrm2(ctxt,dat,mode=psb_collective_end_,request=nrm2_request)
      +
      +

      +
    + + + +

    7.16 psb_snd — Send data

    + + + +
    +call psb_snd(ctxt, dat, dst, m)
    +
    +

    +

    This subroutine sends a packet of data to a destination. +

    +

    +Type:

    +

    Synchronous: see usage notes. +

    +

    +On Entry

    +

    +

    +

    +ctxt

    +

    the communication context identifying the virtual parallel machine.
    Scope: global.
    Type: required.
    Intent: in.
    Specified as: an integer variable. +

    +

    +dat

    +

    The data to be sent.
    Scope: local.
    Type: required.
    Intent: in.
    Specified as: an integer, real or complex variable, which may be a scalar, + or a rank 1 or 2 array, or a character or logical scalar.  Type, kind and + rank must agree on sender and receiver process; if m is not specified, size + must agree as well. +

    +

    +dst

    +

    Destination process.
    Scope: global.
    Type: required.
    Intent: in.
    Specified as: an integer value 0 <= dst <= np - 1.
    + + + +

    +

    +m

    +

    Number of rows.
    Scope: global.
    Type: Optional.
    Intent: in.
    Specified as: an integer value 0 <= m <= size(dat,1).
    When dat is a rank 2 array, specifies the number of rows to be sent + independently of the leading dimension size(dat,1); must have the same + value on sending and receiving processes.

    +

    +

    +

    +On Return

    +

    +

    Notes +

      +
    1. +

      This subroutine implies a synchronization, but only between the calling + process and the destination process dst.

    + + + +

    7.17 psb_rcv — Receive data

    + + + +
    +call psb_rcv(ctxt, dat, src, m)
    +
    +

    +

    This subroutine receives a packet of data to a destination. +

    +

    +Type:

    +

    Synchronous: see usage notes. +

    +

    +On Entry

    +

    +

    +

    +ctxt

    +

    the communication context identifying the virtual parallel machine.
    Scope: global.
    Type: required.
    Intent: in.
    Specified as: an integer variable. +

    +

    +src

    +

    Source process.
    Scope: global.
    Type: required.
    Intent: in.
    Specified as: an integer value 0 <= src <= np - 1.
    +

    +

    +m

    +

    Number of rows.
    Scope: global.
    Type: Optional.
    Intent: in.
    Specified as: an integer value 0 <= m <= size(dat,1).
    When dat is a rank 2 array, specifies the number of rows to be sent + independently of the leading dimension size(dat,1); must have the same + value on sending and receiving processes.

    + + + +

    +

    +

    +On Return

    +

    +

    +

    +dat

    +

    The data to be received.
    Scope: local.
    Type: required.
    Intent: inout.
    Specified as: an integer, real or complex variable, which may be a scalar, + or a rank 1 or 2 array, or a character or logical scalar.  Type, kind and + rank must agree on sender and receiver process; if m is not specified, size + must agree as well.

    +

    Notes +

      +
    1. +

      This subroutine implies a synchronization, but only between the calling + process and the source process src.

    + + + + +href="userhtml.html#userhtmlse10.html" >up]

    + id="tailuserhtmlse7.html"> diff --git a/docs/html/userhtmlse8.html b/docs/html/userhtmlse8.html index d4260b27..2e6b2ff0 100644 --- a/docs/html/userhtmlse8.html +++ b/docs/html/userhtmlse8.html @@ -11,13 +11,13 @@

    +href="userhtmlse5.html#tailuserhtmlse8.html">tail] [up]

    8 Error handling

    + id="x13-1230008">Error handling

    The PSBLAS library error handling policy has been completely rewritten in version 2.0. The idea behind the design of this new error handling strategy is to keep error messages on a stack allowing the user to trace back up to the point where the first @@ -36,7 +36,7 @@ zero, an error condition is raised. This process continues on all the levels of nested calls until the level where the user decides to abort the program execution.

    Figure 5 shows the layout of a generic 5 shows the layout of a generic psb_foo routine with respect to the PSBLAS-2.0 error handling policy. It is possible to see how, whenever an error condition is detected, the



    @@ -67,270 +67,241 @@ explicitly. >

    -
    subroutinesubroutine psb_foo(some args, info) + psb_foo(some args, ... + info) 
      if(error detected) then +  ... 
       if(error info=errcode1 + detected) then 
        call psb_errpush(psb_foo, errcode1) +   info=errcode1 
       goto 9999 +   end if +  call ... + psb_errpush(psb_foo, errcode1) 
     call psb_bar(some args, info) +   if(info .ne. zero) then +    goto 9999 
     info=errcode2 +   end if 
      call psb_errpush(psb_foo, errcode2) +  ... 
       call goto 9999 + psb_bar(some args, end if + info) 
      ... +9999 continue +  if(info if (err_act .eq. act_abort) then + .ne. zero) then 
      call psb_error(icontxt) +     return + info=errcode2 
      else +     return + call psb_errpush(psb_foo, errcode2) 
          goto 9999 
       end if 
       ... 
    9999 continue 
       if (err_act .eq. act_abort) then 
         call psb_error(icontxt) 
         return 
       else 
         return 
       end if 
     
    end subroutine psb_foo -
    +class="cmtt-9"> end if + +end subroutine psb_foo

    Listing 5: The layout of a generic psb_foo routine with respect to PSBLAS-2.0 -error handling policy.
    +error handling policy.

    Figure 6 reports a sample error message generated by the PSBLAS-2.0 +href="#x13-123026r6">6 reports a sample error message generated by the PSBLAS-2.0 library. This error has been generated by the fact that the user has chosen the invalid “FOO” storage format to represent the sparse matrix. From this error message it is possible to see that the error has been detected inside @@ -342,7 +313,7 @@ process).



    @@ -371,7 +342,7 @@ Aborting...
    Listing 6: A sample PSBLAS-3.0 error message. Process 0 detected an error -condition inside the psb_cest subroutine
    +condition inside the psb_cest subroutine
    @@ -379,28 +350,236 @@ condition inside the psb_cest subroutine
    +
    call psb_errpush(err_c, r_name, i_err, a_err)
    + +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +err_c

    +

    the error code
    Scope: local
    Type: required
    Intent: in.
    Specified as: an integer. +

    +

    +r_name

    +

    the soutine where the error has been caught.
    Scope: local
    Type: required
    Intent: in.
    Specified as: a string.
    +

    +

    +i_err

    +

    addional info for error code
    Scope: local
    Type: optional
    Specified as: an integer array
    +

    +

    +a_err

    + + +

    addional info for error code
    Scope: local
    Type: optional
    Specified as: a string.

    + + + +

    8.2 psb_error — Prints the error stack content and aborts execution

    + +
    call psb_error(icontxt)
    + +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +icontxt

    +

    the communication context.
    Scope: global
    Type: optional
    Intent: in.
    Specified as: an integer.

    + + + +

    8.3 psb_set_errverbosity — Sets the verbosity of error messages

    + +
    call psb_set_errverbosity(v)
    + +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +v

    +

    the verbosity level
    Scope: global
    Type: required
    Intent: in.
    Specified as: an integer.

    + + + +

    8.4 psb_set_erraction — Set the type of action to be taken upon error +condition

    + +
    call psb_set_erraction(err_act)
    + +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +err_act

    +

    the type of action.
    Scope: global
    Type: required
    Intent: in.
    Specified as: an integer. Possible values: psb_act_ret, psb_act_abort.

    + + + + + + + +href="userhtml.html#userhtmlse11.html" >up]

    + id="tailuserhtmlse8.html"> diff --git a/docs/html/userhtmlse9.html b/docs/html/userhtmlse9.html index 878297b4..f0fa9516 100644 --- a/docs/html/userhtmlse9.html +++ b/docs/html/userhtmlse9.html @@ -11,47 +11,728 @@

    +href="userhtmlse6.html#tailuserhtmlse9.html">tail] [up]

    9 Utilities

    + id="x14-1280009">Utilities

    We have some utilities available for input and output of sparse matrices; the interfaces to these routines are available in the module psb_util_mod. -

    +

    9.1 hb_read — Read a sparse matrix from a file in the Harwell–Boeing +format

    + +
    call hb_read(a, iret, iunit, filename, b, mtitle)
    + +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +filename

    +

    The name of the file to be read.
    Type:optional.
    Specified as: a character variable containing a valid file name, or -, in + which case the default input unit 5 (i.e. standard input in Unix jargon) is + used. Default: -. +

    +

    +iunit

    +

    The Fortran file unit number.
    Type:optional.
    Specified as: an integer value. Only meaningful if filename is not -.

    +

    +

    +

    +On Return

    +

    +

    +

    +a

    +

    the sparse matrix read from file.
    Type:required.
    Specified as: a structured data of type psb_Tspmat_type. + + + +

    +

    +b

    +

    Rigth hand side(s).
    Type: Optional
    An array of type real or complex, rank 2 and having the ALLOCATABLE + attribute; will be allocated and filled in if the input file contains a right + hand side, otherwise will be left in the UNALLOCATED state. +

    +

    +mtitle

    +

    Matrix title.
    Type: Optional
    A charachter variable of length 72 holding a copy of the matrix title as + specified by the Harwell-Boeing format and contained in the input file. +

    +

    +iret

    +

    Error code.
    Type: required
    An integer value; 0 means no error has been detected.

    + + + +

    9.2 hb_write — Write a sparse matrix to a file in the Harwell–Boeing +format

    + +
    call hb_write(a, iret, iunit, filename, key, rhs, mtitle)
    + +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +a

    +

    the sparse matrix to be written.
    Type:required.
    Specified as: a structured data of type psb_Tspmat_type. +

    +

    +b

    +

    Rigth hand side.
    Type: Optional
    An array of type real or complex, rank 1 and having the ALLOCATABLE + attribute; will be allocated and filled in if the input file contains a right + hand side. +

    +

    +filename

    +

    The name of the file to be written to.
    Type:optional.
    Specified as: a character variable containing a valid file name, or -, in + which case the default output unit 6 (i.e. standard output in Unix jargon) + is used. Default: -. +

    +

    +iunit

    +

    The Fortran file unit number.
    Type:optional.
    Specified as: an integer value. Only meaningful if filename is not -. + + + +

    +

    +key

    +

    Matrix key.
    Type: Optional
    A charachter variable of length 8 holding the matrix key as specified by + the Harwell-Boeing format and to be written to file. +

    +

    +mtitle

    +

    Matrix title.
    Type: Optional
    A charachter variable of length 72 holding the matrix title as specified by + the Harwell-Boeing format and to be written to file.

    +

    +

    +

    +On Return

    +

    +

    +

    +iret

    +

    Error code.
    Type: required
    An integer value; 0 means no error has been detected.

    + + + +

    9.3 mm_mat_read — Read a sparse matrix from a file in the MatrixMarket +format

    + +
    call mm_mat_read(a, iret, iunit, filename)
    + +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +filename

    +

    The name of the file to be read.
    Type:optional.
    Specified as: a character variable containing a valid file name, or -, in + which case the default input unit 5 (i.e. standard input in Unix jargon) is + used. Default: -. +

    +

    +iunit

    +

    The Fortran file unit number.
    Type:optional.
    Specified as: an integer value. Only meaningful if filename is not -.

    +

    +

    +

    +On Return

    +

    +

    +

    +a

    +

    the sparse matrix read from file.
    Type:required.
    Specified as: a structured data of type psb_Tspmat_type. + + +

    +

    +iret

    +

    Error code.
    Type: required
    An integer value; 0 means no error has been detected.

    + + +

    9.4 mm_array_read — Read a dense array from a file in the MatrixMarket +format

    + +
    call mm_array_read(b, iret, iunit, filename)
    + +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +filename

    +

    The name of the file to be read.
    Type:optional.
    Specified as: a character variable containing a valid file name, or -, in + which case the default input unit 5 (i.e. standard input in Unix jargon) is + used. Default: -. +

    +

    +iunit

    +

    The Fortran file unit number.
    Type:optional.
    Specified as: an integer value. Only meaningful if filename is not -.

    +

    +

    +

    +On Return

    +

    +

    +

    +b

    +

    Rigth hand side(s).
    Type: required
    An array of type real or complex, rank 1 or 2 and having the + + + ALLOCATABLE attribute, or an object of type psb_T_vect_type, of + type real or complex.
    Will be allocated and filled in if the input file contains a right hand side, + otherwise will be left in the UNALLOCATED state.
    +

    +

    +iret

    +

    Error code.
    Type: required
    An integer value; 0 means no error has been detected.

    + + +

    9.5 mm_mat_write — Write a sparse matrix to a file in the MatrixMarket +format

    + +
    call mm_mat_write(a, mtitle, iret, iunit, filename)
    + +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +a

    +

    the sparse matrix to be written.
    Type:required.
    Specified as: a structured data of type psb_Tspmat_type. +

    +

    +mtitle

    +

    Matrix title.
    Type: required
    A charachter variable holding a descriptive title for the matrix to be + written to file. +

    +

    +filename

    +

    The name of the file to be written to.
    Type:optional.
    Specified as: a character variable containing a valid file name, or -, in + which case the default output unit 6 (i.e. standard output in Unix jargon) + is used. Default: -. +

    +

    +iunit

    +

    The Fortran file unit number.
    Type:optional.
    Specified as: an integer value. Only meaningful if filename is not -.

    + +

    +

    +

    +On Return

    +

    +

    +

    +iret

    +

    Error code.
    Type: required
    An integer value; 0 means no error has been detected.

    +

    Notes +

    If this function is called on a matrix a on a distributed communicator only the +local part is written in output. To get a single MatrixMarket file with the whole +matrix when appropriate, e.g. for debugging purposes, one could gather the whole +matrix on a single rank and then write it. Consider the following example for a +double precision matrix +

    +

    +

    +type(psb_ldspmat_type) :: aglobal
    +
    +call psb_gather(aglobal,a,desc_a,info)
    +if (iam == psb_root_) then
    +call mm_mat_write(aglobal,mtitle,info,filename)
    +end if
    +call psb_spfree(aglobal, desc_a, info)
    +
    +

    +

    To simplify this procedure in C, there is a utility function +

    +

    +

    +psb_i_t psb_c_<s,d,c,z>global_mat_write(ah,cdh);
    +
    +

    +

    that produces exactly this result. + + + +

    9.6 mm_array_write — Write a dense array from a file in the MatrixMarket +format

    + +
    call mm_array_write(b, vtitle, iret, iunit, filename)
    + +

    +

    +

    +Type:

    +

    Asynchronous. +

    +

    +On Entry

    +

    +

    +

    +b

    +

    Rigth hand side(s).
    Type: required
    An array of type real or complex, rank 1 or 2, or an object of type + psb_T_vect_type, of type real or complex; its contents will be written to + disk.
    +

    +

    +filename

    +

    The name of the file to be written.
    +

    +

    +vtitle

    +

    Matrix title.
    Type: required
    A charachter variable holding a descriptive title for the vector to be written + to file. Type:optional.
    Specified as: a character variable containing a valid file name, or -, in + which case the default input unit 5 (i.e. standard input in Unix jargon) is + used. Default: -. +

    +

    +iunit

    + + + +

    The Fortran file unit number.
    Type:optional.
    Specified as: an integer value. Only meaningful if filename is not -.

    +

    +

    +

    +On Return

    +

    +

    +

    +iret

    +

    Error code.
    Type: required
    An integer value; 0 means no error has been detected.

    +

    Notes +

    If this function is called on a vector v on a distributed communicator only the +local part is written in output. To get a single MatrixMarket file with the whole +vector when appropriate, e.g. for debugging purposes, one could gather the whole +vector on a single rank and then write it. Consider the following example for a double +precision vector +

    +

    +

    +real(psb_dpk_), allocatable :: vglobal(:)
    +
    +call psb_gather(vglobal,v,desc,info)
    +if (iam == psb_root_) then
    +call mm_array_write(vglobal,vtitle,info,filename)
    +end if
    +call deallocate(vglobal, stat=info)
    +
    +

    +

    To simplify this procedure in C, there is a utility function +

    +

    +

    +psb_i_t psb_c_<s,d,c,z>global_vec_write(vh,cdh);
    +
    +

    +

    that produces exactly this result. + + + + + + + + + +

    +href="userhtml.html#userhtmlse12.html" >up]

    + id="tailuserhtmlse9.html"> diff --git a/docs/psblas-3.8.pdf b/docs/psblas-3.9.pdf similarity index 68% rename from docs/psblas-3.8.pdf rename to docs/psblas-3.9.pdf index 95e953d8..6d104b9d 100644 --- a/docs/psblas-3.8.pdf +++ b/docs/psblas-3.9.pdf @@ -4,220 +4,220 @@ << /Type /ObjStm /N 100 -/First 819 -/Length 6377 ->> -stream -1 0 5 38 6 77 9 117 10 144 13 185 14 217 17 263 18 299 21 345 -22 379 25 425 26 464 29 515 30 562 33 608 34 643 37 684 38 727 41 773 -42 816 45 867 46 905 49 956 50 1026 53 1077 54 1147 57 1198 58 1270 61 1321 -62 1393 65 1444 66 1522 69 1573 70 1638 73 1689 74 1740 77 1791 78 1851 81 1903 -82 2006 85 2058 86 2161 89 2213 90 2285 93 2337 94 2409 97 2461 98 2542 101 2594 -102 2631 105 2678 106 2716 109 2768 110 2810 113 2862 114 2938 117 2990 118 3069 121 3121 -122 3211 125 3263 126 3358 129 3410 130 3492 133 3544 134 3618 137 3670 138 3740 141 3792 -142 3883 145 3936 146 4006 149 4059 150 4114 153 4167 154 4236 157 4289 158 4345 161 4398 -162 4459 165 4512 166 4569 169 4622 170 4679 173 4732 174 4814 177 4867 178 4920 181 4973 -182 5010 185 5057 186 5103 189 5155 190 5190 193 5242 194 5317 197 5369 198 5450 201 5502 +/First 865 +/Length 16060 +>> +stream +1 0 5 38 6 195 9 235 10 329 13 370 14 488 17 534 18 672 21 718 +22 846 25 892 26 1045 29 1096 30 1292 33 1338 34 1471 37 1512 38 1691 41 1737 +42 1913 45 1964 46 2112 49 2163 50 2450 53 2501 54 2788 57 2839 58 3136 61 3187 +62 3484 65 3535 66 3862 69 3913 70 4181 73 4232 74 4442 77 4493 78 4751 81 4803 +82 5234 85 5286 86 5717 89 5769 90 6063 93 6115 94 6409 97 6461 98 6827 101 6879 +102 7018 105 7065 106 7212 109 7264 110 7431 113 7483 114 7817 117 7869 118 8218 121 8270 +122 8677 125 8729 126 9164 129 9216 130 9595 133 9647 134 9965 137 10017 138 10285 141 10337 +142 10701 145 10754 146 11064 149 11117 150 11346 153 11399 154 11683 157 11736 158 11955 161 12008 +162 12255 165 12308 166 12547 169 12600 170 12839 173 12892 174 13220 177 13273 178 13489 181 13542 +182 13681 185 13728 186 13918 189 13970 190 14099 193 14151 194 14480 197 14532 198 14906 201 14958 % 1 0 obj << /S /GoTo /D (title.0) >> % 5 0 obj -(PSBLAS-v3.8.0 User's Guide) +(\376\377\000P\000S\000B\000L\000A\000S\000-\000v\0003\000.\0009\000.\0000\000\040\000U\000s\000e\000r\000'\000s\000\040\000G\000u\000i\000d\000e) % 6 0 obj << /S /GoTo /D (section.1) >> % 9 0 obj -(1 Introduction) +(\376\377\0001\000\040\000I\000n\000t\000r\000o\000d\000u\000c\000t\000i\000o\000n) % 10 0 obj << /S /GoTo /D (section.2) >> % 13 0 obj -(2 General overview) +(\376\377\0002\000\040\000G\000e\000n\000e\000r\000a\000l\000\040\000o\000v\000e\000r\000v\000i\000e\000w) % 14 0 obj << /S /GoTo /D (subsection.2.1) >> % 17 0 obj -(2.1 Basic Nomenclature) +(\376\377\0002\000.\0001\000\040\000B\000a\000s\000i\000c\000\040\000N\000o\000m\000e\000n\000c\000l\000a\000t\000u\000r\000e) % 18 0 obj << /S /GoTo /D (subsection.2.2) >> % 21 0 obj -(2.2 Library contents) +(\376\377\0002\000.\0002\000\040\000L\000i\000b\000r\000a\000r\000y\000\040\000c\000o\000n\000t\000e\000n\000t\000s) % 22 0 obj << /S /GoTo /D (subsection.2.3) >> % 25 0 obj -(2.3 Application structure) +(\376\377\0002\000.\0003\000\040\000A\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n\000\040\000s\000t\000r\000u\000c\000t\000u\000r\000e) % 26 0 obj << /S /GoTo /D (subsubsection.2.3.1) >> % 29 0 obj -(2.3.1 User-defined index mappings) +(\376\377\0002\000.\0003\000.\0001\000\040\000U\000s\000e\000r\000-\000d\000e\000f\000i\000n\000e\000d\000\040\000i\000n\000d\000e\000x\000\040\000m\000a\000p\000p\000i\000n\000g\000s) % 30 0 obj << /S /GoTo /D (subsection.2.4) >> % 33 0 obj -(2.4 Programming model) +(\376\377\0002\000.\0004\000\040\000P\000r\000o\000g\000r\000a\000m\000m\000i\000n\000g\000\040\000m\000o\000d\000e\000l) % 34 0 obj << /S /GoTo /D (section.3) >> % 37 0 obj -(3 Data Structures and Classes) +(\376\377\0003\000\040\000D\000a\000t\000a\000\040\000S\000t\000r\000u\000c\000t\000u\000r\000e\000s\000\040\000a\000n\000d\000\040\000C\000l\000a\000s\000s\000e\000s) % 38 0 obj << /S /GoTo /D (subsection.3.1) >> % 41 0 obj -(3.1 Descriptor data structure) +(\376\377\0003\000.\0001\000\040\000D\000e\000s\000c\000r\000i\000p\000t\000o\000r\000\040\000d\000a\000t\000a\000\040\000s\000t\000r\000u\000c\000t\000u\000r\000e) % 42 0 obj << /S /GoTo /D (subsubsection.3.1.1) >> % 45 0 obj -(3.1.1 Descriptor Methods) +(\376\377\0003\000.\0001\000.\0001\000\040\000D\000e\000s\000c\000r\000i\000p\000t\000o\000r\000\040\000M\000e\000t\000h\000o\000d\000s) % 46 0 obj << /S /GoTo /D (subsubsection.3.1.2) >> % 49 0 obj -(3.1.2 get\137local\137rows \204 Get number of local rows) +(\376\377\0003\000.\0001\000.\0002\000\040\000g\000e\000t\000\137\000l\000o\000c\000a\000l\000\137\000r\000o\000w\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000l\000o\000c\000a\000l\000\040\000r\000o\000w\000s) % 50 0 obj << /S /GoTo /D (subsubsection.3.1.3) >> % 53 0 obj -(3.1.3 get\137local\137cols \204 Get number of local cols) +(\376\377\0003\000.\0001\000.\0003\000\040\000g\000e\000t\000\137\000l\000o\000c\000a\000l\000\137\000c\000o\000l\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000l\000o\000c\000a\000l\000\040\000c\000o\000l\000s) % 54 0 obj << /S /GoTo /D (subsubsection.3.1.4) >> % 57 0 obj -(3.1.4 get\137global\137rows \204 Get number of global rows) +(\376\377\0003\000.\0001\000.\0004\000\040\000g\000e\000t\000\137\000g\000l\000o\000b\000a\000l\000\137\000r\000o\000w\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000g\000l\000o\000b\000a\000l\000\040\000r\000o\000w\000s) % 58 0 obj << /S /GoTo /D (subsubsection.3.1.5) >> % 61 0 obj -(3.1.5 get\137global\137cols \204 Get number of global cols) +(\376\377\0003\000.\0001\000.\0005\000\040\000g\000e\000t\000\137\000g\000l\000o\000b\000a\000l\000\137\000c\000o\000l\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000g\000l\000o\000b\000a\000l\000\040\000c\000o\000l\000s) % 62 0 obj << /S /GoTo /D (subsubsection.3.1.6) >> % 65 0 obj -(3.1.6 get\137global\137indices \204 Get vector of global indices) +(\376\377\0003\000.\0001\000.\0006\000\040\000g\000e\000t\000\137\000g\000l\000o\000b\000a\000l\000\137\000i\000n\000d\000i\000c\000e\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000v\000e\000c\000t\000o\000r\000\040\000o\000f\000\040\000g\000l\000o\000b\000a\000l\000\040\000i\000n\000d\000i\000c\000e\000s) % 66 0 obj << /S /GoTo /D (subsubsection.3.1.7) >> % 69 0 obj -(3.1.7 get\137context \204 Get communication context) +(\376\377\0003\000.\0001\000.\0007\000\040\000g\000e\000t\000\137\000c\000o\000n\000t\000e\000x\000t\000\040\040\024\000\040\000G\000e\000t\000\040\000c\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000c\000o\000n\000t\000e\000x\000t) % 70 0 obj << /S /GoTo /D (subsubsection.3.1.8) >> % 73 0 obj -(3.1.8 Clone \204 clone current object) +(\376\377\0003\000.\0001\000.\0008\000\040\000C\000l\000o\000n\000e\000\040\040\024\000\040\000c\000l\000o\000n\000e\000\040\000c\000u\000r\000r\000e\000n\000t\000\040\000o\000b\000j\000e\000c\000t) % 74 0 obj << /S /GoTo /D (subsubsection.3.1.9) >> % 77 0 obj -(3.1.9 CNV \204 convert internal storage format) +(\376\377\0003\000.\0001\000.\0009\000\040\000C\000N\000V\000\040\040\024\000\040\000c\000o\000n\000v\000e\000r\000t\000\040\000i\000n\000t\000e\000r\000n\000a\000l\000\040\000s\000t\000o\000r\000a\000g\000e\000\040\000f\000o\000r\000m\000a\000t) % 78 0 obj << /S /GoTo /D (subsubsection.3.1.10) >> % 81 0 obj -(3.1.10 psb\137cd\137get\137large\137threshold \204 Get threshold for index mapping switch) +(\376\377\0003\000.\0001\000.\0001\0000\000\040\000p\000s\000b\000\137\000c\000d\000\137\000g\000e\000t\000\137\000l\000a\000r\000g\000e\000\137\000t\000h\000r\000e\000s\000h\000o\000l\000d\000\040\040\024\000\040\000G\000e\000t\000\040\000t\000h\000r\000e\000s\000h\000o\000l\000d\000\040\000f\000o\000r\000\040\000i\000n\000d\000e\000x\000\040\000m\000a\000p\000p\000i\000n\000g\000\040\000s\000w\000i\000t\000c\000h) % 82 0 obj << /S /GoTo /D (subsubsection.3.1.11) >> % 85 0 obj -(3.1.11 psb\137cd\137set\137large\137threshold \204 Set threshold for index mapping switch) +(\376\377\0003\000.\0001\000.\0001\0001\000\040\000p\000s\000b\000\137\000c\000d\000\137\000s\000e\000t\000\137\000l\000a\000r\000g\000e\000\137\000t\000h\000r\000e\000s\000h\000o\000l\000d\000\040\040\024\000\040\000S\000e\000t\000\040\000t\000h\000r\000e\000s\000h\000o\000l\000d\000\040\000f\000o\000r\000\040\000i\000n\000d\000e\000x\000\040\000m\000a\000p\000p\000i\000n\000g\000\040\000s\000w\000i\000t\000c\000h) % 86 0 obj << /S /GoTo /D (subsubsection.3.1.12) >> % 89 0 obj -(3.1.12 get\137p\137adjcncy \204 Get process adjacency list) +(\376\377\0003\000.\0001\000.\0001\0002\000\040\000g\000e\000t\000\137\000p\000\137\000a\000d\000j\000c\000n\000c\000y\000\040\040\024\000\040\000G\000e\000t\000\040\000p\000r\000o\000c\000e\000s\000s\000\040\000a\000d\000j\000a\000c\000e\000n\000c\000y\000\040\000l\000i\000s\000t) % 90 0 obj << /S /GoTo /D (subsubsection.3.1.13) >> % 93 0 obj -(3.1.13 set\137p\137adjcncy \204 Set process adjacency list) +(\376\377\0003\000.\0001\000.\0001\0003\000\040\000s\000e\000t\000\137\000p\000\137\000a\000d\000j\000c\000n\000c\000y\000\040\040\024\000\040\000S\000e\000t\000\040\000p\000r\000o\000c\000e\000s\000s\000\040\000a\000d\000j\000a\000c\000e\000n\000c\000y\000\040\000l\000i\000s\000t) % 94 0 obj << /S /GoTo /D (subsubsection.3.1.14) >> % 97 0 obj -(3.1.14 fnd\137owner \204 Find the owner process of a set of indices) +(\376\377\0003\000.\0001\000.\0001\0004\000\040\000f\000n\000d\000\137\000o\000w\000n\000e\000r\000\040\040\024\000\040\000F\000i\000n\000d\000\040\000t\000h\000e\000\040\000o\000w\000n\000e\000r\000\040\000p\000r\000o\000c\000e\000s\000s\000\040\000o\000f\000\040\000a\000\040\000s\000e\000t\000\040\000o\000f\000\040\000i\000n\000d\000i\000c\000e\000s) % 98 0 obj << /S /GoTo /D (subsubsection.3.1.15) >> % 101 0 obj -(3.1.15 Named Constants) +(\376\377\0003\000.\0001\000.\0001\0005\000\040\000N\000a\000m\000e\000d\000\040\000C\000o\000n\000s\000t\000a\000n\000t\000s) % 102 0 obj << /S /GoTo /D (subsection.3.2) >> % 105 0 obj -(3.2 Sparse Matrix class) +(\376\377\0003\000.\0002\000\040\000S\000p\000a\000r\000s\000e\000\040\000M\000a\000t\000r\000i\000x\000\040\000c\000l\000a\000s\000s) % 106 0 obj << /S /GoTo /D (subsubsection.3.2.1) >> % 109 0 obj -(3.2.1 Sparse Matrix Methods) +(\376\377\0003\000.\0002\000.\0001\000\040\000S\000p\000a\000r\000s\000e\000\040\000M\000a\000t\000r\000i\000x\000\040\000M\000e\000t\000h\000o\000d\000s) % 110 0 obj << /S /GoTo /D (subsubsection.3.2.2) >> % 113 0 obj -(3.2.2 get\137nrows \204 Get number of rows in a sparse matrix) +(\376\377\0003\000.\0002\000.\0002\000\040\000g\000e\000t\000\137\000n\000r\000o\000w\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000r\000o\000w\000s\000\040\000i\000n\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 114 0 obj << /S /GoTo /D (subsubsection.3.2.3) >> % 117 0 obj -(3.2.3 get\137ncols \204 Get number of columns in a sparse matrix) +(\376\377\0003\000.\0002\000.\0003\000\040\000g\000e\000t\000\137\000n\000c\000o\000l\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000c\000o\000l\000u\000m\000n\000s\000\040\000i\000n\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 118 0 obj << /S /GoTo /D (subsubsection.3.2.4) >> % 121 0 obj -(3.2.4 get\137nnzeros \204 Get number of nonzero elements in a sparse matrix) +(\376\377\0003\000.\0002\000.\0004\000\040\000g\000e\000t\000\137\000n\000n\000z\000e\000r\000o\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000n\000o\000n\000z\000e\000r\000o\000\040\000e\000l\000e\000m\000e\000n\000t\000s\000\040\000i\000n\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 122 0 obj << /S /GoTo /D (subsubsection.3.2.5) >> % 125 0 obj -(3.2.5 get\137size \204 Get maximum number of nonzero elements in a sparse matrix) +(\376\377\0003\000.\0002\000.\0005\000\040\000g\000e\000t\000\137\000s\000i\000z\000e\000\040\040\024\000\040\000G\000e\000t\000\040\000m\000a\000x\000i\000m\000u\000m\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000n\000o\000n\000z\000e\000r\000o\000\040\000e\000l\000e\000m\000e\000n\000t\000s\000\040\000i\000n\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 126 0 obj << /S /GoTo /D (subsubsection.3.2.6) >> % 129 0 obj -(3.2.6 sizeof \204 Get memory occupation in bytes of a sparse matrix) +(\376\377\0003\000.\0002\000.\0006\000\040\000s\000i\000z\000e\000o\000f\000\040\040\024\000\040\000G\000e\000t\000\040\000m\000e\000m\000o\000r\000y\000\040\000o\000c\000c\000u\000p\000a\000t\000i\000o\000n\000\040\000i\000n\000\040\000b\000y\000t\000e\000s\000\040\000o\000f\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 130 0 obj << /S /GoTo /D (subsubsection.3.2.7) >> % 133 0 obj -(3.2.7 get\137fmt \204 Short description of the dynamic type) +(\376\377\0003\000.\0002\000.\0007\000\040\000g\000e\000t\000\137\000f\000m\000t\000\040\040\024\000\040\000S\000h\000o\000r\000t\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000i\000o\000n\000\040\000o\000f\000\040\000t\000h\000e\000\040\000d\000y\000n\000a\000m\000i\000c\000\040\000t\000y\000p\000e) % 134 0 obj << /S /GoTo /D (subsubsection.3.2.8) >> % 137 0 obj -(3.2.8 is\137bld, is\137upd, is\137asb \204 Status check) +(\376\377\0003\000.\0002\000.\0008\000\040\000i\000s\000\137\000b\000l\000d\000,\000\040\000i\000s\000\137\000u\000p\000d\000,\000\040\000i\000s\000\137\000a\000s\000b\000\040\040\024\000\040\000S\000t\000a\000t\000u\000s\000\040\000c\000h\000e\000c\000k) % 138 0 obj << /S /GoTo /D (subsubsection.3.2.9) >> % 141 0 obj -(3.2.9 is\137lower, is\137upper, is\137triangle, is\137unit \204 Format check) +(\376\377\0003\000.\0002\000.\0009\000\040\000i\000s\000\137\000l\000o\000w\000e\000r\000,\000\040\000i\000s\000\137\000u\000p\000p\000e\000r\000,\000\040\000i\000s\000\137\000t\000r\000i\000a\000n\000g\000l\000e\000,\000\040\000i\000s\000\137\000u\000n\000i\000t\000\040\040\024\000\040\000F\000o\000r\000m\000a\000t\000\040\000c\000h\000e\000c\000k) % 142 0 obj << /S /GoTo /D (subsubsection.3.2.10) >> % 145 0 obj -(3.2.10 cscnv \204 Convert to a different storage format) +(\376\377\0003\000.\0002\000.\0001\0000\000\040\000c\000s\000c\000n\000v\000\040\040\024\000\040\000C\000o\000n\000v\000e\000r\000t\000\040\000t\000o\000\040\000a\000\040\000d\000i\000f\000f\000e\000r\000e\000n\000t\000\040\000s\000t\000o\000r\000a\000g\000e\000\040\000f\000o\000r\000m\000a\000t) % 146 0 obj << /S /GoTo /D (subsubsection.3.2.11) >> % 149 0 obj -(3.2.11 csclip \204 Reduce to a submatrix) +(\376\377\0003\000.\0002\000.\0001\0001\000\040\000c\000s\000c\000l\000i\000p\000\040\040\024\000\040\000R\000e\000d\000u\000c\000e\000\040\000t\000o\000\040\000a\000\040\000s\000u\000b\000m\000a\000t\000r\000i\000x) % 150 0 obj << /S /GoTo /D (subsubsection.3.2.12) >> % 153 0 obj -(3.2.12 clean\137zeros \204 Eliminate zero coefficients) +(\376\377\0003\000.\0002\000.\0001\0002\000\040\000c\000l\000e\000a\000n\000\137\000z\000e\000r\000o\000s\000\040\040\024\000\040\000E\000l\000i\000m\000i\000n\000a\000t\000e\000\040\000z\000e\000r\000o\000\040\000c\000o\000e\000f\000f\000i\000c\000i\000e\000n\000t\000s) % 154 0 obj << /S /GoTo /D (subsubsection.3.2.13) >> % 157 0 obj -(3.2.13 get\137diag \204 Get main diagonal) +(\376\377\0003\000.\0002\000.\0001\0003\000\040\000g\000e\000t\000\137\000d\000i\000a\000g\000\040\040\024\000\040\000G\000e\000t\000\040\000m\000a\000i\000n\000\040\000d\000i\000a\000g\000o\000n\000a\000l) % 158 0 obj << /S /GoTo /D (subsubsection.3.2.14) >> % 161 0 obj -(3.2.14 clip\137diag \204 Cut out main diagonal) +(\376\377\0003\000.\0002\000.\0001\0004\000\040\000c\000l\000i\000p\000\137\000d\000i\000a\000g\000\040\040\024\000\040\000C\000u\000t\000\040\000o\000u\000t\000\040\000m\000a\000i\000n\000\040\000d\000i\000a\000g\000o\000n\000a\000l) % 162 0 obj << /S /GoTo /D (subsubsection.3.2.15) >> % 165 0 obj -(3.2.15 tril \204 Return the lower triangle) +(\376\377\0003\000.\0002\000.\0001\0005\000\040\000t\000r\000i\000l\000\040\040\024\000\040\000R\000e\000t\000u\000r\000n\000\040\000t\000h\000e\000\040\000l\000o\000w\000e\000r\000\040\000t\000r\000i\000a\000n\000g\000l\000e) % 166 0 obj << /S /GoTo /D (subsubsection.3.2.16) >> % 169 0 obj -(3.2.16 triu \204 Return the upper triangle) +(\376\377\0003\000.\0002\000.\0001\0006\000\040\000t\000r\000i\000u\000\040\040\024\000\040\000R\000e\000t\000u\000r\000n\000\040\000t\000h\000e\000\040\000u\000p\000p\000e\000r\000\040\000t\000r\000i\000a\000n\000g\000l\000e) % 170 0 obj << /S /GoTo /D (subsubsection.3.2.17) >> % 173 0 obj -(3.2.17 psb\137set\137mat\137default \204 Set default storage format) +(\376\377\0003\000.\0002\000.\0001\0007\000\040\000p\000s\000b\000\137\000s\000e\000t\000\137\000m\000a\000t\000\137\000d\000e\000f\000a\000u\000l\000t\000\040\040\024\000\040\000S\000e\000t\000\040\000d\000e\000f\000a\000u\000l\000t\000\040\000s\000t\000o\000r\000a\000g\000e\000\040\000f\000o\000r\000m\000a\000t) % 174 0 obj << /S /GoTo /D (subsubsection.3.2.18) >> % 177 0 obj -(3.2.18 clone \204 Clone current object) +(\376\377\0003\000.\0002\000.\0001\0008\000\040\000c\000l\000o\000n\000e\000\040\040\024\000\040\000C\000l\000o\000n\000e\000\040\000c\000u\000r\000r\000e\000n\000t\000\040\000o\000b\000j\000e\000c\000t) % 178 0 obj << /S /GoTo /D (subsubsection.3.2.19) >> % 181 0 obj -(3.2.19 Named Constants) +(\376\377\0003\000.\0002\000.\0001\0009\000\040\000N\000a\000m\000e\000d\000\040\000C\000o\000n\000s\000t\000a\000n\000t\000s) % 182 0 obj << /S /GoTo /D (subsection.3.3) >> % 185 0 obj -(3.3 Dense Vector Data Structure) +(\376\377\0003\000.\0003\000\040\000D\000e\000n\000s\000e\000\040\000V\000e\000c\000t\000o\000r\000\040\000D\000a\000t\000a\000\040\000S\000t\000r\000u\000c\000t\000u\000r\000e) % 186 0 obj << /S /GoTo /D (subsubsection.3.3.1) >> % 189 0 obj -(3.3.1 Vector Methods) +(\376\377\0003\000.\0003\000.\0001\000\040\000V\000e\000c\000t\000o\000r\000\040\000M\000e\000t\000h\000o\000d\000s) % 190 0 obj << /S /GoTo /D (subsubsection.3.3.2) >> % 193 0 obj -(3.3.2 get\137nrows \204 Get number of rows in a dense vector) +(\376\377\0003\000.\0003\000.\0002\000\040\000g\000e\000t\000\137\000n\000r\000o\000w\000s\000\040\040\024\000\040\000G\000e\000t\000\040\000n\000u\000m\000b\000e\000r\000\040\000o\000f\000\040\000r\000o\000w\000s\000\040\000i\000n\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000v\000e\000c\000t\000o\000r) % 194 0 obj << /S /GoTo /D (subsubsection.3.3.3) >> % 197 0 obj -(3.3.3 sizeof \204 Get memory occupation in bytes of a dense vector) +(\376\377\0003\000.\0003\000.\0003\000\040\000s\000i\000z\000e\000o\000f\000\040\040\024\000\040\000G\000e\000t\000\040\000m\000e\000m\000o\000r\000y\000\040\000o\000c\000c\000u\000p\000a\000t\000i\000o\000n\000\040\000i\000n\000\040\000b\000y\000t\000e\000s\000\040\000o\000f\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000v\000e\000c\000t\000o\000r) % 198 0 obj << /S /GoTo /D (subsubsection.3.3.4) >> % 201 0 obj -(3.3.4 set \204 Set contents of the vector) +(\376\377\0003\000.\0003\000.\0004\000\040\000s\000e\000t\000\040\040\024\000\040\000S\000e\000t\000\040\000c\000o\000n\000t\000e\000n\000t\000s\000\040\000o\000f\000\040\000t\000h\000e\000\040\000v\000e\000c\000t\000o\000r) endstream endobj @@ -225,226 +225,447 @@ endobj << /Type /ObjStm /N 100 -/First 877 -/Length 6351 ->> -stream -202 0 206 52 207 123 210 175 211 227 214 274 215 322 218 369 219 407 222 449 -223 488 226 535 227 598 230 645 231 693 234 740 235 801 238 848 239 908 242 955 -243 1019 246 1066 247 1119 250 1166 251 1233 254 1280 255 1333 258 1380 259 1447 262 1495 -263 1556 266 1604 267 1672 270 1720 271 1794 274 1842 275 1902 278 1950 279 2005 282 2053 -283 2109 286 2157 287 2214 290 2256 291 2295 294 2342 295 2401 298 2448 299 2498 302 2545 -303 2609 306 2656 307 2722 310 2764 311 2805 314 2852 315 2925 318 2972 319 3048 322 3095 -323 3173 326 3220 327 3290 330 3337 331 3407 334 3454 335 3536 338 3583 339 3645 342 3692 -343 3778 346 3825 347 3892 350 3940 351 4000 354 4048 355 4136 358 4184 359 4246 362 4294 -363 4362 366 4410 367 4471 370 4519 371 4578 374 4626 375 4707 378 4755 379 4839 382 4887 -383 4971 386 5019 387 5063 390 5111 391 5158 394 5206 395 5250 398 5298 399 5345 402 5393 +/First 919 +/Length 15874 +>> +stream +202 0 206 52 207 358 210 410 211 621 214 668 215 865 218 912 219 1059 222 1101 +223 1250 226 1297 227 1554 230 1601 231 1777 234 1824 235 2068 238 2115 239 2354 242 2401 +243 2660 246 2707 247 2911 250 2958 251 3235 254 3282 255 3486 258 3533 259 3810 262 3858 +263 4105 266 4153 267 4438 270 4486 271 4804 274 4852 275 5091 278 5139 279 5350 282 5398 +283 5614 286 5662 287 5883 290 5925 291 6074 294 6121 295 6355 298 6402 299 6588 302 6635 +303 6897 306 6944 307 7216 310 7258 311 7420 314 7467 315 7774 318 7821 319 8143 322 8190 +323 8522 326 8569 327 8861 330 8908 331 9200 334 9247 335 9602 338 9649 339 9901 342 9948 +343 10335 346 10382 347 10659 350 10707 351 10949 354 10997 355 11388 358 11436 359 11688 362 11736 +363 12018 366 12066 367 12313 370 12361 371 12598 374 12646 375 13005 378 13053 379 13394 382 13442 +383 13783 386 13831 387 13972 390 14020 391 14176 394 14224 395 14365 398 14413 399 14569 402 14617 % 202 0 obj << /S /GoTo /D (subsubsection.3.3.5) >> % 206 0 obj -(3.3.5 get\137vect \204 Get a copy of the vector contents) +(\376\377\0003\000.\0003\000.\0005\000\040\000g\000e\000t\000\137\000v\000e\000c\000t\000\040\040\024\000\040\000G\000e\000t\000\040\000a\000\040\000c\000o\000p\000y\000\040\000o\000f\000\040\000t\000h\000e\000\040\000v\000e\000c\000t\000o\000r\000\040\000c\000o\000n\000t\000e\000n\000t\000s) % 207 0 obj << /S /GoTo /D (subsubsection.3.3.6) >> % 210 0 obj -(3.3.6 clone \204 Clone current object) +(\376\377\0003\000.\0003\000.\0006\000\040\000c\000l\000o\000n\000e\000\040\040\024\000\040\000C\000l\000o\000n\000e\000\040\000c\000u\000r\000r\000e\000n\000t\000\040\000o\000b\000j\000e\000c\000t) % 211 0 obj << /S /GoTo /D (subsection.3.4) >> % 214 0 obj -(3.4 Preconditioner data structure) +(\376\377\0003\000.\0004\000\040\000P\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r\000\040\000d\000a\000t\000a\000\040\000s\000t\000r\000u\000c\000t\000u\000r\000e) % 215 0 obj << /S /GoTo /D (subsection.3.5) >> % 218 0 obj -(3.5 Heap data structure) +(\376\377\0003\000.\0005\000\040\000H\000e\000a\000p\000\040\000d\000a\000t\000a\000\040\000s\000t\000r\000u\000c\000t\000u\000r\000e) % 219 0 obj << /S /GoTo /D (section.4) >> % 222 0 obj -(4 Computational routines) +(\376\377\0004\000\040\000C\000o\000m\000p\000u\000t\000a\000t\000i\000o\000n\000a\000l\000\040\000r\000o\000u\000t\000i\000n\000e\000s) % 223 0 obj << /S /GoTo /D (subsection.4.1) >> % 226 0 obj -(4.1 psb\137geaxpby \204 General Dense Matrix Sum) +(\376\377\0004\000.\0001\000\040\000p\000s\000b\000\137\000g\000e\000a\000x\000p\000b\000y\000\040\040\024\000\040\000G\000e\000n\000e\000r\000a\000l\000\040\000D\000e\000n\000s\000e\000\040\000M\000a\000t\000r\000i\000x\000\040\000S\000u\000m) % 227 0 obj << /S /GoTo /D (subsection.4.2) >> % 230 0 obj -(4.2 psb\137gedot \204 Dot Product) +(\376\377\0004\000.\0002\000\040\000p\000s\000b\000\137\000g\000e\000d\000o\000t\000\040\040\024\000\040\000D\000o\000t\000\040\000P\000r\000o\000d\000u\000c\000t) % 231 0 obj << /S /GoTo /D (subsection.4.3) >> % 234 0 obj -(4.3 psb\137gedots \204 Generalized Dot Product) +(\376\377\0004\000.\0003\000\040\000p\000s\000b\000\137\000g\000e\000d\000o\000t\000s\000\040\040\024\000\040\000G\000e\000n\000e\000r\000a\000l\000i\000z\000e\000d\000\040\000D\000o\000t\000\040\000P\000r\000o\000d\000u\000c\000t) % 235 0 obj << /S /GoTo /D (subsection.4.4) >> % 238 0 obj -(4.4 psb\137normi \204 Infinity-Norm of Vector) +(\376\377\0004\000.\0004\000\040\000p\000s\000b\000\137\000n\000o\000r\000m\000i\000\040\040\024\000\040\000I\000n\000f\000i\000n\000i\000t\000y\000-\000N\000o\000r\000m\000\040\000o\000f\000\040\000V\000e\000c\000t\000o\000r) % 239 0 obj << /S /GoTo /D (subsection.4.5) >> % 242 0 obj -(4.5 psb\137geamaxs \204 Generalized Infinity Norm) +(\376\377\0004\000.\0005\000\040\000p\000s\000b\000\137\000g\000e\000a\000m\000a\000x\000s\000\040\040\024\000\040\000G\000e\000n\000e\000r\000a\000l\000i\000z\000e\000d\000\040\000I\000n\000f\000i\000n\000i\000t\000y\000\040\000N\000o\000r\000m) % 243 0 obj << /S /GoTo /D (subsection.4.6) >> % 246 0 obj -(4.6 psb\137norm1 \204 1-Norm of Vector) +(\376\377\0004\000.\0006\000\040\000p\000s\000b\000\137\000n\000o\000r\000m\0001\000\040\040\024\000\040\0001\000-\000N\000o\000r\000m\000\040\000o\000f\000\040\000V\000e\000c\000t\000o\000r) % 247 0 obj << /S /GoTo /D (subsection.4.7) >> % 250 0 obj -(4.7 psb\137geasums \204 Generalized 1-Norm of Vector) +(\376\377\0004\000.\0007\000\040\000p\000s\000b\000\137\000g\000e\000a\000s\000u\000m\000s\000\040\040\024\000\040\000G\000e\000n\000e\000r\000a\000l\000i\000z\000e\000d\000\040\0001\000-\000N\000o\000r\000m\000\040\000o\000f\000\040\000V\000e\000c\000t\000o\000r) % 251 0 obj << /S /GoTo /D (subsection.4.8) >> % 254 0 obj -(4.8 psb\137norm2 \204 2-Norm of Vector) +(\376\377\0004\000.\0008\000\040\000p\000s\000b\000\137\000n\000o\000r\000m\0002\000\040\040\024\000\040\0002\000-\000N\000o\000r\000m\000\040\000o\000f\000\040\000V\000e\000c\000t\000o\000r) % 255 0 obj << /S /GoTo /D (subsection.4.9) >> % 258 0 obj -(4.9 psb\137genrm2s \204 Generalized 2-Norm of Vector) +(\376\377\0004\000.\0009\000\040\000p\000s\000b\000\137\000g\000e\000n\000r\000m\0002\000s\000\040\040\024\000\040\000G\000e\000n\000e\000r\000a\000l\000i\000z\000e\000d\000\040\0002\000-\000N\000o\000r\000m\000\040\000o\000f\000\040\000V\000e\000c\000t\000o\000r) % 259 0 obj << /S /GoTo /D (subsection.4.10) >> % 262 0 obj -(4.10 psb\137norm1 \204 1-Norm of Sparse Matrix) +(\376\377\0004\000.\0001\0000\000\040\000p\000s\000b\000\137\000n\000o\000r\000m\0001\000\040\040\024\000\040\0001\000-\000N\000o\000r\000m\000\040\000o\000f\000\040\000S\000p\000a\000r\000s\000e\000\040\000M\000a\000t\000r\000i\000x) % 263 0 obj << /S /GoTo /D (subsection.4.11) >> % 266 0 obj -(4.11 psb\137normi \204 Infinity Norm of Sparse Matrix) +(\376\377\0004\000.\0001\0001\000\040\000p\000s\000b\000\137\000n\000o\000r\000m\000i\000\040\040\024\000\040\000I\000n\000f\000i\000n\000i\000t\000y\000\040\000N\000o\000r\000m\000\040\000o\000f\000\040\000S\000p\000a\000r\000s\000e\000\040\000M\000a\000t\000r\000i\000x) % 267 0 obj << /S /GoTo /D (subsection.4.12) >> % 270 0 obj -(4.12 psb\137spmm \204 Sparse Matrix by Dense Matrix Product) +(\376\377\0004\000.\0001\0002\000\040\000p\000s\000b\000\137\000s\000p\000m\000m\000\040\040\024\000\040\000S\000p\000a\000r\000s\000e\000\040\000M\000a\000t\000r\000i\000x\000\040\000b\000y\000\040\000D\000e\000n\000s\000e\000\040\000M\000a\000t\000r\000i\000x\000\040\000P\000r\000o\000d\000u\000c\000t) % 271 0 obj << /S /GoTo /D (subsection.4.13) >> % 274 0 obj -(4.13 psb\137spsm \204 Triangular System Solve) +(\376\377\0004\000.\0001\0003\000\040\000p\000s\000b\000\137\000s\000p\000s\000m\000\040\040\024\000\040\000T\000r\000i\000a\000n\000g\000u\000l\000a\000r\000\040\000S\000y\000s\000t\000e\000m\000\040\000S\000o\000l\000v\000e) % 275 0 obj << /S /GoTo /D (subsection.4.14) >> % 278 0 obj -(4.14 psb\137gemlt \204 Entrywise Product) +(\376\377\0004\000.\0001\0004\000\040\000p\000s\000b\000\137\000g\000e\000m\000l\000t\000\040\040\024\000\040\000E\000n\000t\000r\000y\000w\000i\000s\000e\000\040\000P\000r\000o\000d\000u\000c\000t) % 279 0 obj << /S /GoTo /D (subsection.4.15) >> % 282 0 obj -(4.15 psb\137gediv \204 Entrywise Division) +(\376\377\0004\000.\0001\0005\000\040\000p\000s\000b\000\137\000g\000e\000d\000i\000v\000\040\040\024\000\040\000E\000n\000t\000r\000y\000w\000i\000s\000e\000\040\000D\000i\000v\000i\000s\000i\000o\000n) % 283 0 obj << /S /GoTo /D (subsection.4.16) >> % 286 0 obj -(4.16 psb\137geinv \204 Entrywise Inversion) +(\376\377\0004\000.\0001\0006\000\040\000p\000s\000b\000\137\000g\000e\000i\000n\000v\000\040\040\024\000\040\000E\000n\000t\000r\000y\000w\000i\000s\000e\000\040\000I\000n\000v\000e\000r\000s\000i\000o\000n) % 287 0 obj << /S /GoTo /D (section.5) >> % 290 0 obj -(5 Communication routines) +(\376\377\0005\000\040\000C\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000r\000o\000u\000t\000i\000n\000e\000s) % 291 0 obj << /S /GoTo /D (subsection.5.1) >> % 294 0 obj -(5.1 psb\137halo \204 Halo Data Communication) +(\376\377\0005\000.\0001\000\040\000p\000s\000b\000\137\000h\000a\000l\000o\000\040\040\024\000\040\000H\000a\000l\000o\000\040\000D\000a\000t\000a\000\040\000C\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n) % 295 0 obj << /S /GoTo /D (subsection.5.2) >> % 298 0 obj -(5.2 psb\137ovrl \204 Overlap Update) +(\376\377\0005\000.\0002\000\040\000p\000s\000b\000\137\000o\000v\000r\000l\000\040\040\024\000\040\000O\000v\000e\000r\000l\000a\000p\000\040\000U\000p\000d\000a\000t\000e) % 299 0 obj << /S /GoTo /D (subsection.5.3) >> % 302 0 obj -(5.3 psb\137gather \204 Gather Global Dense Matrix) +(\376\377\0005\000.\0003\000\040\000p\000s\000b\000\137\000g\000a\000t\000h\000e\000r\000\040\040\024\000\040\000G\000a\000t\000h\000e\000r\000\040\000G\000l\000o\000b\000a\000l\000\040\000D\000e\000n\000s\000e\000\040\000M\000a\000t\000r\000i\000x) % 303 0 obj << /S /GoTo /D (subsection.5.4) >> % 306 0 obj -(5.4 psb\137scatter \204 Scatter Global Dense Matrix) +(\376\377\0005\000.\0004\000\040\000p\000s\000b\000\137\000s\000c\000a\000t\000t\000e\000r\000\040\040\024\000\040\000S\000c\000a\000t\000t\000e\000r\000\040\000G\000l\000o\000b\000a\000l\000\040\000D\000e\000n\000s\000e\000\040\000M\000a\000t\000r\000i\000x) % 307 0 obj << /S /GoTo /D (section.6) >> % 310 0 obj -(6 Data management routines) +(\376\377\0006\000\040\000D\000a\000t\000a\000\040\000m\000a\000n\000a\000g\000e\000m\000e\000n\000t\000\040\000r\000o\000u\000t\000i\000n\000e\000s) % 311 0 obj << /S /GoTo /D (subsection.6.1) >> % 314 0 obj -(6.1 psb\137cdall \204 Allocates a communication descriptor) +(\376\377\0006\000.\0001\000\040\000p\000s\000b\000\137\000c\000d\000a\000l\000l\000\040\040\024\000\040\000A\000l\000l\000o\000c\000a\000t\000e\000s\000\040\000a\000\040\000c\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000o\000r) % 315 0 obj << /S /GoTo /D (subsection.6.2) >> % 318 0 obj -(6.2 psb\137cdins \204 Communication descriptor insert routine) +(\376\377\0006\000.\0002\000\040\000p\000s\000b\000\137\000c\000d\000i\000n\000s\000\040\040\024\000\040\000C\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000o\000r\000\040\000i\000n\000s\000e\000r\000t\000\040\000r\000o\000u\000t\000i\000n\000e) % 319 0 obj << /S /GoTo /D (subsection.6.3) >> % 322 0 obj -(6.3 psb\137cdasb \204 Communication descriptor assembly routine) +(\376\377\0006\000.\0003\000\040\000p\000s\000b\000\137\000c\000d\000a\000s\000b\000\040\040\024\000\040\000C\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000o\000r\000\040\000a\000s\000s\000e\000m\000b\000l\000y\000\040\000r\000o\000u\000t\000i\000n\000e) % 323 0 obj << /S /GoTo /D (subsection.6.4) >> % 326 0 obj -(6.4 psb\137cdcpy \204 Copies a communication descriptor) +(\376\377\0006\000.\0004\000\040\000p\000s\000b\000\137\000c\000d\000c\000p\000y\000\040\040\024\000\040\000C\000o\000p\000i\000e\000s\000\040\000a\000\040\000c\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000o\000r) % 327 0 obj << /S /GoTo /D (subsection.6.5) >> % 330 0 obj -(6.5 psb\137cdfree \204 Frees a communication descriptor) +(\376\377\0006\000.\0005\000\040\000p\000s\000b\000\137\000c\000d\000f\000r\000e\000e\000\040\040\024\000\040\000F\000r\000e\000e\000s\000\040\000a\000\040\000c\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000o\000r) % 331 0 obj << /S /GoTo /D (subsection.6.6) >> % 334 0 obj -(6.6 psb\137cdbldext \204 Build an extended communication descriptor) +(\376\377\0006\000.\0006\000\040\000p\000s\000b\000\137\000c\000d\000b\000l\000d\000e\000x\000t\000\040\040\024\000\040\000B\000u\000i\000l\000d\000\040\000a\000n\000\040\000e\000x\000t\000e\000n\000d\000e\000d\000\040\000c\000o\000m\000m\000u\000n\000i\000c\000a\000t\000i\000o\000n\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000o\000r) % 335 0 obj << /S /GoTo /D (subsection.6.7) >> % 338 0 obj -(6.7 psb\137spall \204 Allocates a sparse matrix) +(\376\377\0006\000.\0007\000\040\000p\000s\000b\000\137\000s\000p\000a\000l\000l\000\040\040\024\000\040\000A\000l\000l\000o\000c\000a\000t\000e\000s\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 339 0 obj << /S /GoTo /D (subsection.6.8) >> % 342 0 obj -(6.8 psb\137spins \204 Insert a set of coefficients into a sparse matrix) +(\376\377\0006\000.\0008\000\040\000p\000s\000b\000\137\000s\000p\000i\000n\000s\000\040\040\024\000\040\000I\000n\000s\000e\000r\000t\000\040\000a\000\040\000s\000e\000t\000\040\000o\000f\000\040\000c\000o\000e\000f\000f\000i\000c\000i\000e\000n\000t\000s\000\040\000i\000n\000t\000o\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 343 0 obj << /S /GoTo /D (subsection.6.9) >> % 346 0 obj -(6.9 psb\137spasb \204 Sparse matrix assembly routine) +(\376\377\0006\000.\0009\000\040\000p\000s\000b\000\137\000s\000p\000a\000s\000b\000\040\040\024\000\040\000S\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x\000\040\000a\000s\000s\000e\000m\000b\000l\000y\000\040\000r\000o\000u\000t\000i\000n\000e) % 347 0 obj << /S /GoTo /D (subsection.6.10) >> % 350 0 obj -(6.10 psb\137spfree \204 Frees a sparse matrix) +(\376\377\0006\000.\0001\0000\000\040\000p\000s\000b\000\137\000s\000p\000f\000r\000e\000e\000\040\040\024\000\040\000F\000r\000e\000e\000s\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 351 0 obj << /S /GoTo /D (subsection.6.11) >> % 354 0 obj -(6.11 psb\137sprn \204 Reinit sparse matrix structure for psblas routines.) +(\376\377\0006\000.\0001\0001\000\040\000p\000s\000b\000\137\000s\000p\000r\000n\000\040\040\024\000\040\000R\000e\000i\000n\000i\000t\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x\000\040\000s\000t\000r\000u\000c\000t\000u\000r\000e\000\040\000f\000o\000r\000\040\000p\000s\000b\000l\000a\000s\000\040\000r\000o\000u\000t\000i\000n\000e\000s\000.) % 355 0 obj << /S /GoTo /D (subsection.6.12) >> % 358 0 obj -(6.12 psb\137geall \204 Allocates a dense matrix) +(\376\377\0006\000.\0001\0002\000\040\000p\000s\000b\000\137\000g\000e\000a\000l\000l\000\040\040\024\000\040\000A\000l\000l\000o\000c\000a\000t\000e\000s\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 359 0 obj << /S /GoTo /D (subsection.6.13) >> % 362 0 obj -(6.13 psb\137geins \204 Dense matrix insertion routine) +(\376\377\0006\000.\0001\0003\000\040\000p\000s\000b\000\137\000g\000e\000i\000n\000s\000\040\040\024\000\040\000D\000e\000n\000s\000e\000\040\000m\000a\000t\000r\000i\000x\000\040\000i\000n\000s\000e\000r\000t\000i\000o\000n\000\040\000r\000o\000u\000t\000i\000n\000e) % 363 0 obj << /S /GoTo /D (subsection.6.14) >> % 366 0 obj -(6.14 psb\137geasb \204 Assembly a dense matrix) +(\376\377\0006\000.\0001\0004\000\040\000p\000s\000b\000\137\000g\000e\000a\000s\000b\000\040\040\024\000\040\000A\000s\000s\000e\000m\000b\000l\000y\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 367 0 obj << /S /GoTo /D (subsection.6.15) >> % 370 0 obj -(6.15 psb\137gefree \204 Frees a dense matrix) +(\376\377\0006\000.\0001\0005\000\040\000p\000s\000b\000\137\000g\000e\000f\000r\000e\000e\000\040\040\024\000\040\000F\000r\000e\000e\000s\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 371 0 obj << /S /GoTo /D (subsection.6.16) >> % 374 0 obj -(6.16 psb\137gelp \204 Applies a left permutation to a dense matrix) +(\376\377\0006\000.\0001\0006\000\040\000p\000s\000b\000\137\000g\000e\000l\000p\000\040\040\024\000\040\000A\000p\000p\000l\000i\000e\000s\000\040\000a\000\040\000l\000e\000f\000t\000\040\000p\000e\000r\000m\000u\000t\000a\000t\000i\000o\000n\000\040\000t\000o\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000m\000a\000t\000r\000i\000x) % 375 0 obj << /S /GoTo /D (subsection.6.17) >> % 378 0 obj -(6.17 psb\137glob\137to\137loc \204 Global to local indices convertion) +(\376\377\0006\000.\0001\0007\000\040\000p\000s\000b\000\137\000g\000l\000o\000b\000\137\000t\000o\000\137\000l\000o\000c\000\040\040\024\000\040\000G\000l\000o\000b\000a\000l\000\040\000t\000o\000\040\000l\000o\000c\000a\000l\000\040\000i\000n\000d\000i\000c\000e\000s\000\040\000c\000o\000n\000v\000e\000r\000t\000i\000o\000n) % 379 0 obj << /S /GoTo /D (subsection.6.18) >> % 382 0 obj -(6.18 psb\137loc\137to\137glob \204 Local to global indices conversion) +(\376\377\0006\000.\0001\0008\000\040\000p\000s\000b\000\137\000l\000o\000c\000\137\000t\000o\000\137\000g\000l\000o\000b\000\040\040\024\000\040\000L\000o\000c\000a\000l\000\040\000t\000o\000\040\000g\000l\000o\000b\000a\000l\000\040\000i\000n\000d\000i\000c\000e\000s\000\040\000c\000o\000n\000v\000e\000r\000s\000i\000o\000n) % 383 0 obj << /S /GoTo /D (subsection.6.19) >> % 386 0 obj -(6.19 psb\137is\137owned \204 ) +(\376\377\0006\000.\0001\0009\000\040\000p\000s\000b\000\137\000i\000s\000\137\000o\000w\000n\000e\000d\000\040\040\024\000\040) % 387 0 obj << /S /GoTo /D (subsection.6.20) >> % 390 0 obj -(6.20 psb\137owned\137index \204 ) +(\376\377\0006\000.\0002\0000\000\040\000p\000s\000b\000\137\000o\000w\000n\000e\000d\000\137\000i\000n\000d\000e\000x\000\040\040\024\000\040) % 391 0 obj << /S /GoTo /D (subsection.6.21) >> % 394 0 obj -(6.21 psb\137is\137local \204 ) +(\376\377\0006\000.\0002\0001\000\040\000p\000s\000b\000\137\000i\000s\000\137\000l\000o\000c\000a\000l\000\040\040\024\000\040) % 395 0 obj << /S /GoTo /D (subsection.6.22) >> % 398 0 obj -(6.22 psb\137local\137index \204 ) +(\376\377\0006\000.\0002\0002\000\040\000p\000s\000b\000\137\000l\000o\000c\000a\000l\000\137\000i\000n\000d\000e\000x\000\040\040\024\000\040) % 399 0 obj << /S /GoTo /D (subsection.6.23) >> % 402 0 obj -(6.23 psb\137get\137boundary \204 Extract list of boundary elements) +(\376\377\0006\000.\0002\0003\000\040\000p\000s\000b\000\137\000g\000e\000t\000\137\000b\000o\000u\000n\000d\000a\000r\000y\000\040\040\024\000\040\000E\000x\000t\000r\000a\000c\000t\000\040\000l\000i\000s\000t\000\040\000o\000f\000\040\000b\000o\000u\000n\000d\000a\000r\000y\000\040\000e\000l\000e\000m\000e\000n\000t\000s) + +endstream +endobj +404 0 obj +<< +/Type /ObjStm +/N 100 +/First 928 +/Length 16303 +>> +stream +403 0 407 48 408 376 411 424 412 766 415 814 416 1030 419 1078 420 1241 423 1283 +424 1470 427 1517 428 1834 431 1881 432 2269 435 2316 436 2626 439 2673 440 2961 443 3008 +444 3256 447 3303 448 3512 451 3559 452 3906 455 3953 456 4172 459 4219 460 4410 463 4458 +464 4624 467 4672 468 4858 471 4906 472 5092 475 5140 476 5407 479 5455 480 5722 483 5770 +484 6009 487 6057 488 6218 491 6266 492 6442 495 6484 496 6593 499 6640 500 6994 503 7041 +504 7435 507 7482 508 7853 511 7900 512 8366 515 8408 516 8489 519 8536 520 8990 523 9037 +524 9483 527 9530 528 9986 531 10033 532 10489 535 10536 536 10992 539 11039 540 11505 543 11548 +544 11707 547 11755 548 11991 551 12039 552 12260 555 12308 556 12584 559 12632 560 12977 563 13025 +564 13271 567 13319 568 13525 571 13568 572 13697 575 13745 576 14035 579 14078 580 14169 583 14217 +584 14374 587 14422 588 14614 591 14662 592 14816 595 14864 596 15023 599 15066 600 15238 603 15281 +% 403 0 obj +<< /S /GoTo /D (subsection.6.24) >> +% 407 0 obj +(\376\377\0006\000.\0002\0004\000\040\000p\000s\000b\000\137\000g\000e\000t\000\137\000o\000v\000e\000r\000l\000a\000p\000\040\040\024\000\040\000E\000x\000t\000r\000a\000c\000t\000\040\000l\000i\000s\000t\000\040\000o\000f\000\040\000o\000v\000e\000r\000l\000a\000p\000\040\000e\000l\000e\000m\000e\000n\000t\000s) +% 408 0 obj +<< /S /GoTo /D (subsection.6.25) >> +% 411 0 obj +(\376\377\0006\000.\0002\0005\000\040\000p\000s\000b\000\137\000s\000p\000\137\000g\000e\000t\000r\000o\000w\000\040\040\024\000\040\000E\000x\000t\000r\000a\000c\000t\000\040\000r\000o\000w\000\050\000s\000\051\000\040\000f\000r\000o\000m\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x) +% 412 0 obj +<< /S /GoTo /D (subsection.6.26) >> +% 415 0 obj +(\376\377\0006\000.\0002\0006\000\040\000p\000s\000b\000\137\000s\000i\000z\000e\000o\000f\000\040\040\024\000\040\000M\000e\000m\000o\000r\000y\000\040\000o\000c\000c\000u\000p\000a\000t\000i\000o\000n) +% 416 0 obj +<< /S /GoTo /D (subsection.6.27) >> +% 419 0 obj +(\376\377\0006\000.\0002\0007\000\040\000S\000o\000r\000t\000i\000n\000g\000\040\000u\000t\000i\000l\000i\000t\000i\000e\000s\000\040\040\024\000\040) +% 420 0 obj +<< /S /GoTo /D (section.7) >> +% 423 0 obj +(\376\377\0007\000\040\000P\000a\000r\000a\000l\000l\000e\000l\000\040\000e\000n\000v\000i\000r\000o\000n\000m\000e\000n\000t\000\040\000r\000o\000u\000t\000i\000n\000e\000s) +% 424 0 obj +<< /S /GoTo /D (subsection.7.1) >> +% 427 0 obj +(\376\377\0007\000.\0001\000\040\000p\000s\000b\000\137\000i\000n\000i\000t\000\040\040\024\000\040\000I\000n\000i\000t\000i\000a\000l\000i\000z\000e\000s\000\040\000P\000S\000B\000L\000A\000S\000\040\000p\000a\000r\000a\000l\000l\000e\000l\000\040\000e\000n\000v\000i\000r\000o\000n\000m\000e\000n\000t) +% 428 0 obj +<< /S /GoTo /D (subsection.7.2) >> +% 431 0 obj +(\376\377\0007\000.\0002\000\040\000p\000s\000b\000\137\000i\000n\000f\000o\000\040\040\024\000\040\000R\000e\000t\000u\000r\000n\000\040\000i\000n\000f\000o\000r\000m\000a\000t\000i\000o\000n\000\040\000a\000b\000o\000u\000t\000\040\000P\000S\000B\000L\000A\000S\000\040\000p\000a\000r\000a\000l\000l\000e\000l\000\040\000e\000n\000v\000i\000r\000o\000n\000m\000e\000n\000t) +% 432 0 obj +<< /S /GoTo /D (subsection.7.3) >> +% 435 0 obj +(\376\377\0007\000.\0003\000\040\000p\000s\000b\000\137\000e\000x\000i\000t\000\040\040\024\000\040\000E\000x\000i\000t\000\040\000f\000r\000o\000m\000\040\000P\000S\000B\000L\000A\000S\000\040\000p\000a\000r\000a\000l\000l\000e\000l\000\040\000e\000n\000v\000i\000r\000o\000n\000m\000e\000n\000t) +% 436 0 obj +<< /S /GoTo /D (subsection.7.4) >> +% 439 0 obj +(\376\377\0007\000.\0004\000\040\000p\000s\000b\000\137\000g\000e\000t\000\137\000m\000p\000i\000\137\000c\000o\000m\000m\000\040\040\024\000\040\000G\000e\000t\000\040\000t\000h\000e\000\040\000M\000P\000I\000\040\000c\000o\000m\000m\000u\000n\000i\000c\000a\000t\000o\000r) +% 440 0 obj +<< /S /GoTo /D (subsection.7.5) >> +% 443 0 obj +(\376\377\0007\000.\0005\000\040\000p\000s\000b\000\137\000g\000e\000t\000\137\000m\000p\000i\000\137\000r\000a\000n\000k\000\040\040\024\000\040\000G\000e\000t\000\040\000t\000h\000e\000\040\000M\000P\000I\000\040\000r\000a\000n\000k) +% 444 0 obj +<< /S /GoTo /D (subsection.7.6) >> +% 447 0 obj +(\376\377\0007\000.\0006\000\040\000p\000s\000b\000\137\000w\000t\000i\000m\000e\000\040\040\024\000\040\000W\000a\000l\000l\000\040\000c\000l\000o\000c\000k\000\040\000t\000i\000m\000i\000n\000g) +% 448 0 obj +<< /S /GoTo /D (subsection.7.7) >> +% 451 0 obj +(\376\377\0007\000.\0007\000\040\000p\000s\000b\000\137\000b\000a\000r\000r\000i\000e\000r\000\040\040\024\000\040\000S\000i\000n\000c\000h\000r\000o\000n\000i\000z\000a\000t\000i\000o\000n\000\040\000p\000o\000i\000n\000t\000\040\000p\000a\000r\000a\000l\000l\000e\000l\000\040\000e\000n\000v\000i\000r\000o\000n\000m\000e\000n\000t) +% 452 0 obj +<< /S /GoTo /D (subsection.7.8) >> +% 455 0 obj +(\376\377\0007\000.\0008\000\040\000p\000s\000b\000\137\000a\000b\000o\000r\000t\000\040\040\024\000\040\000A\000b\000o\000r\000t\000\040\000a\000\040\000c\000o\000m\000p\000u\000t\000a\000t\000i\000o\000n) +% 456 0 obj +<< /S /GoTo /D (subsection.7.9) >> +% 459 0 obj +(\376\377\0007\000.\0009\000\040\000p\000s\000b\000\137\000b\000c\000a\000s\000t\000\040\040\024\000\040\000B\000r\000o\000a\000d\000c\000a\000s\000t\000\040\000d\000a\000t\000a) +% 460 0 obj +<< /S /GoTo /D (subsection.7.10) >> +% 463 0 obj +(\376\377\0007\000.\0001\0000\000\040\000p\000s\000b\000\137\000s\000u\000m\000\040\040\024\000\040\000G\000l\000o\000b\000a\000l\000\040\000s\000u\000m) +% 464 0 obj +<< /S /GoTo /D (subsection.7.11) >> +% 467 0 obj +(\376\377\0007\000.\0001\0001\000\040\000p\000s\000b\000\137\000m\000a\000x\000\040\040\024\000\040\000G\000l\000o\000b\000a\000l\000\040\000m\000a\000x\000i\000m\000u\000m) +% 468 0 obj +<< /S /GoTo /D (subsection.7.12) >> +% 471 0 obj +(\376\377\0007\000.\0001\0002\000\040\000p\000s\000b\000\137\000m\000i\000n\000\040\040\024\000\040\000G\000l\000o\000b\000a\000l\000\040\000m\000i\000n\000i\000m\000u\000m) +% 472 0 obj +<< /S /GoTo /D (subsection.7.13) >> +% 475 0 obj +(\376\377\0007\000.\0001\0003\000\040\000p\000s\000b\000\137\000a\000m\000x\000\040\040\024\000\040\000G\000l\000o\000b\000a\000l\000\040\000m\000a\000x\000i\000m\000u\000m\000\040\000a\000b\000s\000o\000l\000u\000t\000e\000\040\000v\000a\000l\000u\000e) +% 476 0 obj +<< /S /GoTo /D (subsection.7.14) >> +% 479 0 obj +(\376\377\0007\000.\0001\0004\000\040\000p\000s\000b\000\137\000a\000m\000n\000\040\040\024\000\040\000G\000l\000o\000b\000a\000l\000\040\000m\000i\000n\000i\000m\000u\000m\000\040\000a\000b\000s\000o\000l\000u\000t\000e\000\040\000v\000a\000l\000u\000e) +% 480 0 obj +<< /S /GoTo /D (subsection.7.15) >> +% 483 0 obj +(\376\377\0007\000.\0001\0005\000\040\000p\000s\000b\000\137\000n\000r\000m\0002\000\040\040\024\000\040\000G\000l\000o\000b\000a\000l\000\040\0002\000-\000n\000o\000r\000m\000\040\000r\000e\000d\000u\000c\000t\000i\000o\000n) +% 484 0 obj +<< /S /GoTo /D (subsection.7.16) >> +% 487 0 obj +(\376\377\0007\000.\0001\0006\000\040\000p\000s\000b\000\137\000s\000n\000d\000\040\040\024\000\040\000S\000e\000n\000d\000\040\000d\000a\000t\000a) +% 488 0 obj +<< /S /GoTo /D (subsection.7.17) >> +% 491 0 obj +(\376\377\0007\000.\0001\0007\000\040\000p\000s\000b\000\137\000r\000c\000v\000\040\040\024\000\040\000R\000e\000c\000e\000i\000v\000e\000\040\000d\000a\000t\000a) +% 492 0 obj +<< /S /GoTo /D (section.8) >> +% 495 0 obj +(\376\377\0008\000\040\000E\000r\000r\000o\000r\000\040\000h\000a\000n\000d\000l\000i\000n\000g) +% 496 0 obj +<< /S /GoTo /D (subsection.8.1) >> +% 499 0 obj +(\376\377\0008\000.\0001\000\040\000p\000s\000b\000\137\000e\000r\000r\000p\000u\000s\000h\000\040\040\024\000\040\000P\000u\000s\000h\000e\000s\000\040\000a\000n\000\040\000e\000r\000r\000o\000r\000\040\000c\000o\000d\000e\000\040\000o\000n\000t\000o\000\040\000t\000h\000e\000\040\000e\000r\000r\000o\000r\000\040\000s\000t\000a\000c\000k) +% 500 0 obj +<< /S /GoTo /D (subsection.8.2) >> +% 503 0 obj +(\376\377\0008\000.\0002\000\040\000p\000s\000b\000\137\000e\000r\000r\000o\000r\000\040\040\024\000\040\000P\000r\000i\000n\000t\000s\000\040\000t\000h\000e\000\040\000e\000r\000r\000o\000r\000\040\000s\000t\000a\000c\000k\000\040\000c\000o\000n\000t\000e\000n\000t\000\040\000a\000n\000d\000\040\000a\000b\000o\000r\000t\000s\000\040\000e\000x\000e\000c\000u\000t\000i\000o\000n) +% 504 0 obj +<< /S /GoTo /D (subsection.8.3) >> +% 507 0 obj +(\376\377\0008\000.\0003\000\040\000p\000s\000b\000\137\000s\000e\000t\000\137\000e\000r\000r\000v\000e\000r\000b\000o\000s\000i\000t\000y\000\040\040\024\000\040\000S\000e\000t\000s\000\040\000t\000h\000e\000\040\000v\000e\000r\000b\000o\000s\000i\000t\000y\000\040\000o\000f\000\040\000e\000r\000r\000o\000r\000\040\000m\000e\000s\000s\000a\000g\000e\000s) +% 508 0 obj +<< /S /GoTo /D (subsection.8.4) >> +% 511 0 obj +(\376\377\0008\000.\0004\000\040\000p\000s\000b\000\137\000s\000e\000t\000\137\000e\000r\000r\000a\000c\000t\000i\000o\000n\000\040\040\024\000\040\000S\000e\000t\000\040\000t\000h\000e\000\040\000t\000y\000p\000e\000\040\000o\000f\000\040\000a\000c\000t\000i\000o\000n\000\040\000t\000o\000\040\000b\000e\000\040\000t\000a\000k\000e\000n\000\040\000u\000p\000o\000n\000\040\000e\000r\000r\000o\000r\000\040\000c\000o\000n\000d\000i\000t\000i\000o\000n) +% 512 0 obj +<< /S /GoTo /D (section.9) >> +% 515 0 obj +(\376\377\0009\000\040\000U\000t\000i\000l\000i\000t\000i\000e\000s) +% 516 0 obj +<< /S /GoTo /D (subsection.9.1) >> +% 519 0 obj +(\376\377\0009\000.\0001\000\040\000\040\000h\000b\000\137\000r\000e\000a\000d\000\040\040\024\000\040\000R\000e\000a\000d\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x\000\040\000f\000r\000o\000m\000\040\000a\000\040\000f\000i\000l\000e\000\040\000i\000n\000\040\000t\000h\000e\000\040\000H\000a\000r\000w\000e\000l\000l\040\023\000B\000o\000e\000i\000n\000g\000\040\000f\000o\000r\000m\000a\000t) +% 520 0 obj +<< /S /GoTo /D (subsection.9.2) >> +% 523 0 obj +(\376\377\0009\000.\0002\000\040\000h\000b\000\137\000w\000r\000i\000t\000e\000\040\040\024\000\040\000W\000r\000i\000t\000e\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x\000\040\000t\000o\000\040\000a\000\040\000f\000i\000l\000e\000\040\000i\000n\000\040\000t\000h\000e\000\040\000H\000a\000r\000w\000e\000l\000l\040\023\000B\000o\000e\000i\000n\000g\000\040\000f\000o\000r\000m\000a\000t) +% 524 0 obj +<< /S /GoTo /D (subsection.9.3) >> +% 527 0 obj +(\376\377\0009\000.\0003\000\040\000m\000m\000\137\000m\000a\000t\000\137\000r\000e\000a\000d\000\040\040\024\000\040\000R\000e\000a\000d\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x\000\040\000f\000r\000o\000m\000\040\000a\000\040\000f\000i\000l\000e\000\040\000i\000n\000\040\000t\000h\000e\000\040\000M\000a\000t\000r\000i\000x\000M\000a\000r\000k\000e\000t\000\040\000f\000o\000r\000m\000a\000t) +% 528 0 obj +<< /S /GoTo /D (subsection.9.4) >> +% 531 0 obj +(\376\377\0009\000.\0004\000\040\000m\000m\000\137\000a\000r\000r\000a\000y\000\137\000r\000e\000a\000d\000\040\040\024\000\040\000R\000e\000a\000d\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000a\000r\000r\000a\000y\000\040\000f\000r\000o\000m\000\040\000a\000\040\000f\000i\000l\000e\000\040\000i\000n\000\040\000t\000h\000e\000\040\000M\000a\000t\000r\000i\000x\000M\000a\000r\000k\000e\000t\000\040\000f\000o\000r\000m\000a\000t) +% 532 0 obj +<< /S /GoTo /D (subsection.9.5) >> +% 535 0 obj +(\376\377\0009\000.\0005\000\040\000m\000m\000\137\000m\000a\000t\000\137\000w\000r\000i\000t\000e\000\040\040\024\000\040\000W\000r\000i\000t\000e\000\040\000a\000\040\000s\000p\000a\000r\000s\000e\000\040\000m\000a\000t\000r\000i\000x\000\040\000t\000o\000\040\000a\000\040\000f\000i\000l\000e\000\040\000i\000n\000\040\000t\000h\000e\000\040\000M\000a\000t\000r\000i\000x\000M\000a\000r\000k\000e\000t\000\040\000f\000o\000r\000m\000a\000t) +% 536 0 obj +<< /S /GoTo /D (subsection.9.6) >> +% 539 0 obj +(\376\377\0009\000.\0006\000\040\000m\000m\000\137\000a\000r\000r\000a\000y\000\137\000w\000r\000i\000t\000e\000\040\040\024\000\040\000W\000r\000i\000t\000e\000\040\000a\000\040\000d\000e\000n\000s\000e\000\040\000a\000r\000r\000a\000y\000\040\000f\000r\000o\000m\000\040\000a\000\040\000f\000i\000l\000e\000\040\000i\000n\000\040\000t\000h\000e\000\040\000M\000a\000t\000r\000i\000x\000M\000a\000r\000k\000e\000t\000\040\000f\000o\000r\000m\000a\000t) +% 540 0 obj +<< /S /GoTo /D (section.10) >> +% 543 0 obj +(\376\377\0001\0000\000\040\000P\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r\000\040\000r\000o\000u\000t\000i\000n\000e\000s) +% 544 0 obj +<< /S /GoTo /D (subsection.10.1) >> +% 547 0 obj +(\376\377\0001\0000\000.\0001\000\040\000i\000n\000i\000t\000\040\040\024\000\040\000I\000n\000i\000t\000i\000a\000l\000i\000z\000e\000\040\000a\000\040\000p\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r) +% 548 0 obj +<< /S /GoTo /D (subsection.10.2) >> +% 551 0 obj +(\376\377\0001\0000\000.\0002\000\040\000b\000u\000i\000l\000d\000\040\040\024\000\040\000B\000u\000i\000l\000d\000s\000\040\000a\000\040\000p\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r) +% 552 0 obj +<< /S /GoTo /D (subsection.10.3) >> +% 555 0 obj +(\376\377\0001\0000\000.\0003\000\040\000a\000p\000p\000l\000y\000\040\040\024\000\040\000P\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r\000\040\000a\000p\000p\000l\000i\000c\000a\000t\000i\000o\000n\000\040\000r\000o\000u\000t\000i\000n\000e) +% 556 0 obj +<< /S /GoTo /D (subsection.10.4) >> +% 559 0 obj +(\376\377\0001\0000\000.\0004\000\040\000d\000e\000s\000c\000r\000\040\040\024\000\040\000P\000r\000i\000n\000t\000s\000\040\000a\000\040\000d\000e\000s\000c\000r\000i\000p\000t\000i\000o\000n\000\040\000o\000f\000\040\000c\000u\000r\000r\000e\000n\000t\000\040\000p\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r) +% 560 0 obj +<< /S /GoTo /D (subsection.10.5) >> +% 563 0 obj +(\376\377\0001\0000\000.\0005\000\040\000c\000l\000o\000n\000e\000\040\040\024\000\040\000c\000l\000o\000n\000e\000\040\000c\000u\000r\000r\000e\000n\000t\000\040\000p\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r) +% 564 0 obj +<< /S /GoTo /D (subsection.10.6) >> +% 567 0 obj +(\376\377\0001\0000\000.\0006\000\040\000f\000r\000e\000e\000\040\040\024\000\040\000F\000r\000e\000e\000\040\000a\000\040\000p\000r\000e\000c\000o\000n\000d\000i\000t\000i\000o\000n\000e\000r) +% 568 0 obj +<< /S /GoTo /D (section.11) >> +% 571 0 obj +(\376\377\0001\0001\000\040\000I\000t\000e\000r\000a\000t\000i\000v\000e\000\040\000M\000e\000t\000h\000o\000d\000s) +% 572 0 obj +<< /S /GoTo /D (subsection.11.1) >> +% 575 0 obj +(\376\377\0001\0001\000.\0001\000\040\000p\000s\000b\000\137\000k\000r\000y\000l\000o\000v\000\040\000\040\040\024\000\040\000K\000r\000y\000l\000o\000v\000\040\000M\000e\000t\000h\000o\000d\000s\000\040\000D\000r\000i\000v\000e\000r\000\040\000R\000o\000u\000t\000i\000n\000e) +% 576 0 obj +<< /S /GoTo /D (section.12) >> +% 579 0 obj +(\376\377\0001\0002\000\040\000E\000x\000t\000e\000n\000s\000i\000o\000n\000s) +% 580 0 obj +<< /S /GoTo /D (subsection.12.1) >> +% 583 0 obj +(\376\377\0001\0002\000.\0001\000\040\000U\000s\000i\000n\000g\000\040\000t\000h\000e\000\040\000e\000x\000t\000e\000n\000s\000i\000o\000n\000s) +% 584 0 obj +<< /S /GoTo /D (subsection.12.2) >> +% 587 0 obj +(\376\377\0001\0002\000.\0002\000\040\000E\000x\000t\000e\000n\000s\000i\000o\000n\000s\000'\000\040\000D\000a\000t\000a\000\040\000S\000t\000r\000u\000c\000t\000u\000r\000e\000s) +% 588 0 obj +<< /S /GoTo /D (subsection.12.3) >> +% 591 0 obj +(\376\377\0001\0002\000.\0003\000\040\000C\000P\000U\000-\000c\000l\000a\000s\000s\000\040\000e\000x\000t\000e\000n\000s\000i\000o\000n\000s) +% 592 0 obj +<< /S /GoTo /D (subsection.12.4) >> +% 595 0 obj +(\376\377\0001\0002\000.\0004\000\040\000C\000U\000D\000A\000-\000c\000l\000a\000s\000s\000\040\000e\000x\000t\000e\000n\000s\000i\000o\000n\000s) +% 596 0 obj +<< /S /GoTo /D (section.13) >> +% 599 0 obj +(\376\377\0001\0003\000\040\000C\000U\000D\000A\000\040\000E\000n\000v\000i\000r\000o\000n\000m\000e\000n\000t\000\040\000R\000o\000u\000t\000i\000n\000e\000s) +% 600 0 obj +<< /S /GoTo /D (section*.6) >> +% 603 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000i\000n\000i\000t) endstream endobj -581 0 obj +662 0 obj << -/Length 728 +/Length 729 >> stream 0 g 0 G @@ -453,14 +674,14 @@ stream 0 g 0 G 0 g 0 G BT -/F51 24.7871 Tf 169.511 626.367 Td [(PSBLAS)-250(3.8.0)-250(User)-55(')55(s)-250(guide)]TJ +/F59 24.7871 Tf 169.511 626.367 Td [(PSBLAS)-250(3.9.0)-250(User)-55(')55(s)-250(guide)]TJ ET q 1 0 0 1 125.3 609.739 cm 0 0 343.711 4.981 re f Q BT -/F52 14.3462 Tf 156.541 586.546 Td [(A)-250(r)18(efer)18(ence)-250(guide)-250(for)-250(the)-250(Parallel)-250(Sparse)-250(BLAS)-250(library)]TJ +/F60 14.3462 Tf 156.541 586.546 Td [(A)-250(r)18(efer)18(ence)-250(guide)-250(for)-250(the)-250(Parallel)-250(Sparse)-250(BLAS)-250(library)]TJ 0 g 0 G 0 g 0 G 0 g 0 G @@ -480,14 +701,14 @@ Q 0 g 0 G 1 0 0 1 -168.637 -345.042 cm BT -/F51 9.9626 Tf 365.51 263.977 Td [(by)-250(Salvatore)-250(Filippone)]TJ 14.396 -11.956 Td [(and)-250(Alfredo)-250(Buttari)]TJ/F54 9.9626 Tf 29.31 -11.955 Td [(May)-250(1st,)-250(2022)]TJ +/F59 9.9626 Tf 365.51 263.977 Td [(by)-250(Salvatore)-250(Filippone)]TJ 14.396 -11.956 Td [(and)-250(Alfredo)-250(Buttari)]TJ/F62 9.9626 Tf 29.957 -11.955 Td [(Aug)-250(1st,)-250(2024)]TJ 0 g 0 G 0 g 0 G ET endstream endobj -578 0 obj +659 0 obj << /Type /XObject /Subtype /Image @@ -495,14 +716,14 @@ endobj /Height 480 /BitsPerComponent 8 /ColorSpace /DeviceRGB -/SMask 588 0 R +/SMask 669 0 R /Length 921600 >> stream ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþýýýýýýýýýþþþÿÿÿÿÿÿþþþþþþþþþþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿýýýûûûûûûûûûûûûúúúúúúøøø÷÷÷÷÷÷÷÷÷öööõõõõõõððððððððððððððððððíííùùùþþþÿÿÿÿÿÿþþþùùùæææäääááááááááááááááááááÝÝÝÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÛÛÛÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÔÔÔÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÍÍÍÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌËËËÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÄÄĽ½½½½½½½½½½½½½½½½½ººº¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÈÈÈÿÿÿÿÿÿÿÿÿÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¼¼¼ýýýþþþÿÿÿÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿþþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööþþþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿÿÿÿÿÿÿþþþöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøÿÿÿÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿþþþííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÿÿÿÿÿÿþþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿÿÿÿÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÒÒÒÿÿÿÿÿÿÿÿÿþþþøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿÿÿÿþþþñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþþÿÿÿÿÿÿÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººüüüÿÿÿÿÿÿþþþççç³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµôôôþþþÿÿÿÿÿÿþþþÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿûûû¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¼¼¼ýýýþþþÿÿÿþþþôôô´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööþþþÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿÿÿÿÿÿÿÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøÿÿÿÿÿÿÿÿÿÿÿÿöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿÿÿÿÍÍͳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÿÿÿÿÿÿþþþÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿÿÿÿÿÿÿäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿÿÿÿýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÒÒÒÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿþþþúúú···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿÿÿÿÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþþÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººüüüÿÿÿÿÿÿÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ôôôþþþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¼¼¼ýýýþþþÿÿÿÿÿÿÕÕÕ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿþþþöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööþþþÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿþþþííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿþþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÜÜÜÿÿÿÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøÿÿÿÿÿÿÿÿÿÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿþþþøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÿÿÿÿÿÿþþþñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿÿÿÿÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿÿÿÿþþþççç³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÒÒÒÿÿÿÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿþþþÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿþþþûûû¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþþÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººüüüÿÿÿÿÿÿþþþôôô´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ôôôþþþÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþÿÿÿÿÿÿÿÿÿááá³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¼¼¼ýýýþþþÿÿÿûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööþþþÿÿÿÿÿÿÿÿÿ÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿÍÍͳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿþþþÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÜÜÜÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···üüüûûýüüþýýþýýþüüþüüþüüþüüþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿûûýòòøòòøòòøòòøòñøòñøéèóäãñäãñäãñãâðâáïàßîÓÑçÓÑçÓÑçÓÑçÑÏæÑÏæÉÇâ¿Þ¿Þ¿Þûûýÿÿÿÿÿÿýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄýýþÑÎ棟ͣŸÍ£ŸÍ£ŸÍ£ŸÍ™”ȔŔŔŔŔŒŽÄ…€½…€½…€½…€½…€½…€½~x¹vpµvpµvpµvpµvpµvpµhb­f`¬f`¬f`¬f`¬f`¬b[ªWP¤WP¤WP¤WP¤WP¤WP¤LDžH@œH@œH@œH@œH@œF>›90”90”90”90”90”90”0') ‹) ‹) ‹) ‹) ‹) ‹„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ•Æÿÿÿÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³õõõùùü7.“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‰éèóýýþÿÿÿÿÿÿÿÿÿ÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿ’ŽÄƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒle¯ÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ùùúîíö'ŠƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáüüýÿÿÿÿÿÿþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÞÞÞÿÿÿvpµƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB9˜üüýÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ûûüÛÚë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ œËþþþÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿZS¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) ‹ðï÷üüþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿üüþ½ºÛƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvqµÿÿÿÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³òòòýýþA8˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÐÎæüüþÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÉÉÉþþþŸ›ËƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³øøùóòø,#ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒª¦ÑýýþÿÿÿÿÿÿÿÿÿÆÆƳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿ|»ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&Žôôùþþÿÿÿÿþþþóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµûûüãâ𠆃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ€{ºÿÿÿÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååÿÿÿe^«ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…Ø×êüüýÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üûýÌÊッƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒVO¤ÿÿÿÿÿÿÿÿÿÿÿÿááá³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ïïïþþÿJBƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒµ²×ýýþÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆþýþ«§Ñƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ4+‘÷÷ûþþþÿÿÿþþþððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³öö÷ùøû5,’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‹†ÀÿÿÿÿÿÿÿÿÿÿÿÿÐÐг³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿˆÁƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ †àßîûûýÿÿÿÿÿÿþþþùùù¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³úúûêéô$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒaZ©ÿÿÿÿÿÿÿÿÿþþþÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿpj²ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¾¼ÜýýþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¹¹¹ûûýÖÔ脃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ:2•ûúýÿÿÿÿÿÿþþþììì³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ëëëÿÿÿWP¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ•ÆÿÿÿÿÿÿÿÿÿÿÿÿÌÌ̳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀýüþ·³Øƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‰éèóýýþÿÿÿÿÿÿÿÿÿøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ôôôüüý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒle¯ÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌþþÿš•ÈƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáüüýÿÿÿÿÿÿþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³øøùòñø+"ŒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB9˜üüýÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÛÛÛÿÿÿ}w¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ œËþþþÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµûûüÞÝí…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) ‹ðï÷üüþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³çççÿÿÿ_X¨ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvqµÿÿÿÿÿÿÿÿÿÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½üüýÅÃàƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÐÎæüüþÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ñññþþþG?›ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆþþþ¥¡Îƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$Š$Š,#+"$Š†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒª¦ÑýýþÿÿÿÿÿÿÿÿÿÆÆƳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³÷÷÷ööú0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$ŠJC¤fa·~Ê—•Ù¨§äµ´í¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯¯é«ªæ›ÜŠ‡ÐxtÃgb¸UO«B;Ÿ) ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&Žôôùþþÿÿÿÿþþþóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÕÕÕÿÿÿˆƒ¿ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ2*”gb¸š˜Ú··î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«æÓvrÂYT¯=5›†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ€{ºÿÿÿÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµúúûèçò#ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒIB£‘ŽÔ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï³²ë’Õlh¼H@¡#‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…Ø×êüüýÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ãããÿÿÿkd¯ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ;3™”’׸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¶¶îlg»ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒVO¤ÿÿÿÿÿÿÿÿÿÿÿÿááá³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººüüýÐÎ僃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„id¹¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtõ²×ýýþÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííÿÿÿPH ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"ˆŒÒ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃ4+‘÷÷ûþþþÿÿÿþþþððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂýýþ±®Õƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$Š—•Ù¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtˆÀÿÿÿÿÿÿÿÿÿÿÿÿÐÐг³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³õõõûûý91”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ŒÒ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtà †àßîûûýÿÿÿÿÿÿþþþùùù¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÎÎÎþþÿ”Ńƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒql¾¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃaZ©ÿÿÿÿÿÿÿÿÿþþþÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ùùúïîö'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ<5›¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtþ¼ÜýýþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿwq¶ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ“‘Ö¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃ:2•ûúýÿÿÿÿÿÿþþþîîî³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ûúüÜÚì…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒC<Ÿ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÕÆÿÿÿÿÿÿÿÿÿÿÿÿÌÌ̳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿZS¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒʸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï£¢áŽ‹ÒyuÄni¼gb¸d_¶d_¶hc¸rm¿|É‘ŽÔª©å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃ%‰éèóýýþÿÿÿÿÿÿÿÿÿøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿ýüþ¾»Üƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰³³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï•“×UO«*!Žƒƒƒƒƒƒƒƒƒƒƒƒ) G?¡d_¶~Ê©¨å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃle¯ÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³òòòýýþB9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒIB£¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï©¨åF?¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„;3šid¹—•Ø··î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃÇÅáüüýÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÉÉÉþþþ œËƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒmi½¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï­¬ç/&‘ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) \V°›™Û¸¸ï¸¸ï¸¸ïxtÃB9˜üüýÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³øøùõõú/&ŽƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŠˆÐ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïTN«ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ;4š{wÅ°°êxtàœËþþþÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿƒ~¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ žÞ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï®­è†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰91˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) ‹ðï÷üüþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµûûüäã𠆃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ±±ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï”’׃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvqµÿÿÿÿÿÿÿÿÿÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååÿÿÿe_¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÐÎæüüþÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»ûûýÍË䃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ ‡¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï”’׃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ïïïþþÿJCƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ì$Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒª¦ÑýýþÿÿÿÿÿÿÿÿÿÆÆƳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆýýþ¬¨Òƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒµ´í¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïrnÀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&Žôôùþþÿÿÿÿþþþóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³öööùøû5,’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¦¥ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïd_¶„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ€{ºÿÿÿÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿŠÂƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ’Õ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï—•ØMF¦…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…Ø×êüüýÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ùùúíìõ&‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒsoÀ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï©¨åzvÄLE¥&‹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒVO¤ÿÿÿÿÿÿÿÿÿÿÿÿááá³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒNH§¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïœÝ{wÅ[U°<5›†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒµ²×ýýþÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¹¹¹ûûü×Õ鄃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"ˆ´³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´íš˜Û{wÅ[U¯:2™„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ4+‘÷÷ûþþþÿÿÿþþþððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ëëëÿÿÿWP¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ€}ɸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï±±ëŽ‹Òid¹D= †ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‹†ÀÿÿÿÿÿÿÿÿÿÿÿÿÐÐг³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀüüý¹¶Ùƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ;4š··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°ê}yÇE>¡„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ †àßîûûýÿÿÿÿÿÿþþþùùù¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³óóóüüý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„€Ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï©¨årnÀ1)“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒaZ©ÿÿÿÿÿÿÿÿÿþþþÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌþþÿš–Ƀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ*!Ž¬«æ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··îƒÊ1)“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¾¼ÜýýþÿÿÿÿÿÿþþþÁÁÁ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³øøùòñø+"ŒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒH@¡µµí¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ìoj½†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ:2•ûúýÿÿÿÿÿÿþþþîîî³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿ~x¹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒUO¬¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï›Ü1)“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ•ÆÿÿÿÿÿÿÿÿÿÿÿÿÌÌ̳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµúúüâàƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒLE¥°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«ç<5›ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‰éèóýýþÿÿÿÿÿÿÿÿÿøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³çççÿÿÿ`Y©ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ3*“š˜Ú¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï­¬ç4+”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒle¯ÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½üüýÈÅჃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…`Z²­¬ç¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞ ‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáüüýÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ñññþþþH@œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰hc¸¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïpk¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB9˜üüýÿÿÿÿÿÿþþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆýýþ¦¢Ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„HA¢…‚̵´í¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï´³ì-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ œËþþþÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³÷÷÷ööú0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&ŒQJ¨{wÅ¥¤â¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïpk¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) ‹ðï÷üüþÿÿÿÿÿÿþþþöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿ‰„¿ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0(“YS®}yÆŸžß··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï«ªæ„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvqµÿÿÿÿÿÿÿÿÿÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµúúûéèó#ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$ŠH@¡jeºŠÑ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï?7œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÐÎæüüþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþþþþÿÿÿÿÿÿþþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿmf°ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…;3šb]´‘ŽÔ¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïe_µƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒPH¡ÿÿÿþþþþþþþþþþþþþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþûûûûûûûûûûûûúúúúúúøøø÷÷÷÷÷÷÷÷÷õõõõõõõõõððððððððððððððððððíííëëëëëëëëëëëëëëëîîîÿÿÿÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµ½½½½½½½½½½½½½½½½½½ÇÇÇüüýÑÏ惃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ(`Z²¢¡à¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï„€Ëƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¦¢ÏÿÿÿçççááááááÝÝÝÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÛÛÛÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÔÔÔÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÍÍÍÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌËËËÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÄÄĽ½½½½½½½½½½½½½½½½½»»»¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿþþþØØØÎÎÎÒÒÒÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÜÜÜÝÝÝÝÝÝÝÝÝÝÝÝÝÝÝßßßããããããããããããããããããèèèééééééééééééééééééïïïïïïïïïïïïïïïïïïñññóóóóóóóóóóóóóóóóóóööö÷÷÷÷÷÷÷÷÷÷÷÷÷÷÷÷÷÷úúúúúúúúúúúúúúúúúúûûûüüüûûûúùûúùûúùûúúûüûüüûüûûýúúüúúüúúüûûüýüýýüýüûýüûýüûýüûýþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿaZ©ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„`Z²²²ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïš˜Úƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ-#üüýððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂÿÿÿÿÿÿÿÿÿýüþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿýýþóòøóòøóòøóòøòñøòñøêéóåäñåäñåäñãâðâáïâáïÕÓèÕÓèÕÓèÕÓèÒÐæÑÏæÌÊã¿Þ¿Þ¿Þ¿Þ¿Þ¿޶³Ø³¯Ö³¯Ö³¯Ö³¯Ö³¯Ö°­Ô£ŸÍ£ŸÍ£ŸÍ£ŸÍ£ŸÍ£ŸÍš–ɔŔŔŔŔŔŅ€½…€½…€½…€½…€½…€½y¹vpµŠ„¿ýýþ²¯ÕƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒA:ž³²ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïª©åƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿÿÿÿÿÿÿÿÿÿ‡¾WP¤WP¤WP¤WP¤WP¤WP¤OG H@œH@œH@œH@œH@œH@œ91”90”90”90”90”90”3*‘) ‹) ‹) ‹) ‹) ‹) ‹…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÄàÿÿÿTM£ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒid¹¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï³³ìƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒH@œþþþæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿüüýÍË䃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹ùùüÛÚë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ'Œ¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒàßïþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿÿÿÿþþÿIAœƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–’Æÿÿÿ~x¹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒœ›Ý¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´íƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿýýþª¦Ðƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒìëõõôù0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŠÑ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯®éƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒññ÷ùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÅÅÅþþþÿÿÿþþþùøü4+‘ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ^V¨þþþ§£ÏƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMF¦MF¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–“׸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¥¤âƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ˜”ÇÿÿÿÒÒÒ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååþþþÿÿÿÿÿÿÿÿÿˆÁƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæþþþKDžƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï›™ÛLE¥„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…­¬ç¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï“‘Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰úùüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿýýþéèó$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(úúüÓÑ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¡ŸßZT¯ ‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒKD¤¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï|ȃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ»¹ÚÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ØØØÿÿÿÿÿÿÿÿÿÿÿÿoi±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ£ Îÿÿÿsm³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«çrm¿5,•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰£¡à¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïb]´ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ýýþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµõõõþþþÿÿÿÿÿÿüüþÕÓ脃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒñð÷ðð÷+"Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï–”Ø^X±+"ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ)“‘Ö¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï?7œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒØÖéþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÊÊÊÿÿÿÿÿÿÿÿÿÿÿÿVO¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®þþÿœ˜Êƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï–”Øid¹A9ž†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†^X±©©å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯¯é†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\U¦ÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿÿÿÿÿÿÿýýþ¶³ØƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÙØëýýþD<šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï³²ë—”ØxtÃ\V°G?¡5,•#‰ƒƒƒƒƒ(>6›XR®ƒʯ®é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï|Ƀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒìëôüüüººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿûûý;3•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ûûýÊÈ⃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï±±ë®­è±±ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïG?¡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ|»ÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÛÛÛÿÿÿÿÿÿÿÿÿþþþ˜“ǃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ±­Õÿÿÿhb­ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïœšÜƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ …÷÷úöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿüüþññ÷*!Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ôôùèçò%‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïLE¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ­ªÓÿÿÿÍÍͳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÎÎÎÿÿÿÿÿÿÿÿÿÿÿÿ{u¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒwqµÿÿÿ’ŽÄƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‹ˆÐƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(üüýîîî³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿüüýßÝî…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒâáïûûý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¨§ä*!ŽƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÌÊäÿÿÿÄÄij³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÿÿÿÿÿÿÿÿÿ]V§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒC:šüüýÁ¾Ýƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï²²ë;3šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒKCžþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ßßßþþþÿÿÿÿÿÿüüýÄÂ߃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¼ºÛÿÿÿ^W¨ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êC<Ÿƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒäãðýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿÿÿÿþþÿG?›ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰÷÷ûâáï!‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\V°¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞ6.–ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒsm³ÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿþþÿ¤ Îƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„¼ÿÿÿˆ‚¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0(“d_¶—•Ø··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ítpÁ ‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒóòøùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿþþþööú0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒèçóøøû6-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ-$`[³“‘Ö¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµµí„€Ë2*”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŸ›ËÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂÿÿÿÿÿÿÿÿÿÿÿÿˆ‚¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸýýþ±®Õƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰G?¡lg»‘Ô²²ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞjeº+"ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ(‹ûûüñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿÿÿÿÿÿÿýýþéèó#ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáÿÿÿTM£ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…<4š`Z²wsÊѣ¢á¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··î¢ ß|È\V°2*”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÁ¾ÝÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿÿÿÿjc®ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹ùùüÛÚë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†2*”HA¢WQ­b]µmi½xtÃxtÃ}yÆ~ÊxtÃxtÃql¾fa·YS®KD¤6-•†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ@8˜þþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿÿÿÿüüýÏÍ僃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—’Æÿÿÿ}w¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÛÚìþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿþþþPH ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒìëõõôù0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒb[ªÿÿÿÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÅÅÅþþþÿÿÿÿÿÿýýþ°­Ôƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ^V¨þþþ§£Ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒîíõûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååþþþÿÿÿÿÿÿûúý91”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæþþþKDžƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŽ‰ÂÿÿÿÕÕÕ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿÿÿÿ’ŽÄƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(úúüÓÑ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡ùùüõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ØØØÿÿÿÿÿÿþþÿîíö'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¤ Îÿÿÿsm³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ´±ÖÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµõõõþþþÿÿÿÿÿÿÿÿÿvpµƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒñð÷îíö(‹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ5-’ýýýììì³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÊÊÊÿÿÿÿÿÿÿÿÿüüþÛÙë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®þþÿœ˜ÊƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿÿÿÿÿÿÿÿÿÿZS¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÚØëýýþD<šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒSL¢þþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿýýþ¾»Ûƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ6.–:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™:2™80˜0'’)„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ûûýÊÈ⃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒèçòýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÛÛÛÿÿÿÿÿÿÿÿÿýýþA8˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¶¶î©©å•“×zvÄ`[³<4š…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ±­Õÿÿÿhb­ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒzt·ÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿþþÿŸ›Ëƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êˆ…ÏSLª#‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ôôùèçò%‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…õõùøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÎÎÎÿÿÿÿÿÿþþÿôôù/&Žƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï´³ì~{È3*“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒwqµÿÿÿ‘ăƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¥¡ÎÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿÿÿÿ‚}»ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··î~{È'Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒâáïûûý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹ûûüñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÿÿÿÿÿÿýýþãâ𠆃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«æJC£ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒC:šüüýÁ¾ÝƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÆÄàÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ßßßþþþÿÿÿÿÿÿÿÿÿe^«ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïni¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ½ºÛÿÿÿ]V§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒH@œþþþæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿÿÿÿüüþÎÌ䃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï|É…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰÷÷ûâáï!‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒàÞîþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿþþþJBƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï{wŃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„¼ÿÿÿˆ‚¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿýýþ«§Ñƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒéèóøøû6-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒðï÷úúú¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂÿÿÿÿÿÿÿÿÿùøü5,’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ì5-–ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸýýþ±®Õƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–‘ÆÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿÿÿÿÿÿÿÿÿÿŽ‰Âƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï•“׃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáÿÿÿTM£ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰úùüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿüüþêéô$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïG@¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹ùùüÛÚë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒº·ÚÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿÿÿÿÿÿÿqk²ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï}yÇNH§NH§NH§NH§NH§NH§NH§NH§NH§UO«_Y²lh¼ŠÑ¯¯é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï“‘Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—’Æÿÿÿ}w¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ80“ýýþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿüüýÖÔ脃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒ†UO«œÝ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï0'’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒììõõôù0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÖÔéþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿÿÿÿWP¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†rm¿¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïe_¶ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ^V¨þþþ§£Ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ[T¦ÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååþþþÿÿÿÿÿÿýýþ¸µÙƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒmi¼¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŒ‰ÑƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæþþþKDžƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒëêôüüüººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿûûý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ“‘Ö¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°ê„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(úúüÓÑ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ|»ÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ØØØÿÿÿÿÿÿÿÿÿþþþš•Èƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒA9¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï/&‘ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¥¡Îÿÿÿsm³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ø÷û÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµõõõþþþÿÿÿþþÿòòø+"Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïC<Ÿƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒñð÷îíö(‹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¬©ÒÿÿÿÍÍͳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÊÊÊÿÿÿÿÿÿÿÿÿÿÿÿ}w¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ|ȸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïSLªƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®þþÿœ˜Êƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&üüýïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿÿÿÿÿÿÿüüýßÝî…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒlh¼¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïYT¯ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÚØëýýþC;™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒËÉãÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿÿÿÿÿÿÿ_X¨ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ`[³¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ûûýÊÈ⃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒIAþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÛÛÛÿÿÿÿÿÿÿÿÿüüýÆÄჃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒgb¸¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï]W±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ±­Õÿÿÿhb­ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒãâðýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿþþÿH@œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒtpÁ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïVP¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ …õõúèçò%‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒrl³ÿÿÿÛÛÛ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÎÎÎÿÿÿÿÿÿÿÿÿýýþ¥¡Îƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–”ظ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïLE¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒxr¶ÿÿÿ‘ăƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒóòøùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿþþþööú0'ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&Œ¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï7/—ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒâáïûûý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ˜ÊÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÿÿÿÿÿÿÿÿÿˆƒ¿ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒni¼¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï#‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒC:šüüýÁ¾Ýƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ'Šúúüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ßßßþþþÿÿÿÿÿÿýýþéèó#ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ6.–³³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ½ºÛÿÿÿ]V§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÀ½ÝÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿÿÿÿÿÿÿle¯ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0'’§¦ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃ$‰ø÷ûâáï!‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ@8˜þþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿüüýÐÎ惃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…XR®¯¯é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïHA¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„½ÿÿÿˆ‚¾ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÚÙëþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿÿÿÿPH ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒ"ˆC< jeº¦¥ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«æ…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒéèóøøû6-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ_X¨ÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂÿÿÿÿÿÿÿÿÿýýþ³¯Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«ç¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à©¨å³³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïmi¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸýýþ±®Õƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒíìõûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿÿÿÿÿÿÿúúü:2•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï±±ë'ŒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÈÅáÿÿÿTM£ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŒ‡ÁÿÿÿÕÕÕ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿþþþ”Ńƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïc^µƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹ùùüÛÚë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ …ùùüõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿþþÿïîö'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï—•Ø…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—’Æÿÿÿ}w¸ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ²®ÕÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿÿÿÿwq¶ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«æ/'’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒììõõôù0&ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ5-’ýýýîîî³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿüüþÜÚì…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯¯é<5›ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ^V¨þþþ¦¢ÏƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÐÎæÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³åååþþþÿÿÿÿÿÿÿÿÿ[S¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï­¬ç<5›ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæþþþKDžƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒSL¢þþþããã³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿýýþ¾¼Üƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï—•Ø/'’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(úúüÓÑ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒæåñýýý»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿýýþB9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï²²ëgb¸…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¤ Îÿÿÿsm³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ3)7/“7/“7/“7/“7/“>6–SL¢SL¢–‘ÆÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³çççþþþÿÿÿÿÿÿþþÿ¡Ìƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï´³ì{wÅ,#ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒòñ÷ñð÷†¾„¼„¼„¼„¼„¼ˆÁ™•È™•È™•È™•È™•È™•Èª¦Ñ­©Ó­©Ó­©Ó­©Ó­©Ó±­Õ¾»Ü¾»Ü¾»Ü¾»Ü¾»Ü¾»ÜÈÆáÍËäÍËäÍËäÍËäÍËäÎÌåÚÙëÚÙëÚÙëÚÙëÚÙëÚÙëàÞîæäñæäñæäñæäñæäñæäñíìõïîöïîöïîöïîöïîöñð÷öõúöõúõôúôôùôôùôôù÷÷ûùùüùøûøøüøøüøøü÷÷ûüüýüüýûûýûûýûûýûûýýýþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿýýý···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÅÅÅÿÿÿÿÿÿþþÿôôù.%Žƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŸžßhc¸)ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–’ÆþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿöõúóòøóòøóòøòñøòñøïïöåäñåäñåäñåäñâáïâáïÛÙëÕÓèÕÓèÕÓèÓÑçÑÏæÑÏæ¿Þ¿Þ¿Þ¿Þ¿Þ¿޽ºÛ³¯Ö³¯Ö³¯Ö³¯Ö³¯Ö³¯Ö§£Ï£ŸÍ£ŸÍ£ŸÍ£ŸÍ£ŸÍ¡Ì”ŔŔŔŔŔŋ†À…€½…€½…€½…€½…€½…€½vpµvpµvpµvpµ½ºÛÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ðððþþþÿÿÿÿÿÿÿÿÿ„¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïª©å‡„Î`[³5-–ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒìëôôóù\U§WP¤WP¤WP¤TM£H@œH@œH@œH@œH@œH@œ@7—90”90”90”90”90”90”*!Œ) ‹) ‹) ‹) ‹) ‹$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒòò÷úúú¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿÿÿÿ@7—ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï›™Û„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€Ë„€ËƒÊxtÃrnÀc^µUO«A9&Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…½ÿÿÿ„¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿýýþ¹¶Ùƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ …ööúäãð!‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÙ×êþþþÁÁÁ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿÿÿÿ\U§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ®«Ôÿÿÿf`¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ýýýííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿüüýáàï †ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(úúüÍË䃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¯«ÔÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââþþþÿÿÿÿÿÿÿÿÿ…€½ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÍËäþþÿJCƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…õôùøøø···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÁÁÁÿÿÿÿÿÿþþþööú2)ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒQI¡ýýþ­©ÒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvpµÿÿÿÛÛÛ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ìììÿÿÿÿÿÿÿÿÿýýþ®«Óƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒåãñùøü6-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒáàïþþþ¿¿¿³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿSK¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒsm³ÿÿÿŠÂƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB:˜þþþêêê³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´õõõþþþÿÿÿÿÿÿüüýÚÙë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒññ÷íìõ'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ»¸ÚÿÿÿÊÊʳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÕÕÕÿÿÿÿÿÿÿÿÿÿÿÿzt·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ œÌÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡øøûõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúÿÿÿüüþòòø-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ)‹ùùü×Õ鄃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ~¼ÿÿÿØØس³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ßßßþþþÿÿÿÿÿÿþþÿ£ŸÍƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÂ¿ÞÿÿÿXQ¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒèçòýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿþþÿJCƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ@8˜üüý¹¶ÙƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿÿÿÿÿÿÿüüýÒÐ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÜÛìüüý<4•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÆÄàÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆÿÿÿÿÿÿÿÿÿÿÿÿoi±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒf`­þþÿ›—Ƀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹ûúüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ñññþþþÿÿÿÿÿÿüüþíìõ(‹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒííöòñø+"Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ•ÅÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿþþþš•Èƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŠÂÿÿÿy¹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒíìõûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùÿÿÿÿÿÿÿÿÿüüýB:™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡÷öúáàï †ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒWQ¤þþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿÿÿÿÿÿÿüüþÄÂ߃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒµ²×ÿÿÿaZ©ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÐÍæÿÿÿÄÄij³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüÿÿÿÿÿÿÿÿÿe_¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ7/“ûûýÈÅჃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(üüýððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääÿÿÿÿÿÿÿÿÿýýþèçò$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÒÐçþþþH@œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ£žÍÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÃÃÃþþþÿÿÿÿÿÿÿÿÿŠÂƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒTL£ýýþ§£Ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒññ÷úúú¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿûúý;3•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒèçòööú1(ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÍÍÍÿÿÿÿÿÿÿÿÿýýþ¹¶Ùƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`[³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ}w¹ÿÿÿŠ…ÀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÙ×êþþþÁÁÁ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµöööÿÿÿÿÿÿÿÿÿÿÿÿ\U§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ôôùéèó$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ýýýííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿÿÿÿÿÿÿüüýáàï †ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ§£Ïÿÿÿmf°ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¯«ÔÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¹¹¹ûûûÿÿÿÿÿÿÿÿÿ…€½ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ+!ŒúùüÕÓ脃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…õôùøøø···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿþþþööú2)ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáÿÿÿQI¡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvpµÿÿÿÛÛÛ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿýýþ®«ÓƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒH@œýýþ´±Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒáàïþþþ¿¿¿³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿÿÿÿSK¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒàßîûûý:2•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB:˜þþþêêê³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÉÉÉÿÿÿÿÿÿÿÿÿüüýÚÙë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®þþÿ–‘ƃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ»¸ÚÿÿÿÊÊʳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿÿÿÿzt·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒðð÷ïîö'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡øøûõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿÿÿÿüüþòòø-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—’Æÿÿÿys·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ~¼ÿÿÿØØس³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùþþþÿÿÿÿÿÿþþÿ£ŸÍƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰øøûÜÛì…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒèçòýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÜÜÜÿÿÿÿÿÿÿÿÿþþÿJCƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ»¸Úÿÿÿ\T¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½þþþÿÿÿÿÿÿüüýÒÐ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”üûýÅÂ߃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÆÄàÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³æææþþþÿÿÿÿÿÿÿÿÿoi±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ×ÕéýýþB:™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹ûúüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿÿÿÿüüþíìõ(‹ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\U¦þþþ¢žÌƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ•ÅÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ðððþþþÿÿÿÿÿÿþþþš•Èƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒëêôõõú0&ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒíìõûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÎÎÎÿÿÿÿÿÿÿÿÿüüýB:™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ|»ÿÿÿ…€½ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒWQ¤þþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿüüþÄÂ߃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ …ööúåãñ!‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÐÍæÿÿÿÄÄij³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ØØØÿÿÿÿÿÿÿÿÿÿÿÿe_¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ­ªÓÿÿÿga­ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(üüýððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûþþþÿÿÿýýþèçò$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ1(ûûýÎÌ䃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ£žÍÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââÿÿÿÿÿÿÿÿÿÿÿÿŠÂƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÌÊäþþÿOG ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒññ÷úúú¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÁÁÁþþþÿÿÿÿÿÿûúý;3•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒKCžýýþ¯«Óƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ëëëþþþÿÿÿÿÿÿýýþ¹¶Ùƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒäãðùùü6-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÙ×êþþþÁÁÁ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿ\U§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒsm³ÿÿÿ‘ŒÃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ýýýííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´õõõþþþÿÿÿÿÿÿüüýáàï †ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒòñ÷ííõ'Šƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¯«ÔÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿÿÿÿÿÿÿ„¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒž™Êÿÿÿtn´ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…õôùøøø···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþþþþööú2)ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ(‹ùùü×Õ鄃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvpµÿÿÿÛÛÛ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÞÞÞþþþÿÿÿÿÿÿýýþ®«ÓƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÁ¾ÝÿÿÿXQ¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒáàïþþþ¿¿¿³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½þþþÿÿÿÿÿÿÿÿÿSK¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ@8˜üüýº·ÚƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB:˜þþþêêê³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèÿÿÿÿÿÿÿÿÿüüýÚÙë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÛÙìüüþA8˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ»¸ÚÿÿÿÊÊʳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆÿÿÿÿÿÿÿÿÿÿÿÿzt·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ_X¨þþþœ˜Êƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡øøûõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿüüþòòø-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒíìõòòø,#ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"ˆ0'’91˜A:žA9:2™3*“(ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ~¼ÿÿÿØØس³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿÿÿÿÿÿÿþþÿ£ŸÍƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŽ‰ÂÿÿÿzºƒƒƒƒƒtpÁxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃni¼lg»d_¶]W±G?¡0(“…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&ŒxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃ[U°ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ>6›xtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃMF¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$Š\V°ƒÊ£¢á¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï´³ì£¡àŽ‹ÒsoÀXR®;4š†ƒƒƒƒƒƒƒƒƒƒèçòýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùþþþÿÿÿÿÿÿþþÿJCƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡÷÷ûâáï †ƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï´³ìŠÑ`[³,#ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„̸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ+"~{ȳ²ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï³³ì‘Õlg» ‡ƒƒƒƒƒƒƒMEŸþþþæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿÿÿÿÿÿÿüüýÒÐ焃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ³°Öÿÿÿb[ªƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ìvrÂ) ƒƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‹´³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï;3™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒWQ­¯®é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï*!ŽƒƒƒƒƒƒƒƒÆÄàÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿÿÿÿoi±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ5-’ûûýÇÅჃƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¨§ä80—ƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\V°¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïtpÁƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ`Z²¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï*!Žƒƒƒƒƒƒƒƒ* ‹ûúüóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääÿÿÿÿÿÿÿÿÿüüþíìõ'ŠƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæþþþIAœƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï­¬ç*!Žƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—”ظ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«æ†ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒTMª··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï*!Žƒƒƒƒƒƒƒƒƒ•ÅÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂÿÿÿÿÿÿÿÿÿþþþ™”ȃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒSL¢ýýþ©¥Ðƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï†ƒÍƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ3*“¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïLE¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰ª©å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï*!Žƒƒƒƒƒƒƒƒƒƒíìõûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿÿÿÿüüýB:™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒçæòùøû4+‘ƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï0(“ƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒmi¼¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï†ƒÍƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjeº¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··î«ªæ¢¡à®­è··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï*!ŽƒƒƒƒƒƒƒƒƒƒWQ¤þþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌÿÿÿÿÿÿÿÿÿüüþÄÂ߃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvpµÿÿÿ‹†Àƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï§¦ã0'’0'’0'’0'’0'’2*”C< `[³¥¤â¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïb]µƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„§¦ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ì&ŒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒžÞ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïƒÊG@¢'Œƒƒƒƒƒ+"IB£fa·Œ‰Ñ³²ë¸¸ï¸¸ï¸¸ï¸¸ï*!ŽƒƒƒƒƒƒƒƒƒƒƒÐÍæÿÿÿÄÄij³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµõõõÿÿÿÿÿÿÿÿÿÿÿÿe_¬ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ôôùéèó$ˆƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒ#‰š˜Û¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï„̃ƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒD= ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï^X±ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&‘¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïOI¨ƒƒƒƒƒƒƒƒƒƒƒƒ!‡KD¤~Ê´³ì¸¸ï*!Žƒƒƒƒƒƒƒƒƒƒƒ1(üüýððð´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿÿÿÿÿÿÿýýþèçò$ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¥¡Îÿÿÿnh±ƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒD= ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï“‘Öƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ|ȸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¢ ß¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï˜–ÙƒƒƒƒƒƒƒƒƒƒƒƒƒƒG@¢¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&Œa\´$Šƒƒƒƒƒƒƒƒƒƒƒƒ£žÍÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¹¹¹ûûûÿÿÿÿÿÿÿÿÿŠÂƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ* ‹úùüÕÓ脃ƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒ†··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŸžßƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"ˆ±±ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïA:ž³³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï5,•ƒƒƒƒƒƒƒƒƒƒƒƒƒUO¬¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïfa·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒññ÷úúú¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿÿÿÿûúý;3•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÆÄàÿÿÿRJ¡ƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒ„¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï˜–Ùƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒVP¬¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¡Ÿßƒ„€Ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïpk½ƒƒƒƒƒƒƒƒƒƒƒƒƒ`[³¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïmi½ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒjc®ÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿýýþ¹¶ÙƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒH@œýýþµ²×ƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒ4+”¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï~ʃƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‘ŽÔ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïkfºƒNH§¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¨§ä„ƒƒƒƒƒƒƒƒƒƒƒƒXR®¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¥¤â"ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÙ×êþþþÁÁÁ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿÿÿÿÿÿÿ\U§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒßÞîûûý;3•ƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒ~zǸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïWQ­ƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ.%‘··ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï4+”ƒ†°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïG?¡ƒƒƒƒƒƒƒƒƒƒƒƒKD¤¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïœšÜJC¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”ýýýííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿÿÿÿüüýáàï †ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒib­þþÿ—’ǃƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒ4+”~{ȸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï«ªæ"ˆƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒgb¸¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïœšÜƒƒƒ|ȸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï~ʃƒƒƒƒƒƒƒƒƒƒƒ) ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï§¦ã}yÇTMª1)“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¯«ÔÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´òòòþþþÿÿÿÿÿÿÿÿÿ„¼ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒïî÷ñð÷*!Œƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ì˜–Ù˜–Ù˜–Ù˜–Ù˜–Ù žÞ´³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïRK©ƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¢ ß¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïe_¶ƒƒƒIB£¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï³³ì#‰ƒƒƒƒƒƒƒƒƒƒƒƒœ›Ý¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯®éÓpk½LE¥&Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…õôùøøø···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÒÒÒÿÿÿÿÿÿþþþööú2)ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–‘Æÿÿÿzt·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïa\´ƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ?7œ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï0'’ƒƒƒ†­¬ç¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïXR®ƒƒƒƒƒƒƒƒƒƒƒƒWQ­¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ïœ›Ýkfº2*”ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvpµÿÿÿÛÛÛ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùþþþÿÿÿÿÿÿýýþ®«Óƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$‰øøûÝÜì…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï˜–ÙA9ƒƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒxtø¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï–“׃ƒƒƒƒ{wŸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï“‘Öƒƒƒƒƒƒƒƒƒƒƒƒ…š˜Ú¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‘ÕA9ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒàßïþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¹¹¹½½½½½½½½½½½½½½½ÞÞÞÿÿÿÿÿÿÿÿÿÿÿÿSK¢ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¹·Ùÿÿÿ\U§ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´íxtÃ6.–ƒƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†®­è¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï`Z²ƒƒƒƒƒD= ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï/'’ƒƒƒƒƒƒƒƒƒƒƒƒ.%‘«ªæ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï„€Ë#‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒC;™þþþëëëÇÇÇÇÇÇÇÇÇÉÉÉÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÓÓÓÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÙÙÙÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜááááááááááááááááááãããææææææææææææææææææêêêëëëëëëëëëëëëëëëìììððððððñññðððððððððôôôõõõööö÷÷÷÷÷÷÷÷÷öööúúúúúúûûûûûûûûûûûûýýýÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿüüýÚÙë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ91”üüý¿Þƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··îws†ƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒPJ¨¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··î*!Žƒƒƒƒƒ„©¨å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïid¹ƒƒƒƒƒƒƒƒƒƒƒƒƒ/&‘›Ü¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¡Ÿß) ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÌÊãÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþþþþþþþþþþþþþÿÿÿþþþýýýýýýýýýýýýÿÿÿÿÿÿþþþþþþþþþþþþþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿzt·ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&7/“7/“7/“7/“7/“;2•SL¢SL¢SL¢SL¢SL¢SL¢`Y©mf°mf°mf°mf°mf°mf°‚}¼„¼„¼„¼„¼„¼‹…¿™•È™•È™•È™•È™•È™•È§£Ï­©Ó­©Ó­©Ó­©Ó­©Ó®ªÓ¾»ÜðïöýýþC;™ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‘Ô†ƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŠˆÐ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‘ŽÔƒƒƒƒƒƒƒvr¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï£¢áƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†f`¶°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï•“׆ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ+"ŒòòøþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿüüþøøûÍËäÙØêÚÙëÚÙëÚÙëÚÙëÚÙëÞÝíæäñæäñæäñæäñæäñæäñììõïîöïîöïîöïîöïîöðï÷öõúöõúöõùööúööúööúøøûúúüúúüûûýûûýûûýûûýýýþýýþýýþýýýýýýýýýþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿúúü"‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµµí¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à©¨å¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï}yǃƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) µµí¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïYT¯ƒƒƒƒƒƒƒ?7œ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïA9ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&‹`[³œÝ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïZT¯ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ™Êþþþÿÿÿÿÿÿÿÿÿûûûûûûûûûûûûúúúúúú÷÷÷÷÷÷÷÷÷÷÷÷õõõõõõóóóñññññññññðððððððððìììëëëëëëëëëëëëëëëêêêææææææææææææææææææâââáááááááááááááááàààÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÙÙÙÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÒÒÒÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÐÐÐÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÉÉÉÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÀÀÀ½½½½½½½½½½½½½½½½½½¹¹¹¸¸¸¸¸¸ÎÎÎÿÿÿµ±×ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒ ‡F?¡Œ‰Ð¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïA9ƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒb]´¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´í&Œƒƒƒƒƒƒƒ„¦¥ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï{wŃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0(“YT¯„€Ë¨§ä¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ=5–ûûýÿÿÿÿÿÿþþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþA9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒoj½¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‡„΃ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ›Ü¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŠˆÐƒƒƒƒƒƒƒƒƒql¾¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¯¯é ‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ,#OI¨tpÁ¡Ÿß¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï5,•ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¼¹Úýýþÿÿÿÿÿÿþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÜÚ샃ƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒ…£¡à¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´í ‡ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ80˜¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŠ‡Ðd_¶d_¶d_¶d_¶d_¶d_¶d_¶d_¶d_¶|xƸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïSL©ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ0'’c^µ©¨å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïYT¯ƒƒƒƒƒƒƒƒƒƒƒƒƒUN£ÿÿÿÿÿÿÿÿÿÿÿÿÝÝݳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒƒrm¿¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï>6›ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒsoÀ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŒ‰Ñƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ$Š•“׸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïjeºƒƒƒƒƒƒƒƒƒƒƒƒ„ÔÒèüüýÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøöõù…ƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒƒ_Y²¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïPI¨ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…ª©å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¶¶î*!Žƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ4+”¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïvrƒƒƒƒƒƒƒƒƒƒƒƒuo´ÿÿÿÿÿÿÿÿÿÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿ¨¤Ðƒƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒƒd_¶¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïZT¯ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒJC¤¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïd_¶ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ«ªæ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïql¾ƒƒƒƒƒƒƒƒƒƒƒ%‰ëêôüüþÿÿÿÿÿÿþþþöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííýýý5-’ƒƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒƒ€}ɸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïOI¨ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„̸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïžÞƒƒƒƒƒƒƒ) b]´"ˆƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¬«ç¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïe_µƒƒƒƒƒƒƒƒƒƒƒ‘ÄÿÿÿÿÿÿÿÿÿÿÿÿÊÊʳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÓÑ烃ƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒƒ) °°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïA9žƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‹´³ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï;3šƒƒƒƒƒƒ0'’¸¸ï­¬çjeº&Œƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ:2™¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïWQ­ƒƒƒƒƒƒƒƒƒƒ3*‘øøûÿÿÿÿÿÿþþþííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿe^«ƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£áƒƒƒƒƒƒƒƒƒ)–“׸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··î) ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïˆ…΃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ[U°¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïvrƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï´³ì†ƒÍMF¦†ƒƒƒƒƒƒƒƒƒƒƒ)›Ü¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï4+”ƒƒƒƒƒƒƒƒƒƒ¯¬Óýýþÿÿÿÿÿÿÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúòñ÷ƒƒƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï§¦ã0'’0'’0'’0'’0'’1)“;3™NH§vr¯®é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï”’׃ƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï²²ë¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à¢¡à›Üƒƒƒƒ—”ظ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï®­èxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtÃxtâ ß¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¬«ç†ƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï®­èˆ…Îf`¶C<Ÿ'Œƒƒƒƒ…5-–hc¸«ªæ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¦¥ãƒƒƒƒƒƒƒƒƒƒKDžþþþÿÿÿÿÿÿþþþááá³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿš–Ƀƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïZT¯ƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒƒƒ3*“¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïxtÃZT¯¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïMF¦ƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï··ï¬«æ¢¡à§¦ãµ´ì¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïsoÀƒƒƒƒƒƒƒƒƒƒËÉãüüýÿÿÿþþþüüüººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ñññüüý-$ƒƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï˜–Ù„ƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒƒƒmi¼¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïA9ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&Œµµí¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï‡„΃ƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï²²ë-$ƒƒƒƒƒƒƒƒƒib®ÿÿÿÿÿÿÿÿÿÿÿÿØØس³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÅÅÅÿÿÿÊÈ⃃ƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïª©å5,•ƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒƒ„§¦ã¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¦¥ã„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‹‰Ñ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´í'Œƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï_Y²ƒƒƒƒƒƒƒƒƒ†âáïýýþÿÿÿÿÿÿÿÿÿøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþÿYR¥ƒƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¤£á2*”ƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒƒD= ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïpk½ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒUO«¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï^X±ƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïwsÆ€½ÿÿÿÿÿÿÿÿÿÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûêê󃃃ƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êgb¸ ‡ƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒƒ|ȸ¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï91˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ#‰³²ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï™—Úƒƒƒƒ&‹Ó¶¶î¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°ê^X±ƒƒƒƒƒƒƒƒƒƒ.%Žóóøþþÿÿÿÿþþþñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿˆÁƒƒƒƒƒƒƒ°°ê¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï®­è†ƒÍSL©#‰ƒƒƒƒƒƒƒƒƒƒƒƒƒ0'’¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï°°êƒ"ˆ±±ë¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïŸžßƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†ƒÍ¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï6-•ƒƒƒƒƒ'ŒYS®Š‡Ð¯¯é¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï±±ë}yÆ,#ƒƒƒƒƒƒƒƒƒƒƒ¢žÍþþÿÿÿÿÿÿÿÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµôôôúúü'Šƒƒƒƒƒƒjeºni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼d_¶^X±WQ­C< ) ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‹ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼jeºƒ3*“ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼KD¤ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ=5›ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼ni¼A9ƒƒƒƒƒƒƒƒ„80—^X±}yÆ”’ת©å¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ï¸¸ïµ´ì™—Ú|xÆTN«"ˆƒƒƒƒƒƒƒƒƒƒƒƒ@7—üüýÿÿÿÿÿÿÿÿÿæææ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÈÈÈÿÿÿ¿¼Üƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"ˆ0'’7/—>6›D= ;4š4+”+"…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÀ½Ýüüýÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþNGŸƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\U§ÿÿÿÿÿÿÿÿÿþþþÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½ýýýäãðƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÛÚìüüýÿÿÿÿÿÿúúú···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿzºƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒzt·ÿÿÿÿÿÿÿÿÿÿÿÿÒÒÒ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööùùü"‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ'Šíìõüüþÿÿÿÿÿÿþþþóóó´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌÿÿÿ´±Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ—’ÇþþþÿÿÿÿÿÿÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþA9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ8/“ûúýÿÿÿÿÿÿÿÿÿééé³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÜÚ샃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ´±ÖýýþÿÿÿÿÿÿÿÿÿÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøöõù…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÑÏæüüþÿÿÿÿÿÿûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿ§£Ïƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒnh±ÿÿÿÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííýýý5-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡çæòýýþÿÿÿÿÿÿÿÿÿöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÓÑ烃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‹†ÀÿÿÿÿÿÿÿÿÿÿÿÿÌÌ̳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿe^«ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&Žõõùþþþÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúòñ÷ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¨¤ÐýýþÿÿÿÿÿÿþþþÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿš–ɃƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒE=šþþÿÿÿÿÿÿÿþþþããã³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ñññüüý-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÄÁßüüýÿÿÿÿÿÿýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÅÅÅÿÿÿÊÈ⃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒb[ªÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþÿYR¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…àÞîüüþÿÿÿÿÿÿþþþùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûêê󃃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒzºÿÿÿÿÿÿÿÿÿÿÿÿÐÐг³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿˆÁƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) ‹ñð÷þþÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµôôôúúü&‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒœ˜ÊþþþÿÿÿÿÿÿÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÈÈÈÿÿÿ¿¼Üƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ=5–ûûýÿÿÿÿÿÿþþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþNGŸƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ»¸Úýýþÿÿÿÿÿÿþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½ýýýäãðƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒTM£ÿÿÿÿÿÿÿÿÿÿÿÿÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿzºƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÓÑçüüýÿÿÿÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööùùü"‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒtn´ÿÿÿÿÿÿÿÿÿÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌÿÿÿ´±Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‰ëêôüüþÿÿÿÿÿÿþþþöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþA9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ‘ŒÃÿÿÿÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÜÚ샃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ3*‘øøûÿÿÿÿÿÿþþþííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ­ªÓþþÿÿÿÿÿÿÿÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøöõù…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒJCþþþÿÿÿÿÿÿþþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿ§£ÏƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÊÈâüüþÿÿÿþþþüüü»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííýýý5-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒga­ÿÿÿÿÿÿÿÿÿÿÿÿØØس³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÓÑ烃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ†âáïýýþÿÿÿÿÿÿþþþøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿe^«ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ…€½ÿÿÿÿÿÿÿÿÿÿÿÿÎÎγ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúòñ÷ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ-$óóøþþÿÿÿÿþþþñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿš–Ƀƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¢žÌþþþÿÿÿÿÿÿÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ñññüüý-$ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ?6—üüýÿÿÿÿÿÿþþþççç³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÅÅÅÿÿÿÊÈ⃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ¿½Üýýþÿÿÿÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþÿYR¥ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ\T¦ÿÿÿÿÿÿÿÿÿþþþÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûêê󃃃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÙØêüüþÿÿÿÿÿÿúúú···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿˆÁƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒys·ÿÿÿÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµôôôúúü&‰ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ&‰íìõüüþÿÿÿÿÿÿþþþôôô´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÈÈÈÿÿÿ¿¼Üƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ–‘ÆÿÿÿÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþNGŸƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ8/“ûúýÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½ýýýäãðƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ³¯ÕýýþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿzºƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööùùü"‡ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÐÎæüüýÿÿÿÿÿÿûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÌÌÌÿÿÿ´±Öƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒmf°ÿÿÿÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþA9˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ"‡çæòýýþÿÿÿÿÿÿÿÿÿöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÜÚ샃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒŠ…ÀÿÿÿÿÿÿÿÿÿÿÿÿÍÍͳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿrl³ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&Žõõúþþþÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøöõù…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ§£ÏþþÿÿÿÿÿÿÿþþþÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿ§£ÏƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒD<šýýþÿÿÿÿÿÿÿÿÿäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³îîîýýý5-’ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÂ¿Þüüýÿÿÿÿÿÿýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÓÑ烃ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒaZ©ÿÿÿÿÿÿÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿd]«ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‰) ‹) ‹) ‹) ‹) ‹+"Œ90”90”90”90”90”90”A8˜H@œH@œH@œH@œH@œH@œUN£WP¤WP¤WP¤WP¤WP¤\T¦f`¬f`¬f`¬f`¬f`¬f`¬qk²vpµvpµvpµvpµvpµwq¶…€½…€½…€½…€½àßîüüýÿÿÿÿÿÿþþþùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¹¹¹úúúõôù£ŸÍ£ŸÍ£ŸÍ£ŸÍ¨¤Ð³¯Ö³¯Ö³¯Ö³¯Ö³¯Ö³¯Ö½»Û¿Þ¿Þ¿Þ¿Þ¿ÞÃÀÞÑÏæÑÏæÓÑçÓÑçÓÑçÓÑçÛÚìâáïâáïäãñäãñäãñäãññð÷òñøòòøòòøòòøòòøööúÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ðððþþþýýþýýþýýþùùüùùüùùüùùüüüýýýþûûýûûþûûþûûþûûýÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÍÍÍÿÿÿÿÿÿÿÿÿþþþôôôµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµ÷÷÷þþþÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³×××ÿÿÿÿÿÿÿÿÿûûû»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³áááÿÿÿÿÿÿÿÿÿþþþÆÆƳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÀÀÀþþþÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþÿÿÿÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÊÊÊÿÿÿÿÿÿþþþûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´óóóþþþÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝþþþÿÿÿÿÿÿÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½þþþÿÿÿÿÿÿþþþùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³çççþþþÿÿÿÿÿÿþþþÝÝݳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿÿÿÿ÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüþþþÿÿÿýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ãããþþþÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÁÁÁÿÿÿÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ìììþþþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµõõõþþþÿÿÿÿÿÿûûû»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÕÕÕÿÿÿÿÿÿÿÿÿþþþããã³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸ûûûÿÿÿÿÿÿþþþÆÆƳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ßßßÿÿÿÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿÿÿÿÒÒÒ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿÿÿÿþþþûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆÿÿÿÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ñññþþþÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùÿÿÿÿÿÿÿÿÿÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÛÛÛÿÿÿÿÿÿÿÿÿþþþùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»üüüÿÿÿÿÿÿþþþÝÝݳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþþÿÿÿÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÃÃÃþþþÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³îîîþþþÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÍÍÍÿÿÿÿÿÿÿÿÿÿÿÿ÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµöööÿÿÿÿÿÿÿÿÿÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³×××ÿÿÿÿÿÿÿÿÿýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêþþþÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÉÉÉÿÿÿÿÿÿÿÿÿûûû»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´óóóþþþÿÿÿÿÿÿþþþããã³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÓÓÓÿÿÿÿÿÿÿÿÿþþþÆÆƳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùþþþÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÜÜÜÿÿÿÿÿÿÿÿÿÿÿÿÒÒÒ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½þþþÿÿÿþþþûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³æææþþþÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿÿÿÿÿÿÿÃÃó³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ðððþþþÿÿÿÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿÿÿÿÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶÷÷÷ÿÿÿÿÿÿÿÿÿþþþùùù···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿÿÿÿþþþÝÝݳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººûûûþþþÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³âââþþþÿÿÿÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÁÁÁÿÿÿÿÿÿÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ëëëþþþÿÿÿÿÿÿÿÿÿ÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿÿÿÿÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´õõõþþþÿÿÿÿÿÿýýý½½½³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÔÔÔÿÿÿÿÿÿÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúÿÿÿÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÞÞÞþþþÿÿÿÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³½½½þþþÿÿÿÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèÿÿÿÿÿÿÿÿÿûûû»»»³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÆÆÆÿÿÿÿÿÿÿÿÿþþþããã³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿÿÿÿþþþÆÆƳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÑÑÑÿÿÿÿÿÿÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ùùùþþþÿÿÿÿÿÿÿÿÿÒÒÒ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÚÚÚÿÿÿÿÿÿþþþûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³»»»ýýýÿÿÿÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääÿÿÿÿÿÿÿÿÿþþþ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸»»»½½½½½½½½½½½½½½½½½½ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÄÄÄÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇËËËÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌÍÍÍÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÔÔÔÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÞÞÞááááááááááááááááááäääæææææææææææææææçççëëëëëëëëëëëëëëëëëëîîîððððððþþþÿÿÿÿÿÿþþþüüüúúúúúúûûûûûûûûûûûûþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþþþþþþþþþþþþÿÿÿÿÿÿþþþýýýýýýýýýþþþÿÿÿÿÿÿþþþþþþþþþþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ endstream endobj -588 0 obj +669 0 obj << /Type /XObject /Subtype /Image @@ -538,7 +759,7 @@ stream ãýÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿüËXÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ<Áüÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿý¨+üÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿú#’þÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿþÿ‰íþýýüüüüþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿýññññððéâââáßßÑÏÏÏÍÌÇ»»»»µN™™™™™ˆˆˆˆˆˆxwwwwwqffffffZUUUUUSDDDDDD;333333$""""" endstream endobj -591 0 obj +672 0 obj << /Length 78 >> @@ -547,13 +768,13 @@ stream 0 g 0 G 0 g 0 G BT -/F54 9.9626 Tf 320.07 90.438 Td [(2)]TJ +/F62 9.9626 Tf 320.07 90.438 Td [(2)]TJ 0 g 0 G ET endstream endobj -637 0 obj +718 0 obj << /Length 15343 >> @@ -561,9 +782,9 @@ stream 0 g 0 G 0 g 0 G BT -/F51 14.3462 Tf 99.895 706.042 Td [(Contents)]TJ +/F59 14.3462 Tf 99.895 706.042 Td [(Contents)]TJ 0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf 0 -22.894 Td [(1)-1000(Introduction)]TJ +/F59 9.9626 Tf 0 -22.894 Td [(1)-1000(Introduction)]TJ 0 g 0 G [-26723(1)]TJ 0 0 1 rg 0 0 1 RG @@ -571,7 +792,7 @@ BT 0 g 0 G [-24361(2)]TJ 0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.074 Td [(2.1)-1050(Basic)-250(Nomenclatur)18(e)]TJ +/F62 9.9626 Tf 14.944 -12.074 Td [(2.1)-1050(Basic)-250(Nomenclatur)18(e)]TJ 0 g 0 G [-339(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -606,11 +827,11 @@ BT [-2000(8)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -14.944 -22.116 Td [(3)-1000(Data)-250(Structures)-250(and)-250(Classes)]TJ +/F59 9.9626 Tf -14.944 -22.116 Td [(3)-1000(Data)-250(Structures)-250(and)-250(Classes)]TJ 0 g 0 G [-19810(9)]TJ 0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.074 Td [(3.1)-1050(Descriptor)-250(data)-250(str)8(uctur)18(e)]TJ +/F62 9.9626 Tf 14.944 -12.074 Td [(3.1)-1050(Descriptor)-250(data)-250(str)8(uctur)18(e)]TJ 0 g 0 G [-369(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -631,14 +852,14 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 542.324 Td [(local)]TJ +/F62 9.9626 Tf 186.779 542.324 Td [(local)]TJ ET q 1 0 0 1 208.019 542.523 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 211.008 542.324 Td [(r)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(r)18(ows)]TJ +/F62 9.9626 Tf 211.008 542.324 Td [(r)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(r)18(ows)]TJ 0 g 0 G [-471(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -652,14 +873,14 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 530.25 Td [(local)]TJ +/F62 9.9626 Tf 186.779 530.25 Td [(local)]TJ ET q 1 0 0 1 208.019 530.449 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 211.008 530.25 Td [(cols)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(cols)]TJ +/F62 9.9626 Tf 211.008 530.25 Td [(cols)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(cols)]TJ 0 g 0 G [-673(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -673,14 +894,14 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 518.176 Td [(global)]TJ +/F62 9.9626 Tf 186.779 518.176 Td [(global)]TJ ET q 1 0 0 1 214.644 518.375 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 217.633 518.176 Td [(r)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(r)18(ows)]TJ +/F62 9.9626 Tf 217.633 518.176 Td [(r)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(r)18(ows)]TJ 0 g 0 G [-641(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -694,14 +915,14 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 506.102 Td [(global)]TJ +/F62 9.9626 Tf 186.779 506.102 Td [(global)]TJ ET q 1 0 0 1 214.644 506.301 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 217.633 506.102 Td [(cols)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(cols)]TJ +/F62 9.9626 Tf 217.633 506.102 Td [(cols)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(cols)]TJ 0 g 0 G [-843(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -715,14 +936,14 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 494.027 Td [(global)]TJ +/F62 9.9626 Tf 186.779 494.027 Td [(global)]TJ ET q 1 0 0 1 214.644 494.227 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 217.633 494.027 Td [(indices)-250(\227)-250(Get)-250(vector)-250(of)-250(global)-250(indices)]TJ +/F62 9.9626 Tf 217.633 494.027 Td [(indices)-250(\227)-250(Get)-250(vector)-250(of)-250(global)-250(indices)]TJ 0 g 0 G [-999(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -736,7 +957,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 481.953 Td [(context)-250(\227)-250(Get)-250(communication)-250(context)]TJ +/F62 9.9626 Tf 186.779 481.953 Td [(context)-250(\227)-250(Get)-250(communication)-250(context)]TJ 0 g 0 G [-852(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -764,28 +985,28 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 188.941 445.731 Td [(cd)]TJ +/F62 9.9626 Tf 188.941 445.731 Td [(cd)]TJ ET q 1 0 0 1 200.049 445.93 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 203.038 445.731 Td [(get)]TJ +/F62 9.9626 Tf 203.038 445.731 Td [(get)]TJ ET q 1 0 0 1 217.195 445.93 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 220.184 445.731 Td [(lar)18(ge)]TJ +/F62 9.9626 Tf 220.184 445.731 Td [(lar)18(ge)]TJ ET q 1 0 0 1 242.729 445.93 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 245.718 445.731 Td [(thr)18(eshold)-190(\227)-190(Get)-190(thr)18(eshold)-190(for)-190(index)-190(map-)]TJ -76.084 -11.955 Td [(ping)-250(switch)]TJ +/F62 9.9626 Tf 245.718 445.731 Td [(thr)18(eshold)-190(\227)-190(Get)-190(thr)18(eshold)-190(for)-190(index)-190(map-)]TJ -76.084 -11.955 Td [(ping)-250(switch)]TJ 0 g 0 G [-819(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -799,28 +1020,28 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 188.941 421.702 Td [(cd)]TJ +/F62 9.9626 Tf 188.941 421.702 Td [(cd)]TJ ET q 1 0 0 1 200.049 421.901 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 203.038 421.702 Td [(set)]TJ +/F62 9.9626 Tf 203.038 421.702 Td [(set)]TJ ET q 1 0 0 1 215.88 421.901 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 218.869 421.702 Td [(lar)18(ge)]TJ +/F62 9.9626 Tf 218.869 421.702 Td [(lar)18(ge)]TJ ET q 1 0 0 1 241.414 421.901 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 244.403 421.702 Td [(thr)18(eshold)-190(\227)-190(Set)-190(thr)18(eshold)-190(for)-190(index)-190(map-)]TJ -74.769 -11.955 Td [(ping)-250(switch)]TJ +/F62 9.9626 Tf 244.403 421.702 Td [(thr)18(eshold)-190(\227)-190(Set)-190(thr)18(eshold)-190(for)-190(index)-190(map-)]TJ -74.769 -11.955 Td [(ping)-250(switch)]TJ 0 g 0 G [-819(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -834,14 +1055,14 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 397.673 Td [(p)]TJ +/F62 9.9626 Tf 186.779 397.673 Td [(p)]TJ ET q 1 0 0 1 193.364 397.872 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 196.353 397.673 Td [(adjcncy)-250(\227)-250(Get)-250(pr)18(ocess)-250(adjacency)-250(list)]TJ +/F62 9.9626 Tf 196.353 397.673 Td [(adjcncy)-250(\227)-250(Get)-250(pr)18(ocess)-250(adjacency)-250(list)]TJ 0 g 0 G [-652(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -855,14 +1076,14 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 185.464 385.599 Td [(p)]TJ +/F62 9.9626 Tf 185.464 385.599 Td [(p)]TJ ET q 1 0 0 1 192.049 385.798 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 195.038 385.599 Td [(adjcncy)-250(\227)-250(Set)-250(pr)18(ocess)-250(adjacency)-250(list)]TJ +/F62 9.9626 Tf 195.038 385.599 Td [(adjcncy)-250(\227)-250(Set)-250(pr)18(ocess)-250(adjacency)-250(list)]TJ 0 g 0 G [-272(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -876,7 +1097,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 188.423 373.524 Td [(owner)-250(\227)-250(Find)-250(the)-250(owner)-250(pr)18(ocess)-250(of)-250(a)-250(set)-250(of)-250(indices)]TJ +/F62 9.9626 Tf 188.423 373.524 Td [(owner)-250(\227)-250(Find)-250(the)-250(owner)-250(pr)18(ocess)-250(of)-250(a)-250(set)-250(of)-250(indices)]TJ 0 g 0 G [-361(.)]TJ 0 g 0 G @@ -911,7 +1132,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 325.228 Td [(nr)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(r)18(ows)-250(in)-250(a)-250(sparse)-250(matrix)]TJ +/F62 9.9626 Tf 186.779 325.228 Td [(nr)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(r)18(ows)-250(in)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G [-286(.)-500(.)-500(.)]TJ 0 g 0 G @@ -925,7 +1146,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 313.154 Td [(ncols)-250(\227)-250(Get)-250(number)-250(of)-250(columns)-250(in)-250(a)-250(sparse)-250(matrix)]TJ +/F62 9.9626 Tf 186.779 313.154 Td [(ncols)-250(\227)-250(Get)-250(number)-250(of)-250(columns)-250(in)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G [-670(.)]TJ 0 g 0 G @@ -939,7 +1160,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 301.08 Td [(nnzer)18(os)-190(\227)-190(Get)-190(number)-190(of)-190(nonzer)18(o)-190(elements)-190(in)-190(a)-190(sparse)]TJ -17.145 -11.955 Td [(matrix)]TJ +/F62 9.9626 Tf 186.779 301.08 Td [(nnzer)18(os)-190(\227)-190(Get)-190(number)-190(of)-190(nonzer)18(o)-190(elements)-190(in)-190(a)-190(sparse)]TJ -17.145 -11.955 Td [(matrix)]TJ 0 g 0 G [-839(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -953,7 +1174,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 277.051 Td [(size)-354(\227)-354(Get)-355(maximum)-354(number)-354(of)-354(nonzer)18(o)-354(elements)]TJ -17.145 -11.955 Td [(in)-250(a)-250(sparse)-250(matrix)]TJ +/F62 9.9626 Tf 186.779 277.051 Td [(size)-354(\227)-354(Get)-355(maximum)-354(number)-354(of)-354(nonzer)18(o)-354(elements)]TJ -17.145 -11.955 Td [(in)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G [-393(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -974,7 +1195,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 228.992 Td [(fmt)-250(\227)-250(Short)-250(description)-250(of)-250(the)-250(dynamic)-250(type)]TJ +/F62 9.9626 Tf 186.779 228.992 Td [(fmt)-250(\227)-250(Short)-250(description)-250(of)-250(the)-250(dynamic)-250(type)]TJ 0 g 0 G [-278(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -988,21 +1209,21 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 180.344 216.918 Td [(bld,)-250(is)]TJ +/F62 9.9626 Tf 180.344 216.918 Td [(bld,)-250(is)]TJ ET q 1 0 0 1 207.541 217.117 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 210.53 216.918 Td [(upd,)-250(is)]TJ +/F62 9.9626 Tf 210.53 216.918 Td [(upd,)-250(is)]TJ ET q 1 0 0 1 241.314 217.117 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 244.303 216.918 Td [(asb)-250(\227)-250(Status)-250(check)]TJ +/F62 9.9626 Tf 244.303 216.918 Td [(asb)-250(\227)-250(Status)-250(check)]TJ 0 g 0 G [-569(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1016,28 +1237,28 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 180.344 204.844 Td [(lower)74(,)-250(is)]TJ +/F62 9.9626 Tf 180.344 204.844 Td [(lower)74(,)-250(is)]TJ ET q 1 0 0 1 217.663 205.043 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 220.652 204.844 Td [(upper)74(,)-250(is)]TJ +/F62 9.9626 Tf 220.652 204.844 Td [(upper)74(,)-250(is)]TJ ET q 1 0 0 1 259.306 205.043 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 262.295 204.844 Td [(triangle,)-250(is)]TJ +/F62 9.9626 Tf 262.295 204.844 Td [(triangle,)-250(is)]TJ ET q 1 0 0 1 309.069 205.043 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 312.058 204.844 Td [(unit)-250(\227)-250(Format)-250(check)]TJ +/F62 9.9626 Tf 312.058 204.844 Td [(unit)-250(\227)-250(Format)-250(check)]TJ 0 g 0 G [-441(.)-500(.)]TJ 0 g 0 G @@ -1065,7 +1286,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 196.094 168.622 Td [(zer)18(os)-250(\227)-250(Eliminate)-250(zer)18(o)-250(coef)18(\002cients)]TJ +/F62 9.9626 Tf 196.094 168.622 Td [(zer)18(os)-250(\227)-250(Eliminate)-250(zer)18(o)-250(coef)18(\002cients)]TJ 0 g 0 G [-677(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1079,7 +1300,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 186.779 156.548 Td [(diag)-250(\227)-250(Get)-250(main)-250(diagonal)]TJ +/F62 9.9626 Tf 186.779 156.548 Td [(diag)-250(\227)-250(Get)-250(main)-250(diagonal)]TJ 0 g 0 G [-870(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1093,7 +1314,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 189.429 144.474 Td [(diag)-250(\227)-250(Cut)-250(out)-250(main)-250(diagonal)]TJ +/F62 9.9626 Tf 189.429 144.474 Td [(diag)-250(\227)-250(Cut)-250(out)-250(main)-250(diagonal)]TJ 0 g 0 G [-309(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1120,282 +1341,7 @@ ET endstream endobj -404 0 obj -<< -/Type /ObjStm -/N 100 -/First 877 -/Length 7532 ->> -stream -403 0 407 48 408 127 411 175 412 257 415 305 416 361 419 409 420 452 423 494 -424 540 427 587 428 662 431 709 432 797 435 844 436 917 439 964 440 1038 443 1085 -444 1151 447 1198 448 1252 451 1299 452 1380 455 1427 456 1483 459 1530 460 1581 463 1629 -464 1675 467 1723 468 1773 471 1821 472 1871 475 1919 476 1984 479 2032 480 2097 483 2145 -484 2205 487 2253 488 2298 491 2346 492 2394 495 2436 496 2467 499 2514 500 2594 503 2641 -504 2729 507 2776 508 2863 511 2910 512 3013 515 3055 516 3081 519 3128 520 3231 523 3278 -524 3377 527 3424 528 3525 531 3572 532 3673 535 3720 536 3821 539 3868 540 3971 543 4014 -544 4055 547 4103 548 4160 551 4208 552 4262 555 4310 556 4375 559 4423 560 4500 563 4548 -564 4607 567 4655 568 4706 571 4749 572 4784 575 4832 576 4904 577 4949 579 5078 582 5184 -583 5240 3 5296 580 5350 590 5479 592 5593 589 5650 636 5717 593 6211 594 6357 595 6503 -% 403 0 obj -<< /S /GoTo /D (subsection.6.24) >> -% 407 0 obj -(6.24 psb\137get\137overlap \204 Extract list of overlap elements) -% 408 0 obj -<< /S /GoTo /D (subsection.6.25) >> -% 411 0 obj -(6.25 psb\137sp\137getrow \204 Extract row\(s\) from a sparse matrix) -% 412 0 obj -<< /S /GoTo /D (subsection.6.26) >> -% 415 0 obj -(6.26 psb\137sizeof \204 Memory occupation) -% 416 0 obj -<< /S /GoTo /D (subsection.6.27) >> -% 419 0 obj -(6.27 Sorting utilities \204 ) -% 420 0 obj -<< /S /GoTo /D (section.7) >> -% 423 0 obj -(7 Parallel environment routines) -% 424 0 obj -<< /S /GoTo /D (subsection.7.1) >> -% 427 0 obj -(7.1 psb\137init \204 Initializes PSBLAS parallel environment) -% 428 0 obj -<< /S /GoTo /D (subsection.7.2) >> -% 431 0 obj -(7.2 psb\137info \204 Return information about PSBLAS parallel environment) -% 432 0 obj -<< /S /GoTo /D (subsection.7.3) >> -% 435 0 obj -(7.3 psb\137exit \204 Exit from PSBLAS parallel environment) -% 436 0 obj -<< /S /GoTo /D (subsection.7.4) >> -% 439 0 obj -(7.4 psb\137get\137mpi\137comm \204 Get the MPI communicator) -% 440 0 obj -<< /S /GoTo /D (subsection.7.5) >> -% 443 0 obj -(7.5 psb\137get\137mpi\137rank \204 Get the MPI rank) -% 444 0 obj -<< /S /GoTo /D (subsection.7.6) >> -% 447 0 obj -(7.6 psb\137wtime \204 Wall clock timing) -% 448 0 obj -<< /S /GoTo /D (subsection.7.7) >> -% 451 0 obj -(7.7 psb\137barrier \204 Sinchronization point parallel environment) -% 452 0 obj -<< /S /GoTo /D (subsection.7.8) >> -% 455 0 obj -(7.8 psb\137abort \204 Abort a computation) -% 456 0 obj -<< /S /GoTo /D (subsection.7.9) >> -% 459 0 obj -(7.9 psb\137bcast \204 Broadcast data) -% 460 0 obj -<< /S /GoTo /D (subsection.7.10) >> -% 463 0 obj -(7.10 psb\137sum \204 Global sum) -% 464 0 obj -<< /S /GoTo /D (subsection.7.11) >> -% 467 0 obj -(7.11 psb\137max \204 Global maximum) -% 468 0 obj -<< /S /GoTo /D (subsection.7.12) >> -% 471 0 obj -(7.12 psb\137min \204 Global minimum) -% 472 0 obj -<< /S /GoTo /D (subsection.7.13) >> -% 475 0 obj -(7.13 psb\137amx \204 Global maximum absolute value) -% 476 0 obj -<< /S /GoTo /D (subsection.7.14) >> -% 479 0 obj -(7.14 psb\137amn \204 Global minimum absolute value) -% 480 0 obj -<< /S /GoTo /D (subsection.7.15) >> -% 483 0 obj -(7.15 psb\137nrm2 \204 Global 2-norm reduction) -% 484 0 obj -<< /S /GoTo /D (subsection.7.16) >> -% 487 0 obj -(7.16 psb\137snd \204 Send data) -% 488 0 obj -<< /S /GoTo /D (subsection.7.17) >> -% 491 0 obj -(7.17 psb\137rcv \204 Receive data) -% 492 0 obj -<< /S /GoTo /D (section.8) >> -% 495 0 obj -(8 Error handling) -% 496 0 obj -<< /S /GoTo /D (subsection.8.1) >> -% 499 0 obj -(8.1 psb\137errpush \204 Pushes an error code onto the error stack) -% 500 0 obj -<< /S /GoTo /D (subsection.8.2) >> -% 503 0 obj -(8.2 psb\137error \204 Prints the error stack content and aborts execution) -% 504 0 obj -<< /S /GoTo /D (subsection.8.3) >> -% 507 0 obj -(8.3 psb\137set\137errverbosity \204 Sets the verbosity of error messages) -% 508 0 obj -<< /S /GoTo /D (subsection.8.4) >> -% 511 0 obj -(8.4 psb\137set\137erraction \204 Set the type of action to be taken upon error condition) -% 512 0 obj -<< /S /GoTo /D (section.9) >> -% 515 0 obj -(9 Utilities) -% 516 0 obj -<< /S /GoTo /D (subsection.9.1) >> -% 519 0 obj -(9.1 \040hb\137read \204 Read a sparse matrix from a file in the Harwell\205Boeing format) -% 520 0 obj -<< /S /GoTo /D (subsection.9.2) >> -% 523 0 obj -(9.2 hb\137write \204 Write a sparse matrix to a file in the Harwell\205Boeing format) -% 524 0 obj -<< /S /GoTo /D (subsection.9.3) >> -% 527 0 obj -(9.3 mm\137mat\137read \204 Read a sparse matrix from a file in the MatrixMarket format) -% 528 0 obj -<< /S /GoTo /D (subsection.9.4) >> -% 531 0 obj -(9.4 mm\137array\137read \204 Read a dense array from a file in the MatrixMarket format) -% 532 0 obj -<< /S /GoTo /D (subsection.9.5) >> -% 535 0 obj -(9.5 mm\137mat\137write \204 Write a sparse matrix to a file in the MatrixMarket format) -% 536 0 obj -<< /S /GoTo /D (subsection.9.6) >> -% 539 0 obj -(9.6 mm\137array\137write \204 Write a dense array from a file in the MatrixMarket format) -% 540 0 obj -<< /S /GoTo /D (section.10) >> -% 543 0 obj -(10 Preconditioner routines) -% 544 0 obj -<< /S /GoTo /D (subsection.10.1) >> -% 547 0 obj -(10.1 init \204 Initialize a preconditioner) -% 548 0 obj -<< /S /GoTo /D (subsection.10.2) >> -% 551 0 obj -(10.2 build \204 Builds a preconditioner) -% 552 0 obj -<< /S /GoTo /D (subsection.10.3) >> -% 555 0 obj -(10.3 apply \204 Preconditioner application routine) -% 556 0 obj -<< /S /GoTo /D (subsection.10.4) >> -% 559 0 obj -(10.4 descr \204 Prints a description of current preconditioner) -% 560 0 obj -<< /S /GoTo /D (subsection.10.5) >> -% 563 0 obj -(10.5 clone \204 clone current preconditioner) -% 564 0 obj -<< /S /GoTo /D (subsection.10.6) >> -% 567 0 obj -(10.6 free \204 Free a preconditioner) -% 568 0 obj -<< /S /GoTo /D (section.11) >> -% 571 0 obj -(11 Iterative Methods) -% 572 0 obj -<< /S /GoTo /D (subsection.11.1) >> -% 575 0 obj -(11.1 psb\137krylov \040\204 Krylov Methods Driver Routine) -% 576 0 obj -<< /S /GoTo /D [577 0 R /Fit] >> -% 577 0 obj -<< -/Type /Page -/Contents 581 0 R -/Resources 580 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 587 0 R -/Group 579 0 R ->> -% 579 0 obj -%PTEX Group needed for transparent pngs -<> -% 582 0 obj -<< -/D [577 0 R /XYZ 98.895 753.953 null] ->> -% 583 0 obj -<< -/D [577 0 R /XYZ 99.895 716.092 null] ->> -% 3 0 obj -<< -/D [577 0 R /XYZ 99.895 716.092 null] ->> -% 580 0 obj -<< -/Font << /F51 584 0 R /F52 585 0 R /F54 586 0 R >> -/XObject << /Im1 578 0 R >> -/ProcSet [ /PDF /Text /ImageC ] ->> -% 590 0 obj -<< -/Type /Page -/Contents 591 0 R -/Resources 589 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 587 0 R ->> -% 592 0 obj -<< -/D [590 0 R /XYZ 149.705 753.953 null] ->> -% 589 0 obj -<< -/Font << /F54 586 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 636 0 obj -<< -/Type /Page -/Contents 637 0 R -/Resources 635 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 587 0 R -/Annots [ 593 0 R 594 0 R 595 0 R 596 0 R 597 0 R 598 0 R 599 0 R 600 0 R 601 0 R 602 0 R 603 0 R 604 0 R 605 0 R 606 0 R 607 0 R 608 0 R 609 0 R 610 0 R 611 0 R 640 0 R 612 0 R 641 0 R 613 0 R 614 0 R 615 0 R 616 0 R 617 0 R 618 0 R 619 0 R 620 0 R 621 0 R 642 0 R 622 0 R 643 0 R 623 0 R 644 0 R 624 0 R 625 0 R 626 0 R 627 0 R 628 0 R 629 0 R 630 0 R 631 0 R 632 0 R 633 0 R ] ->> -% 593 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 681.973 173.389 691.303] -/A << /S /GoTo /D (section.1) >> ->> -% 594 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 659.857 196.921 669.187] -/A << /S /GoTo /D (section.2) >> ->> -% 595 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 647.803 227.028 657.212] -/A << /S /GoTo /D (subsection.2.1) >> ->> - -endstream -endobj -695 0 obj +775 0 obj << /Length 15896 >> @@ -1404,28 +1350,28 @@ stream 0 g 0 G 0 0 1 rg 0 0 1 RG BT -/F54 9.9626 Tf 188.563 706.129 Td [(3.2.17)-700(psb)]TJ +/F62 9.9626 Tf 188.563 706.129 Td [(3.2.17)-700(psb)]TJ ET q 1 0 0 1 236.762 706.328 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 239.751 706.129 Td [(set)]TJ +/F62 9.9626 Tf 239.751 706.129 Td [(set)]TJ ET q 1 0 0 1 252.592 706.328 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 255.581 706.129 Td [(mat)]TJ +/F62 9.9626 Tf 255.581 706.129 Td [(mat)]TJ ET q 1 0 0 1 273.205 706.328 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 276.194 706.129 Td [(default)-250(\227)-250(Set)-250(default)-250(storage)-250(format)]TJ +/F62 9.9626 Tf 276.194 706.129 Td [(default)-250(\227)-250(Set)-250(default)-250(storage)-250(format)]TJ 0 g 0 G [-829(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1467,7 +1413,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 237.589 645.73 Td [(nr)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(r)18(ows)-250(in)-250(a)-250(dense)-250(vector)]TJ +/F62 9.9626 Tf 237.589 645.73 Td [(nr)18(ows)-250(\227)-250(Get)-250(number)-250(of)-250(r)18(ows)-250(in)-250(a)-250(dense)-250(vector)]TJ 0 g 0 G [-690(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1494,7 +1440,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 237.589 609.491 Td [(vect)-250(\227)-250(Get)-250(a)-250(copy)-250(of)-250(the)-250(vector)-250(contents)]TJ +/F62 9.9626 Tf 237.589 609.491 Td [(vect)-250(\227)-250(Get)-250(a)-250(copy)-250(of)-250(the)-250(vector)-250(contents)]TJ 0 g 0 G [-770(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1522,18 +1468,18 @@ BT [-1500(28)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -14.944 -22.125 Td [(4)-1000(Computational)-250(routines)]TJ +/F59 9.9626 Tf -14.944 -22.125 Td [(4)-1000(Computational)-250(routines)]TJ 0 g 0 G [-21085(29)]TJ 0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.08 Td [(4.1)-1050(psb)]TJ +/F62 9.9626 Tf 14.944 -12.08 Td [(4.1)-1050(psb)]TJ ET q 1 0 0 1 204.881 539.246 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 539.047 Td [(geaxpby)-250(\227)-250(General)-250(Dense)-250(Matrix)-250(Sum)]TJ +/F62 9.9626 Tf 207.87 539.047 Td [(geaxpby)-250(\227)-250(General)-250(Dense)-250(Matrix)-250(Sum)]TJ 0 g 0 G [-539(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1547,7 +1493,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 526.967 Td [(gedot)-250(\227)-250(Dot)-250(Pr)18(oduct)]TJ +/F62 9.9626 Tf 207.87 526.967 Td [(gedot)-250(\227)-250(Dot)-250(Pr)18(oduct)]TJ 0 g 0 G [-837(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1561,7 +1507,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 514.888 Td [(gedots)-250(\227)-250(Generalized)-250(Dot)-250(Pr)18(oduct)]TJ +/F62 9.9626 Tf 207.87 514.888 Td [(gedots)-250(\227)-250(Generalized)-250(Dot)-250(Pr)18(oduct)]TJ 0 g 0 G [-793(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1575,7 +1521,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 502.808 Td [(normi)-250(\227)-250(In\002nity-Norm)-250(of)-250(V)111(ector)]TJ +/F62 9.9626 Tf 207.87 502.808 Td [(normi)-250(\227)-250(In\002nity-Norm)-250(of)-250(V)111(ector)]TJ 0 g 0 G [-868(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1589,7 +1535,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 490.728 Td [(geamaxs)-250(\227)-250(Generalized)-250(In\002nity)-250(Norm)]TJ +/F62 9.9626 Tf 207.87 490.728 Td [(geamaxs)-250(\227)-250(Generalized)-250(In\002nity)-250(Norm)]TJ 0 g 0 G [-600(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1603,7 +1549,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 478.649 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(V)111(ector)]TJ +/F62 9.9626 Tf 207.87 478.649 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(V)111(ector)]TJ 0 g 0 G [-438(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1617,7 +1563,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 466.569 Td [(geasums)-250(\227)-250(Generalized)-250(1-Norm)-250(of)-250(V)111(ector)]TJ +/F62 9.9626 Tf 207.87 466.569 Td [(geasums)-250(\227)-250(Generalized)-250(1-Norm)-250(of)-250(V)111(ector)]TJ 0 g 0 G [-605(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1631,7 +1577,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 454.489 Td [(norm2)-250(\227)-250(2-Norm)-250(of)-250(V)111(ector)]TJ +/F62 9.9626 Tf 207.87 454.489 Td [(norm2)-250(\227)-250(2-Norm)-250(of)-250(V)111(ector)]TJ 0 g 0 G [-438(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1645,7 +1591,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 442.409 Td [(genrm2s)-250(\227)-250(Generalized)-250(2-Norm)-250(of)-250(V)111(ector)]TJ +/F62 9.9626 Tf 207.87 442.409 Td [(genrm2s)-250(\227)-250(Generalized)-250(2-Norm)-250(of)-250(V)111(ector)]TJ 0 g 0 G [-655(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1659,7 +1605,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 430.33 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(Sparse)-250(Matrix)]TJ +/F62 9.9626 Tf 207.87 430.33 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(Sparse)-250(Matrix)]TJ 0 g 0 G [-841(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1673,7 +1619,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 418.25 Td [(normi)-250(\227)-250(In\002nity)-250(Norm)-250(of)-250(Sparse)-250(Matrix)]TJ +/F62 9.9626 Tf 207.87 418.25 Td [(normi)-250(\227)-250(In\002nity)-250(Norm)-250(of)-250(Sparse)-250(Matrix)]TJ 0 g 0 G [-604(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1687,7 +1633,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 406.17 Td [(spmm)-250(\227)-250(Sparse)-250(Matrix)-250(by)-250(Dense)-250(Matrix)-250(Pr)18(oduct)]TJ +/F62 9.9626 Tf 207.87 406.17 Td [(spmm)-250(\227)-250(Sparse)-250(Matrix)-250(by)-250(Dense)-250(Matrix)-250(Pr)18(oduct)]TJ 0 g 0 G [-491(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1701,7 +1647,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 394.091 Td [(spsm)-250(\227)-250(T)90(riangular)-250(System)-250(Solve)]TJ +/F62 9.9626 Tf 207.87 394.091 Td [(spsm)-250(\227)-250(T)90(riangular)-250(System)-250(Solve)]TJ 0 g 0 G [-945(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1715,7 +1661,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 382.011 Td [(gemlt)-250(\227)-250(Entrywise)-250(Pr)18(oduct)]TJ +/F62 9.9626 Tf 207.87 382.011 Td [(gemlt)-250(\227)-250(Entrywise)-250(Pr)18(oduct)]TJ 0 g 0 G [-968(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1729,7 +1675,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 369.931 Td [(gediv)-250(\227)-250(Entrywise)-250(Division)]TJ +/F62 9.9626 Tf 207.87 369.931 Td [(gediv)-250(\227)-250(Entrywise)-250(Division)]TJ 0 g 0 G [-748(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1743,25 +1689,25 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 357.852 Td [(geinv)-250(\227)-250(Entrywise)-250(Inversion)]TJ +/F62 9.9626 Tf 207.87 357.852 Td [(geinv)-250(\227)-250(Entrywise)-250(Inversion)]TJ 0 g 0 G [-340(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G [-1500(57)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -57.165 -22.126 Td [(5)-1000(Communication)-250(routines)]TJ +/F59 9.9626 Tf -57.165 -22.126 Td [(5)-1000(Communication)-250(routines)]TJ 0 g 0 G [-20585(58)]TJ 0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.079 Td [(5.1)-1050(psb)]TJ +/F62 9.9626 Tf 14.944 -12.079 Td [(5.1)-1050(psb)]TJ ET q 1 0 0 1 204.881 323.846 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 323.647 Td [(halo)-250(\227)-250(Halo)-250(Data)-250(Communication)]TJ +/F62 9.9626 Tf 207.87 323.647 Td [(halo)-250(\227)-250(Halo)-250(Data)-250(Communication)]TJ 0 g 0 G [-888(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1775,7 +1721,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 311.567 Td [(ovrl)-250(\227)-250(Overlap)-250(Update)]TJ +/F62 9.9626 Tf 207.87 311.567 Td [(ovrl)-250(\227)-250(Overlap)-250(Update)]TJ 0 g 0 G [-553(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1789,7 +1735,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 299.487 Td [(gather)-250(\227)-250(Gather)-250(Global)-250(Dense)-250(Matrix)]TJ +/F62 9.9626 Tf 207.87 299.487 Td [(gather)-250(\227)-250(Gather)-250(Global)-250(Dense)-250(Matrix)]TJ 0 g 0 G [-973(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1803,25 +1749,25 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 287.407 Td [(scatter)-250(\227)-250(Scatter)-250(Global)-250(Dense)-250(Matrix)]TJ +/F62 9.9626 Tf 207.87 287.407 Td [(scatter)-250(\227)-250(Scatter)-250(Global)-250(Dense)-250(Matrix)]TJ 0 g 0 G [-967(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G [-1500(68)]TJ 0 g 0 G 0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -57.165 -22.125 Td [(6)-1000(Data)-250(management)-250(routines)]TJ +/F59 9.9626 Tf -57.165 -22.125 Td [(6)-1000(Data)-250(management)-250(routines)]TJ 0 g 0 G [-19668(70)]TJ 0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.08 Td [(6.1)-1050(psb)]TJ +/F62 9.9626 Tf 14.944 -12.08 Td [(6.1)-1050(psb)]TJ ET q 1 0 0 1 204.881 253.402 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 253.202 Td [(cdall)-250(\227)-250(Allocates)-250(a)-250(communication)-250(descriptor)]TJ +/F62 9.9626 Tf 207.87 253.202 Td [(cdall)-250(\227)-250(Allocates)-250(a)-250(communication)-250(descriptor)]TJ 0 g 0 G [-363(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1835,7 +1781,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 241.123 Td [(cdins)-250(\227)-250(Communication)-250(descriptor)-250(insert)-250(r)18(outine)]TJ +/F62 9.9626 Tf 207.87 241.123 Td [(cdins)-250(\227)-250(Communication)-250(descriptor)-250(insert)-250(r)18(outine)]TJ 0 g 0 G [-261(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1849,7 +1795,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 229.043 Td [(cdasb)-250(\227)-250(Communication)-250(descriptor)-250(assembly)-250(r)18(outine)]TJ +/F62 9.9626 Tf 207.87 229.043 Td [(cdasb)-250(\227)-250(Communication)-250(descriptor)-250(assembly)-250(r)18(outine)]TJ 0 g 0 G [-718(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1863,7 +1809,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 216.963 Td [(cdcpy)-250(\227)-250(Copies)-250(a)-250(communication)-250(descriptor)]TJ +/F62 9.9626 Tf 207.87 216.963 Td [(cdcpy)-250(\227)-250(Copies)-250(a)-250(communication)-250(descriptor)]TJ 0 g 0 G [-873(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1877,7 +1823,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 204.884 Td [(cdfr)18(ee)-250(\227)-250(Fr)18(ees)-250(a)-250(communication)-250(descriptor)]TJ +/F62 9.9626 Tf 207.87 204.884 Td [(cdfr)18(ee)-250(\227)-250(Fr)18(ees)-250(a)-250(communication)-250(descriptor)]TJ 0 g 0 G [-791(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1891,7 +1837,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 192.804 Td [(cdbldext)-250(\227)-250(Build)-250(an)-250(extended)-250(communication)-250(descriptor)]TJ +/F62 9.9626 Tf 207.87 192.804 Td [(cdbldext)-250(\227)-250(Build)-250(an)-250(extended)-250(communication)-250(descriptor)]TJ 0 g 0 G [-676(.)]TJ 0 g 0 G @@ -1905,7 +1851,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 180.724 Td [(spall)-250(\227)-250(Allocates)-250(a)-250(sparse)-250(matrix)]TJ +/F62 9.9626 Tf 207.87 180.724 Td [(spall)-250(\227)-250(Allocates)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G [-842(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1919,7 +1865,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 168.644 Td [(spins)-250(\227)-250(Insert)-250(a)-250(set)-250(of)-250(coef)18(\002cients)-250(into)-250(a)-250(sparse)-250(matrix)]TJ +/F62 9.9626 Tf 207.87 168.644 Td [(spins)-250(\227)-250(Insert)-250(a)-250(set)-250(of)-250(coef)18(\002cients)-250(into)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G [-625(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1933,7 +1879,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 156.565 Td [(spasb)-250(\227)-250(Sparse)-250(matrix)-250(assembly)-250(r)18(outine)]TJ +/F62 9.9626 Tf 207.87 156.565 Td [(spasb)-250(\227)-250(Sparse)-250(matrix)-250(assembly)-250(r)18(outine)]TJ 0 g 0 G [-611(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1947,7 +1893,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 144.485 Td [(spfr)18(ee)-250(\227)-250(Fr)18(ees)-250(a)-250(sparse)-250(matrix)]TJ +/F62 9.9626 Tf 207.87 144.485 Td [(spfr)18(ee)-250(\227)-250(Fr)18(ees)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G [-520(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1961,7 +1907,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 132.405 Td [(sprn)-250(\227)-250(Reinit)-250(sparse)-250(matrix)-250(str)8(uctur)18(e)-250(for)-250(psblas)-250(r)18(outines.)]TJ +/F62 9.9626 Tf 207.87 132.405 Td [(sprn)-250(\227)-250(Reinit)-250(sparse)-250(matrix)-250(str)8(uctur)18(e)-250(for)-250(psblas)-250(r)18(outines.)]TJ 0 g 0 G [-725(.)]TJ 0 g 0 G @@ -1975,7 +1921,7 @@ q []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 207.87 120.326 Td [(geall)-250(\227)-250(Allocates)-250(a)-250(dense)-250(matrix)]TJ +/F62 9.9626 Tf 207.87 120.326 Td [(geall)-250(\227)-250(Allocates)-250(a)-250(dense)-250(matrix)]TJ 0 g 0 G [-330(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ 0 g 0 G @@ -1988,716 +1934,159 @@ ET endstream endobj -741 0 obj +605 0 obj << -/Length 18214 +/Type /ObjStm +/N 100 +/First 904 +/Length 14415 >> stream -0 g 0 G -0 g 0 G -0 0 1 rg 0 0 1 RG -BT -/F54 9.9626 Tf 114.839 706.129 Td [(6.13)-550(psb)]TJ -ET -q -1 0 0 1 154.072 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 706.129 Td [(geins)-250(\227)-250(Dense)-250(matrix)-250(insertion)-250(r)18(outine)]TJ -0 g 0 G - [-411(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1500(92)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(6.14)-550(psb)]TJ -ET -q -1 0 0 1 154.072 694.234 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 694.035 Td [(geasb)-250(\227)-250(Assembly)-250(a)-250(dense)-250(matrix)]TJ -0 g 0 G - [-376(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1500(94)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(6.15)-550(psb)]TJ -ET -q -1 0 0 1 154.072 682.14 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 681.941 Td [(gefr)18(ee)-250(\227)-250(Fr)18(ees)-250(a)-250(dense)-250(matrix)]TJ -0 g 0 G - [-758(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1500(95)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.095 Td [(6.16)-550(psb)]TJ -ET -q -1 0 0 1 154.072 670.046 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 669.846 Td [(gelp)-250(\227)-250(Applies)-250(a)-250(left)-250(permutation)-250(to)-250(a)-250(dense)-250(matrix)]TJ -0 g 0 G - [-801(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1500(96)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(6.17)-550(psb)]TJ -ET -q -1 0 0 1 154.072 657.951 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 657.752 Td [(glob)]TJ -ET -q -1 0 0 1 177.046 657.951 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 180.034 657.752 Td [(to)]TJ -ET -q -1 0 0 1 189.319 657.951 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 192.308 657.752 Td [(loc)-250(\227)-250(Global)-250(to)-250(local)-250(indices)-250(convertion)]TJ -0 g 0 G - [-427(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1500(97)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -77.469 -12.094 Td [(6.18)-550(psb)]TJ -ET -q -1 0 0 1 154.072 645.857 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 645.658 Td [(loc)]TJ -ET -q -1 0 0 1 170.42 645.857 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 173.409 645.658 Td [(to)]TJ -ET -q -1 0 0 1 182.694 645.857 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 185.683 645.658 Td [(glob)-250(\227)-250(Local)-250(to)-250(global)-250(indices)-250(conversion)]TJ -0 g 0 G - [-966(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1500(99)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -70.844 -12.094 Td [(6.19)-550(psb)]TJ -ET -q -1 0 0 1 154.072 633.763 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 633.564 Td [(is)]TJ -ET -q -1 0 0 1 164.782 633.763 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 167.771 633.564 Td [(owned)-250(\227)]TJ -0 g 0 G - [-1135(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(100)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -52.932 -12.094 Td [(6.20)-550(psb)]TJ -ET -q -1 0 0 1 154.072 621.669 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 621.47 Td [(owned)]TJ -ET -q -1 0 0 1 188.064 621.669 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 191.053 621.47 Td [(index)-250(\227)]TJ -0 g 0 G - [-871(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(101)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -76.214 -12.095 Td [(6.21)-550(psb)]TJ -ET -q -1 0 0 1 154.072 609.575 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 609.375 Td [(is)]TJ -ET -q -1 0 0 1 164.782 609.575 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 167.771 609.375 Td [(local)-250(\227)]TJ -0 g 0 G - [-615(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(102)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -52.932 -12.094 Td [(6.22)-550(psb)]TJ -ET -q -1 0 0 1 154.072 597.481 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 597.281 Td [(local)]TJ -ET -q -1 0 0 1 178.301 597.481 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 181.29 597.281 Td [(index)-250(\227)]TJ -0 g 0 G - [-1101(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(103)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -66.451 -12.094 Td [(6.23)-550(psb)]TJ -ET -q -1 0 0 1 154.072 585.386 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 585.187 Td [(get)]TJ -ET -q -1 0 0 1 171.217 585.386 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 174.206 585.187 Td [(boundary)-250(\227)-250(Extract)-250(list)-250(of)-250(boundary)-250(elements)]TJ -0 g 0 G - [-827(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(104)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -59.367 -12.094 Td [(6.24)-550(psb)]TJ -ET -q -1 0 0 1 154.072 573.292 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 573.093 Td [(get)]TJ -ET -q -1 0 0 1 171.217 573.292 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 174.206 573.093 Td [(overlap)-250(\227)-250(Extract)-250(list)-250(of)-250(overlap)-250(elements)]TJ -0 g 0 G - [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(105)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -59.367 -12.094 Td [(6.25)-550(psb)]TJ -ET -q -1 0 0 1 154.072 561.198 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 560.999 Td [(sp)]TJ -ET -q -1 0 0 1 167.87 561.198 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 170.859 560.999 Td [(getr)18(ow)-250(\227)-250(Extract)-250(r)18(ow\050s\051)-250(fr)18(om)-250(a)-250(sparse)-250(matrix)]TJ -0 g 0 G - [-671(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(106)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -56.02 -12.094 Td [(6.26)-550(psb)]TJ -ET -q -1 0 0 1 154.072 549.104 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 548.905 Td [(sizeof)-250(\227)-250(Memory)-250(occupation)]TJ -0 g 0 G - [-251(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(108)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.095 Td [(6.27)-550(Sorting)-250(utilities)-250(\227)]TJ -0 g 0 G - [-1157(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(109)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -14.944 -22.149 Td [(7)-1000(Parallel)-250(environment)-250(routines)]TJ -0 g 0 G - [-17835(111)]TJ -0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.094 Td [(7.1)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 502.766 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 502.567 Td [(init)-250(\227)-250(Initializes)-250(PSBLAS)-250(parallel)-250(envir)18(onment)]TJ -0 g 0 G - [-766(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(112)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.095 Td [(7.2)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 490.672 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 490.472 Td [(info)-264(\227)-264(Return)-264(information)-264(abou)1(t)-264(PSBLAS)-264(parallel)-264(envir)18(on-)]TJ -19.308 -11.955 Td [(ment)]TJ -0 g 0 G - [-930(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(113)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -22.914 -12.094 Td [(7.3)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 466.622 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 466.423 Td [(exit)-250(\227)-250(Exit)-250(fr)18(om)-250(PSBLAS)-250(parallel)-250(envir)18(onment)]TJ -0 g 0 G - [-823(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(114)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.4)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 454.528 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 454.329 Td [(get)]TJ -ET -q -1 0 0 1 171.217 454.528 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 174.206 454.329 Td [(mpi)]TJ -ET -q -1 0 0 1 192.487 454.528 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 195.476 454.329 Td [(comm)-250(\227)-250(Get)-250(the)-250(MPI)-250(communicator)]TJ -0 g 0 G - [-615(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(115)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -80.637 -12.094 Td [(7.5)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 442.434 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 442.235 Td [(get)]TJ -ET -q -1 0 0 1 171.217 442.434 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 174.206 442.235 Td [(mpi)]TJ -ET -q -1 0 0 1 192.487 442.434 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 195.476 442.235 Td [(rank)-250(\227)-250(Get)-250(the)-250(MPI)-250(rank)]TJ -0 g 0 G - [-498(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(116)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -80.637 -12.094 Td [(7.6)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 430.34 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 430.141 Td [(wtime)-250(\227)-250(W)92(all)-250(clock)-250(timing)]TJ -0 g 0 G - [-499(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(117)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.095 Td [(7.7)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 418.246 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 418.046 Td [(barrier)-250(\227)-250(Sinchr)18(onization)-250(point)-250(parallel)-250(envir)18(onment)]TJ -0 g 0 G - [-903(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(118)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.8)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 406.151 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 405.952 Td [(abort)-250(\227)-250(Abort)-250(a)-250(computation)]TJ -0 g 0 G - [-946(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(119)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.9)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 394.057 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 393.858 Td [(bcast)-250(\227)-250(Br)18(oadcast)-250(data)]TJ -0 g 0 G - [-739(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(120)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.10)-550(psb)]TJ -ET -q -1 0 0 1 154.072 381.963 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 381.764 Td [(sum)-250(\227)-250(Global)-250(sum)]TJ -0 g 0 G - [-998(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(122)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.11)-550(psb)]TJ -ET -q -1 0 0 1 154.072 369.869 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 369.67 Td [(max)-250(\227)-250(Global)-250(maximum)]TJ -0 g 0 G - [-610(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(124)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.095 Td [(7.12)-550(psb)]TJ -ET -q -1 0 0 1 154.072 357.775 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 357.575 Td [(min)-250(\227)-250(Global)-250(minimum)]TJ -0 g 0 G - [-896(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(126)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.13)-550(psb)]TJ -ET -q -1 0 0 1 154.072 345.681 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 345.481 Td [(amx)-250(\227)-250(Global)-250(maximum)-250(absolute)-250(value)]TJ -0 g 0 G - [-700(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(128)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.14)-550(psb)]TJ -ET -q -1 0 0 1 154.072 333.586 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 333.387 Td [(amn)-250(\227)-250(Global)-250(minimum)-250(absolute)-250(value)]TJ -0 g 0 G - [-777(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(130)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.15)-550(psb)]TJ -ET -q -1 0 0 1 154.072 321.492 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 321.293 Td [(nrm2)-250(\227)-250(Global)-250(2-norm)-250(r)18(eduction)]TJ -0 g 0 G - [-710(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(132)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.16)-550(psb)]TJ -ET -q -1 0 0 1 154.072 309.398 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 309.199 Td [(snd)-250(\227)-250(Send)-250(data)]TJ -0 g 0 G - [-511(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(134)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(7.17)-550(psb)]TJ -ET -q -1 0 0 1 154.072 297.304 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 297.105 Td [(r)18(cv)-250(\227)-250(Receive)-250(data)]TJ -0 g 0 G - [-284(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(135)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -57.166 -22.15 Td [(8)-1000(Error)-250(handling)]TJ -0 g 0 G - [-24750(136)]TJ -0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.094 Td [(8.1)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 263.06 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 262.861 Td [(errpush)-250(\227)-250(Pushes)-250(an)-250(err)18(or)-250(code)-250(onto)-250(the)-250(err)18(or)-250(stack)]TJ -0 g 0 G - [-734(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(138)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(8.2)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 250.966 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 250.767 Td [(err)18(or)-250(\227)-250(Prints)-250(the)-250(err)18(or)-250(stack)-250(content)-250(and)-250(aborts)-250(execution)]TJ -0 g 0 G -0 g 0 G - [-1381(139)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -42.222 -12.094 Td [(8.3)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 238.872 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 238.673 Td [(set)]TJ -ET -q -1 0 0 1 169.902 238.872 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 172.891 238.673 Td [(errverbosity)-250(\227)-250(Sets)-250(the)-250(verbosity)-250(of)-250(err)18(or)-250(messages)]TJ -0 g 0 G - [-253(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(140)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -58.052 -12.095 Td [(8.4)-1050(psb)]TJ -ET -q -1 0 0 1 154.072 226.778 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 157.061 226.578 Td [(set)]TJ -ET -q -1 0 0 1 169.902 226.778 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 172.891 226.578 Td [(erraction)-232(\227)-231(Set)-232(the)-231(type)-232(of)-231(action)-232(to)-232(b)1(e)-232(taken)-232(upon)-231(err)18(or)]TJ -35.138 -11.955 Td [(condition)]TJ -0 g 0 G - [-481(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(141)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -37.858 -22.149 Td [(9)-1000(Utilities)]TJ -0 g 0 G - [-27780(142)]TJ -0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -12.095 Td [(9.1)-1450(hb)]TJ -ET -q -1 0 0 1 153.644 180.579 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 156.633 180.38 Td [(r)18(ead)-400(\227)-400(Read)-400(a)-400(sparse)-400(matrix)-400(fr)18(om)-400(a)-400(\002le)-400(in)-400(the)-400(Harwell\226)]TJ -18.88 -11.956 Td [(Boeing)-250(format)]TJ -0 g 0 G - [-652(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(143)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -22.914 -12.094 Td [(9.2)-1050(hb)]TJ -ET -q -1 0 0 1 149.659 156.529 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 152.647 156.33 Td [(write)-226(\227)-226(W)74(rite)-226(a)-226(sparse)-226(matrix)-226(to)-226(a)-225(\002le)-226(in)-226(the)-226(Harwell\226Boeing)]TJ -14.894 -11.955 Td [(format)]TJ -0 g 0 G - [-967(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(144)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -22.914 -12.094 Td [(9.3)-1050(mm)]TJ -ET -q -1 0 0 1 155.945 132.48 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 158.934 132.281 Td [(mat)]TJ -ET -q -1 0 0 1 176.558 132.48 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 179.546 132.281 Td [(r)18(ead)-265(\227)-265(Read)-265(a)-265(sparse)-265(matrix)-265(fr)18(om)-265(a)-265(\002le)-265(in)-265(the)-265(Matrix-)]TJ -41.793 -11.955 Td [(Market)-250(format)]TJ -0 g 0 G - [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(145)]TJ -0 g 0 G -0 g 0 G - 129.649 -29.888 Td [(iii)]TJ -0 g 0 G -ET - -endstream -endobj -645 0 obj +604 0 608 43 609 137 612 180 613 304 616 347 617 491 620 535 621 654 624 698 +625 817 628 861 629 995 632 1039 633 1153 636 1197 637 1346 640 1390 641 1539 644 1583 +645 1752 648 1796 649 1945 652 1989 653 2133 656 2177 657 2346 658 2391 660 2520 663 2626 +664 2682 3 2738 661 2792 671 2921 673 3035 670 3092 717 3159 674 3653 675 3799 676 3945 +677 4097 678 4249 679 4401 680 4556 681 4708 682 4851 683 5003 684 5160 685 5317 686 5474 +687 5629 688 5786 689 5943 690 6100 691 6257 692 6410 721 6568 693 6723 722 6881 694 7038 +695 7196 696 7354 697 7512 698 7670 699 7821 700 7978 701 8134 702 8291 723 8448 703 8604 +724 8761 704 8916 725 9073 705 9229 706 9386 707 9543 708 9700 709 9858 710 10014 711 10172 +712 10330 713 10488 714 10646 719 10801 720 10857 716 10913 774 10993 715 11495 726 11653 727 11811 +728 11969 729 12120 730 12277 731 12434 732 12590 733 12747 734 12904 735 13061 736 13213 737 13365 +% 604 0 obj +<< /S /GoTo /D (section*.7) >> +% 608 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000e\000x\000i\000t) +% 609 0 obj +<< /S /GoTo /D (section*.8) >> +% 612 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000D\000e\000v\000i\000c\000e\000S\000y\000n\000c) +% 613 0 obj +<< /S /GoTo /D (section*.9) >> +% 616 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000g\000e\000t\000D\000e\000v\000i\000c\000e\000C\000o\000u\000n\000t) +% 617 0 obj +<< /S /GoTo /D (section*.10) >> +% 620 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000g\000e\000t\000D\000e\000v\000i\000c\000e) +% 621 0 obj +<< /S /GoTo /D (section*.11) >> +% 624 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000s\000e\000t\000D\000e\000v\000i\000c\000e) +% 625 0 obj +<< /S /GoTo /D (section*.12) >> +% 628 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000D\000e\000v\000i\000c\000e\000H\000a\000s\000U\000V\000A) +% 629 0 obj +<< /S /GoTo /D (section*.13) >> +% 632 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000W\000a\000r\000p\000S\000i\000z\000e) +% 633 0 obj +<< /S /GoTo /D (section*.14) >> +% 636 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000M\000u\000l\000t\000i\000P\000r\000o\000c\000e\000s\000s\000o\000r\000s) +% 637 0 obj +<< /S /GoTo /D (section*.15) >> +% 640 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000M\000a\000x\000T\000h\000r\000e\000a\000d\000s\000P\000e\000r\000M\000P) +% 641 0 obj +<< /S /GoTo /D (section*.16) >> +% 644 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000M\000a\000x\000R\000e\000g\000i\000s\000t\000e\000r\000P\000e\000r\000B\000l\000o\000c\000k) +% 645 0 obj +<< /S /GoTo /D (section*.17) >> +% 648 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000M\000e\000m\000o\000r\000y\000C\000l\000o\000c\000k\000R\000a\000t\000e) +% 649 0 obj +<< /S /GoTo /D (section*.18) >> +% 652 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000M\000e\000m\000o\000r\000y\000B\000u\000s\000W\000i\000d\000t\000h) +% 653 0 obj +<< /S /GoTo /D (section*.19) >> +% 656 0 obj +(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000M\000e\000m\000o\000r\000y\000P\000e\000a\000k\000B\000a\000n\000d\000w\000i\000d\000t\000h) +% 657 0 obj +<< /S /GoTo /D [658 0 R /Fit] >> +% 658 0 obj << -/Type /ObjStm -/N 100 -/First 927 -/Length 16599 ->> -stream -596 0 597 152 598 304 599 459 600 611 601 754 602 906 603 1063 604 1220 605 1377 -606 1532 607 1689 608 1846 609 2003 610 2160 611 2313 640 2471 612 2626 641 2784 613 2941 -614 3099 615 3257 616 3415 617 3573 618 3724 619 3881 620 4037 621 4194 642 4351 622 4507 -643 4664 623 4819 644 4976 624 5132 625 5289 626 5446 627 5603 628 5761 629 5917 630 6075 -631 6233 632 6391 633 6549 638 6704 639 6760 635 6816 694 6896 634 7398 646 7556 647 7714 -648 7872 649 8023 650 8180 651 8337 652 8493 653 8650 654 8807 655 8964 656 9116 657 9268 -658 9414 659 9566 660 9718 661 9870 662 10022 663 10174 664 10326 665 10478 666 10630 667 10781 -668 10934 669 11087 670 11240 671 11393 672 11546 673 11699 674 11852 675 11999 676 12149 677 12301 -678 12453 679 12605 680 12752 681 12904 682 13056 683 13208 684 13360 685 13512 686 13664 687 13816 -688 13968 689 14120 690 14273 691 14424 696 14575 693 14632 740 14712 692 15214 697 15367 698 15519 -% 596 0 obj +/Type /Page +/Contents 662 0 R +/Resources 661 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 668 0 R +/Group 660 0 R +>> +% 660 0 obj +%PTEX Group needed for transparent pngs +<> +% 663 0 obj +<< +/D [658 0 R /XYZ 98.895 753.953 null] +>> +% 664 0 obj +<< +/D [658 0 R /XYZ 99.895 716.092 null] +>> +% 3 0 obj +<< +/D [658 0 R /XYZ 99.895 716.092 null] +>> +% 661 0 obj +<< +/Font << /F59 665 0 R /F60 666 0 R /F62 667 0 R >> +/XObject << /Im1 659 0 R >> +/ProcSet [ /PDF /Text /ImageC ] +>> +% 671 0 obj +<< +/Type /Page +/Contents 672 0 R +/Resources 670 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 668 0 R +>> +% 673 0 obj +<< +/D [671 0 R /XYZ 149.705 753.953 null] +>> +% 670 0 obj +<< +/Font << /F62 667 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 717 0 obj +<< +/Type /Page +/Contents 718 0 R +/Resources 716 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 668 0 R +/Annots [ 674 0 R 675 0 R 676 0 R 677 0 R 678 0 R 679 0 R 680 0 R 681 0 R 682 0 R 683 0 R 684 0 R 685 0 R 686 0 R 687 0 R 688 0 R 689 0 R 690 0 R 691 0 R 692 0 R 721 0 R 693 0 R 722 0 R 694 0 R 695 0 R 696 0 R 697 0 R 698 0 R 699 0 R 700 0 R 701 0 R 702 0 R 723 0 R 703 0 R 724 0 R 704 0 R 725 0 R 705 0 R 706 0 R 707 0 R 708 0 R 709 0 R 710 0 R 711 0 R 712 0 R 713 0 R 714 0 R ] +>> +% 674 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [98.899 681.973 173.389 691.303] +/A << /S /GoTo /D (section.1) >> +>> +% 675 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [98.899 659.857 196.921 669.187] +/A << /S /GoTo /D (section.2) >> +>> +% 676 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 647.803 227.028 657.212] +/A << /S /GoTo /D (subsection.2.1) >> +>> +% 677 0 obj << /Type /Annot /Subtype /Link @@ -2705,7 +2094,7 @@ stream /Rect [113.843 633.079 211.078 645.138] /A << /S /GoTo /D (subsection.2.2) >> >> -% 597 0 obj +% 678 0 obj << /Type /Annot /Subtype /Link @@ -2713,7 +2102,7 @@ stream /Rect [113.843 621.004 233.094 633.064] /A << /S /GoTo /D (subsection.2.3) >> >> -% 598 0 obj +% 679 0 obj << /Type /Annot /Subtype /Link @@ -2721,7 +2110,7 @@ stream /Rect [136.757 608.93 301.886 620.99] /A << /S /GoTo /D (subsubsection.2.3.1) >> >> -% 599 0 obj +% 680 0 obj << /Type /Annot /Subtype /Link @@ -2729,7 +2118,7 @@ stream /Rect [113.843 596.856 230.734 608.916] /A << /S /GoTo /D (subsection.2.4) >> >> -% 600 0 obj +% 681 0 obj << /Type /Annot /Subtype /Link @@ -2737,7 +2126,7 @@ stream /Rect [98.899 577.37 242.261 586.7] /A << /S /GoTo /D (section.3) >> >> -% 601 0 obj +% 682 0 obj << /Type /Annot /Subtype /Link @@ -2745,7 +2134,7 @@ stream /Rect [113.843 562.666 249.144 574.726] /A << /S /GoTo /D (subsection.3.1) >> >> -% 602 0 obj +% 683 0 obj << /Type /Annot /Subtype /Link @@ -2753,7 +2142,7 @@ stream /Rect [136.757 550.592 258.689 562.652] /A << /S /GoTo /D (subsubsection.3.1.1) >> >> -% 603 0 obj +% 684 0 obj << /Type /Annot /Subtype /Link @@ -2761,7 +2150,7 @@ stream /Rect [136.757 538.518 360.207 550.578] /A << /S /GoTo /D (subsubsection.3.1.2) >> >> -% 604 0 obj +% 685 0 obj << /Type /Annot /Subtype /Link @@ -2769,7 +2158,7 @@ stream /Rect [136.757 526.444 350.723 538.504] /A << /S /GoTo /D (subsubsection.3.1.3) >> >> -% 605 0 obj +% 686 0 obj << /Type /Annot /Subtype /Link @@ -2777,7 +2166,7 @@ stream /Rect [136.757 514.37 373.457 526.43] /A << /S /GoTo /D (subsubsection.3.1.4) >> >> -% 606 0 obj +% 687 0 obj << /Type /Annot /Subtype /Link @@ -2785,7 +2174,7 @@ stream /Rect [136.757 502.296 363.973 514.355] /A << /S /GoTo /D (subsubsection.3.1.5) >> >> -% 607 0 obj +% 688 0 obj << /Type /Annot /Subtype /Link @@ -2793,7 +2182,7 @@ stream /Rect [136.757 490.222 384.834 502.281] /A << /S /GoTo /D (subsubsection.3.1.6) >> >> -% 608 0 obj +% 689 0 obj << /Type /Annot /Subtype /Link @@ -2801,7 +2190,7 @@ stream /Rect [136.757 478.148 356.411 490.013] /A << /S /GoTo /D (subsubsection.3.1.7) >> >> -% 609 0 obj +% 690 0 obj << /Type /Annot /Subtype /Link @@ -2809,7 +2198,7 @@ stream /Rect [136.757 466.074 297.523 478.133] /A << /S /GoTo /D (subsubsection.3.1.8) >> >> -% 610 0 obj +% 691 0 obj << /Type /Annot /Subtype /Link @@ -2817,7 +2206,7 @@ stream /Rect [136.757 454 345.014 466.059] /A << /S /GoTo /D (subsubsection.3.1.9) >> >> -% 611 0 obj +% 692 0 obj << /Type /Annot /Subtype /Link @@ -2825,7 +2214,7 @@ stream /Rect [136.757 441.925 444.603 453.985] /A << /S /GoTo /D (subsubsection.3.1.10) >> >> -% 640 0 obj +% 721 0 obj << /Type /Annot /Subtype /Link @@ -2833,7 +2222,7 @@ stream /Rect [98.899 429.97 222.246 442.03] /A << /S /GoTo /D (subsubsection.3.1.10) >> >> -% 612 0 obj +% 693 0 obj << /Type /Annot /Subtype /Link @@ -2841,7 +2230,7 @@ stream /Rect [136.757 417.896 444.603 429.956] /A << /S /GoTo /D (subsubsection.3.1.11) >> >> -% 641 0 obj +% 722 0 obj << /Type /Annot /Subtype /Link @@ -2849,7 +2238,7 @@ stream /Rect [98.899 405.941 222.246 418.001] /A << /S /GoTo /D (subsubsection.3.1.11) >> >> -% 613 0 obj +% 694 0 obj << /Type /Annot /Subtype /Link @@ -2857,7 +2246,7 @@ stream /Rect [136.757 393.867 358.404 405.927] /A << /S /GoTo /D (subsubsection.3.1.12) >> >> -% 614 0 obj +% 695 0 obj << /Type /Annot /Subtype /Link @@ -2865,7 +2254,7 @@ stream /Rect [136.757 381.793 354.718 393.853] /A << /S /GoTo /D (subsubsection.3.1.13) >> >> -% 615 0 obj +% 696 0 obj << /Type /Annot /Subtype /Link @@ -2873,7 +2262,7 @@ stream /Rect [136.757 369.719 413.607 381.778] /A << /S /GoTo /D (subsubsection.3.1.14) >> >> -% 616 0 obj +% 697 0 obj << /Type /Annot /Subtype /Link @@ -2881,7 +2270,7 @@ stream /Rect [136.757 360.295 250.062 369.704] /A << /S /GoTo /D (subsubsection.3.1.15) >> >> -% 617 0 obj +% 698 0 obj << /Type /Annot /Subtype /Link @@ -2889,7 +2278,7 @@ stream /Rect [113.843 345.571 223.242 357.63] /A << /S /GoTo /D (subsection.3.2) >> >> -% 618 0 obj +% 699 0 obj << /Type /Annot /Subtype /Link @@ -2897,7 +2286,7 @@ stream /Rect [136.757 333.497 273.364 345.556] /A << /S /GoTo /D (subsubsection.3.2.1) >> >> -% 619 0 obj +% 700 0 obj << /Type /Annot /Subtype /Link @@ -2905,7 +2294,7 @@ stream /Rect [136.757 321.423 399.41 333.482] /A << /S /GoTo /D (subsubsection.3.2.2) >> >> -% 620 0 obj +% 701 0 obj << /Type /Annot /Subtype /Link @@ -2913,7 +2302,7 @@ stream /Rect [136.757 309.348 410.528 321.408] /A << /S /GoTo /D (subsubsection.3.2.3) >> >> -% 621 0 obj +% 702 0 obj << /Type /Annot /Subtype /Link @@ -2921,7 +2310,7 @@ stream /Rect [136.757 297.274 444.603 309.334] /A << /S /GoTo /D (subsubsection.3.2.4) >> >> -% 642 0 obj +% 723 0 obj << /Type /Annot /Subtype /Link @@ -2929,7 +2318,7 @@ stream /Rect [98.899 287.969 199.631 297.075] /A << /S /GoTo /D (subsubsection.3.2.4) >> >> -% 622 0 obj +% 703 0 obj << /Type /Annot /Subtype /Link @@ -2937,7 +2326,7 @@ stream /Rect [136.757 273.245 444.603 285.305] /A << /S /GoTo /D (subsubsection.3.2.5) >> >> -% 643 0 obj +% 724 0 obj << /Type /Annot /Subtype /Link @@ -2945,7 +2334,7 @@ stream /Rect [98.899 261.29 248.906 273.046] /A << /S /GoTo /D (subsubsection.3.2.5) >> >> -% 623 0 obj +% 704 0 obj << /Type /Annot /Subtype /Link @@ -2953,7 +2342,7 @@ stream /Rect [136.757 249.216 444.603 261.276] /A << /S /GoTo /D (subsubsection.3.2.6) >> >> -% 644 0 obj +% 725 0 obj << /Type /Annot /Subtype /Link @@ -2961,7 +2350,7 @@ stream /Rect [98.899 239.911 185.853 249.016] /A << /S /GoTo /D (subsubsection.3.2.6) >> >> -% 624 0 obj +% 705 0 obj << /Type /Annot /Subtype /Link @@ -2969,7 +2358,7 @@ stream /Rect [136.757 225.187 384.545 237.246] /A << /S /GoTo /D (subsubsection.3.2.7) >> >> -% 625 0 obj +% 706 0 obj << /Type /Annot /Subtype /Link @@ -2977,1520 +2366,2954 @@ stream /Rect [136.757 213.113 329.343 225.172] /A << /S /GoTo /D (subsubsection.3.2.8) >> >> -% 626 0 obj +% 707 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 201.038 405.337 213.098] +/A << /S /GoTo /D (subsubsection.3.2.9) >> +>> +% 708 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 188.964 371.724 201.024] +/A << /S /GoTo /D (subsubsection.3.2.10) >> +>> +% 709 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 176.89 309.029 188.95] +/A << /S /GoTo /D (subsubsection.3.2.11) >> +>> +% 710 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 167.466 350.683 176.876] +/A << /S /GoTo /D (subsubsection.3.2.12) >> +>> +% 711 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 152.742 303.929 164.802] +/A << /S /GoTo /D (subsubsection.3.2.13) >> +>> +% 712 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 140.668 324.462 152.728] +/A << /S /GoTo /D (subsubsection.3.2.14) >> +>> +% 713 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 128.594 309.687 140.654] +/A << /S /GoTo /D (subsubsection.3.2.15) >> +>> +% 714 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [136.757 116.52 314.13 128.58] +/A << /S /GoTo /D (subsubsection.3.2.16) >> +>> +% 719 0 obj +<< +/D [717 0 R /XYZ 98.895 753.953 null] +>> +% 720 0 obj +<< +/D [717 0 R /XYZ 99.895 723.975 null] +>> +% 716 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 774 0 obj +<< +/Type /Page +/Contents 775 0 R +/Resources 773 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 668 0 R +/Annots [ 715 0 R 726 0 R 727 0 R 728 0 R 729 0 R 730 0 R 731 0 R 732 0 R 733 0 R 734 0 R 735 0 R 736 0 R 737 0 R 738 0 R 739 0 R 740 0 R 741 0 R 742 0 R 743 0 R 744 0 R 745 0 R 746 0 R 747 0 R 748 0 R 749 0 R 750 0 R 751 0 R 752 0 R 753 0 R 754 0 R 755 0 R 756 0 R 757 0 R 758 0 R 759 0 R 760 0 R 761 0 R 762 0 R 763 0 R 764 0 R 765 0 R 766 0 R 767 0 R 768 0 R 769 0 R 770 0 R 771 0 R ] +>> +% 715 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [187.567 702.323 437.338 714.383] +/A << /S /GoTo /D (subsubsection.3.2.17) >> +>> +% 726 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [187.567 690.243 348.332 702.303] +/A << /S /GoTo /D (subsubsection.3.2.18) >> +>> +% 727 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [187.567 680.814 300.871 690.223] +/A << /S /GoTo /D (subsubsection.3.2.19) >> +>> +% 728 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 668.734 313.682 677.95] +/A << /S /GoTo /D (subsection.3.3) >> +>> +% 729 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [187.567 656.654 290.829 666.064] +/A << /S /GoTo /D (subsubsection.3.3.1) >> +>> +% 730 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 201.038 405.337 213.098] -/A << /S /GoTo /D (subsubsection.3.2.9) >> +/Rect [187.567 641.925 446.194 653.984] +/A << /S /GoTo /D (subsubsection.3.3.2) >> >> -% 627 0 obj +% 731 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 188.964 371.724 201.024] -/A << /S /GoTo /D (subsubsection.3.2.10) >> +/Rect [187.567 629.845 479.97 641.905] +/A << /S /GoTo /D (subsubsection.3.3.3) >> >> -% 628 0 obj +% 732 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 176.89 309.029 188.95] -/A << /S /GoTo /D (subsubsection.3.2.11) >> +/Rect [187.567 620.415 358.813 629.825] +/A << /S /GoTo /D (subsubsection.3.3.4) >> >> -% 629 0 obj +% 733 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 167.466 350.683 176.876] -/A << /S /GoTo /D (subsubsection.3.2.12) >> +/Rect [187.567 605.686 415.509 617.745] +/A << /S /GoTo /D (subsubsection.3.3.5) >> >> -% 630 0 obj +% 734 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 152.742 303.929 164.802] -/A << /S /GoTo /D (subsubsection.3.2.13) >> +/Rect [187.567 593.606 348.332 605.666] +/A << /S /GoTo /D (subsubsection.3.3.6) >> >> -% 631 0 obj +% 735 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 140.668 324.462 152.728] -/A << /S /GoTo /D (subsubsection.3.2.14) >> +/Rect [164.653 584.176 318.663 593.586] +/A << /S /GoTo /D (subsection.3.4) >> >> -% 632 0 obj +% 736 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 128.594 309.687 140.654] -/A << /S /GoTo /D (subsubsection.3.2.15) >> +/Rect [164.653 569.446 277.409 581.506] +/A << /S /GoTo /D (subsection.3.5) >> >> -% 633 0 obj +% 737 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [136.757 116.52 314.13 128.58] -/A << /S /GoTo /D (subsubsection.3.2.16) >> +/Rect [149.709 547.56 275.386 559.281] +/A << /S /GoTo /D (section.4) >> >> -% 638 0 obj + +endstream +endobj +822 0 obj +<< +/Length 18214 +>> +stream +0 g 0 G +0 g 0 G +0 0 1 rg 0 0 1 RG +BT +/F62 9.9626 Tf 114.839 706.129 Td [(6.13)-550(psb)]TJ +ET +q +1 0 0 1 154.072 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 706.129 Td [(geins)-250(\227)-250(Dense)-250(matrix)-250(insertion)-250(r)18(outine)]TJ +0 g 0 G + [-411(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1500(92)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(6.14)-550(psb)]TJ +ET +q +1 0 0 1 154.072 694.234 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 694.035 Td [(geasb)-250(\227)-250(Assembly)-250(a)-250(dense)-250(matrix)]TJ +0 g 0 G + [-376(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1500(94)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(6.15)-550(psb)]TJ +ET +q +1 0 0 1 154.072 682.14 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 681.941 Td [(gefr)18(ee)-250(\227)-250(Fr)18(ees)-250(a)-250(dense)-250(matrix)]TJ +0 g 0 G + [-758(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1500(95)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.095 Td [(6.16)-550(psb)]TJ +ET +q +1 0 0 1 154.072 670.046 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 669.846 Td [(gelp)-250(\227)-250(Applies)-250(a)-250(left)-250(permutation)-250(to)-250(a)-250(dense)-250(matrix)]TJ +0 g 0 G + [-801(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1500(96)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(6.17)-550(psb)]TJ +ET +q +1 0 0 1 154.072 657.951 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 657.752 Td [(glob)]TJ +ET +q +1 0 0 1 177.046 657.951 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 180.034 657.752 Td [(to)]TJ +ET +q +1 0 0 1 189.319 657.951 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 192.308 657.752 Td [(loc)-250(\227)-250(Global)-250(to)-250(local)-250(indices)-250(convertion)]TJ +0 g 0 G + [-427(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1500(97)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -77.469 -12.094 Td [(6.18)-550(psb)]TJ +ET +q +1 0 0 1 154.072 645.857 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 645.658 Td [(loc)]TJ +ET +q +1 0 0 1 170.42 645.857 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 173.409 645.658 Td [(to)]TJ +ET +q +1 0 0 1 182.694 645.857 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 185.683 645.658 Td [(glob)-250(\227)-250(Local)-250(to)-250(global)-250(indices)-250(conversion)]TJ +0 g 0 G + [-966(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1500(99)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -70.844 -12.094 Td [(6.19)-550(psb)]TJ +ET +q +1 0 0 1 154.072 633.763 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 633.564 Td [(is)]TJ +ET +q +1 0 0 1 164.782 633.763 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 167.771 633.564 Td [(owned)-250(\227)]TJ +0 g 0 G + [-1135(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(100)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -52.932 -12.094 Td [(6.20)-550(psb)]TJ +ET +q +1 0 0 1 154.072 621.669 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 621.47 Td [(owned)]TJ +ET +q +1 0 0 1 188.064 621.669 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 191.053 621.47 Td [(index)-250(\227)]TJ +0 g 0 G + [-871(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(101)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -76.214 -12.095 Td [(6.21)-550(psb)]TJ +ET +q +1 0 0 1 154.072 609.575 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 609.375 Td [(is)]TJ +ET +q +1 0 0 1 164.782 609.575 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 167.771 609.375 Td [(local)-250(\227)]TJ +0 g 0 G + [-615(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(102)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -52.932 -12.094 Td [(6.22)-550(psb)]TJ +ET +q +1 0 0 1 154.072 597.481 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 597.281 Td [(local)]TJ +ET +q +1 0 0 1 178.301 597.481 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 181.29 597.281 Td [(index)-250(\227)]TJ +0 g 0 G + [-1101(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(103)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -66.451 -12.094 Td [(6.23)-550(psb)]TJ +ET +q +1 0 0 1 154.072 585.386 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 585.187 Td [(get)]TJ +ET +q +1 0 0 1 171.217 585.386 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 174.206 585.187 Td [(boundary)-250(\227)-250(Extract)-250(list)-250(of)-250(boundary)-250(elements)]TJ +0 g 0 G + [-827(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(104)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -59.367 -12.094 Td [(6.24)-550(psb)]TJ +ET +q +1 0 0 1 154.072 573.292 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 573.093 Td [(get)]TJ +ET +q +1 0 0 1 171.217 573.292 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 174.206 573.093 Td [(overlap)-250(\227)-250(Extract)-250(list)-250(of)-250(overlap)-250(elements)]TJ +0 g 0 G + [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(105)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -59.367 -12.094 Td [(6.25)-550(psb)]TJ +ET +q +1 0 0 1 154.072 561.198 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 560.999 Td [(sp)]TJ +ET +q +1 0 0 1 167.87 561.198 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 170.859 560.999 Td [(getr)18(ow)-250(\227)-250(Extract)-250(r)18(ow\050s\051)-250(fr)18(om)-250(a)-250(sparse)-250(matrix)]TJ +0 g 0 G + [-671(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(106)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -56.02 -12.094 Td [(6.26)-550(psb)]TJ +ET +q +1 0 0 1 154.072 549.104 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 548.905 Td [(sizeof)-250(\227)-250(Memory)-250(occupation)]TJ +0 g 0 G + [-251(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(108)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.095 Td [(6.27)-550(Sorting)-250(utilities)-250(\227)]TJ +0 g 0 G + [-1157(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(109)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG +/F59 9.9626 Tf -14.944 -22.149 Td [(7)-1000(Parallel)-250(environment)-250(routines)]TJ +0 g 0 G + [-17835(111)]TJ +0 0 1 rg 0 0 1 RG +/F62 9.9626 Tf 14.944 -12.094 Td [(7.1)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 502.766 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 502.567 Td [(init)-250(\227)-250(Initializes)-250(PSBLAS)-250(parallel)-250(envir)18(onment)]TJ +0 g 0 G + [-766(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(112)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.095 Td [(7.2)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 490.672 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 490.472 Td [(info)-264(\227)-264(Return)-264(information)-264(abou)1(t)-264(PSBLAS)-264(parallel)-264(envir)18(on-)]TJ -19.308 -11.955 Td [(ment)]TJ +0 g 0 G + [-930(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(113)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -22.914 -12.094 Td [(7.3)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 466.622 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 466.423 Td [(exit)-250(\227)-250(Exit)-250(fr)18(om)-250(PSBLAS)-250(parallel)-250(envir)18(onment)]TJ +0 g 0 G + [-823(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(114)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(7.4)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 454.528 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 454.329 Td [(get)]TJ +ET +q +1 0 0 1 171.217 454.528 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 174.206 454.329 Td [(mpi)]TJ +ET +q +1 0 0 1 192.487 454.528 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 195.476 454.329 Td [(comm)-250(\227)-250(Get)-250(the)-250(MPI)-250(communicator)]TJ +0 g 0 G + [-615(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(115)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -80.637 -12.094 Td [(7.5)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 442.434 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 442.235 Td [(get)]TJ +ET +q +1 0 0 1 171.217 442.434 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 174.206 442.235 Td [(mpi)]TJ +ET +q +1 0 0 1 192.487 442.434 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 195.476 442.235 Td [(rank)-250(\227)-250(Get)-250(the)-250(MPI)-250(rank)]TJ +0 g 0 G + [-498(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(116)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -80.637 -12.094 Td [(7.6)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 430.34 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 430.141 Td [(wtime)-250(\227)-250(W)92(all)-250(clock)-250(timing)]TJ +0 g 0 G + [-499(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(117)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.095 Td [(7.7)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 418.246 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 418.046 Td [(barrier)-250(\227)-250(Sinchr)18(onization)-250(point)-250(parallel)-250(envir)18(onment)]TJ +0 g 0 G + [-903(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(118)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(7.8)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 406.151 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 405.952 Td [(abort)-250(\227)-250(Abort)-250(a)-250(computation)]TJ +0 g 0 G + [-946(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(119)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(7.9)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 394.057 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 393.858 Td [(bcast)-250(\227)-250(Br)18(oadcast)-250(data)]TJ +0 g 0 G + [-739(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(120)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(7.10)-550(psb)]TJ +ET +q +1 0 0 1 154.072 381.963 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 381.764 Td [(sum)-250(\227)-250(Global)-250(sum)]TJ +0 g 0 G + [-998(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(122)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(7.11)-550(psb)]TJ +ET +q +1 0 0 1 154.072 369.869 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 369.67 Td [(max)-250(\227)-250(Global)-250(maximum)]TJ +0 g 0 G + [-610(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(124)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.095 Td [(7.12)-550(psb)]TJ +ET +q +1 0 0 1 154.072 357.775 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 357.575 Td [(min)-250(\227)-250(Global)-250(minimum)]TJ +0 g 0 G + [-896(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(126)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(7.13)-550(psb)]TJ +ET +q +1 0 0 1 154.072 345.681 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 345.481 Td [(amx)-250(\227)-250(Global)-250(maximum)-250(absolute)-250(value)]TJ +0 g 0 G + [-700(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(128)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(7.14)-550(psb)]TJ +ET +q +1 0 0 1 154.072 333.586 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 333.387 Td [(amn)-250(\227)-250(Global)-250(minimum)-250(absolute)-250(value)]TJ +0 g 0 G + [-777(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(130)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(7.15)-550(psb)]TJ +ET +q +1 0 0 1 154.072 321.492 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 321.293 Td [(nrm2)-250(\227)-250(Global)-250(2-norm)-250(r)18(eduction)]TJ +0 g 0 G + [-710(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(132)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(7.16)-550(psb)]TJ +ET +q +1 0 0 1 154.072 309.398 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 309.199 Td [(snd)-250(\227)-250(Send)-250(data)]TJ +0 g 0 G + [-511(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(134)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(7.17)-550(psb)]TJ +ET +q +1 0 0 1 154.072 297.304 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 297.105 Td [(r)18(cv)-250(\227)-250(Receive)-250(data)]TJ +0 g 0 G + [-284(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(135)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG +/F59 9.9626 Tf -57.166 -22.15 Td [(8)-1000(Error)-250(handling)]TJ +0 g 0 G + [-24750(136)]TJ +0 0 1 rg 0 0 1 RG +/F62 9.9626 Tf 14.944 -12.094 Td [(8.1)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 263.06 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 262.861 Td [(errpush)-250(\227)-250(Pushes)-250(an)-250(err)18(or)-250(code)-250(onto)-250(the)-250(err)18(or)-250(stack)]TJ +0 g 0 G + [-734(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(138)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(8.2)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 250.966 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 250.767 Td [(err)18(or)-250(\227)-250(Prints)-250(the)-250(err)18(or)-250(stack)-250(content)-250(and)-250(aborts)-250(execution)]TJ +0 g 0 G +0 g 0 G + [-1381(139)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -42.222 -12.094 Td [(8.3)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 238.872 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 238.673 Td [(set)]TJ +ET +q +1 0 0 1 169.902 238.872 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 172.891 238.673 Td [(errverbosity)-250(\227)-250(Sets)-250(the)-250(verbosity)-250(of)-250(err)18(or)-250(messages)]TJ +0 g 0 G + [-253(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(140)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -58.052 -12.095 Td [(8.4)-1050(psb)]TJ +ET +q +1 0 0 1 154.072 226.778 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 157.061 226.578 Td [(set)]TJ +ET +q +1 0 0 1 169.902 226.778 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 172.891 226.578 Td [(erraction)-232(\227)-231(Set)-232(the)-231(type)-232(of)-231(action)-232(to)-232(b)1(e)-232(taken)-232(upon)-231(err)18(or)]TJ -35.138 -11.955 Td [(condition)]TJ +0 g 0 G + [-481(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(141)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG +/F59 9.9626 Tf -37.858 -22.149 Td [(9)-1000(Utilities)]TJ +0 g 0 G + [-27780(142)]TJ +0 0 1 rg 0 0 1 RG +/F62 9.9626 Tf 14.944 -12.095 Td [(9.1)-1450(hb)]TJ +ET +q +1 0 0 1 153.644 180.579 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 156.633 180.38 Td [(r)18(ead)-400(\227)-400(Read)-400(a)-400(sparse)-400(matrix)-400(fr)18(om)-400(a)-400(\002le)-400(in)-400(the)-400(Harwell\226)]TJ -18.88 -11.956 Td [(Boeing)-250(format)]TJ +0 g 0 G + [-652(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(143)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -22.914 -12.094 Td [(9.2)-1050(hb)]TJ +ET +q +1 0 0 1 149.659 156.529 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 152.647 156.33 Td [(write)-226(\227)-226(W)74(rite)-226(a)-226(sparse)-226(matrix)-226(to)-226(a)-225(\002le)-226(in)-226(the)-226(Harwell\226Boeing)]TJ -14.894 -11.955 Td [(format)]TJ +0 g 0 G + [-967(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(144)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -22.914 -12.094 Td [(9.3)-1050(mm)]TJ +ET +q +1 0 0 1 155.945 132.48 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 158.934 132.281 Td [(mat)]TJ +ET +q +1 0 0 1 176.558 132.48 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 179.546 132.281 Td [(r)18(ead)-265(\227)-265(Read)-265(a)-265(sparse)-265(matrix)-265(fr)18(om)-265(a)-265(\002le)-265(in)-265(the)-265(Matrix-)]TJ -41.793 -11.955 Td [(Market)-250(format)]TJ +0 g 0 G + [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(145)]TJ +0 g 0 G +0 g 0 G + 129.649 -29.888 Td [(iii)]TJ +0 g 0 G +ET + +endstream +endobj +862 0 obj << -/D [636 0 R /XYZ 98.895 753.953 null] +/Length 13187 >> -% 639 0 obj +stream +0 g 0 G +0 g 0 G +0 0 1 rg 0 0 1 RG +BT +/F62 9.9626 Tf 165.649 706.129 Td [(9.4)-1050(mm)]TJ +ET +q +1 0 0 1 206.755 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 209.743 706.129 Td [(array)]TJ +ET +q +1 0 0 1 233.713 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 236.702 706.129 Td [(r)18(ead)-281(\227)-281(Read)-281(a)-281(dense)-281(array)-281(fr)18(om)-281(a)-281(\002le)-281(in)-281(the)-281(Matrix-)]TJ -48.139 -11.955 Td [(Market)-250(format)]TJ +0 g 0 G + [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(146)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -22.914 -11.955 Td [(9.5)-1050(mm)]TJ +ET +q +1 0 0 1 206.755 682.418 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 209.743 682.219 Td [(mat)]TJ +ET +q +1 0 0 1 227.367 682.418 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 230.356 682.219 Td [(write)-333(\227)-333(W)74(rite)-334(a)-333(sparse)-333(matrix)-333(to)-333(a)-334(\002le)-333(in)-333(the)-333(Matrix-)]TJ -41.793 -11.956 Td [(Market)-250(format)]TJ +0 g 0 G + [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(147)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -22.914 -11.955 Td [(9.6)-1050(mm)]TJ +ET +q +1 0 0 1 206.755 658.507 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 209.743 658.308 Td [(array)]TJ +ET +q +1 0 0 1 233.713 658.507 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 236.702 658.308 Td [(write)-234(\227)-234(W)74(rite)-234(a)-234(dense)-234(array)-234(fr)18(om)-234(a)-234(\002le)-234(in)-234(the)-234(Matrix-)]TJ -48.139 -11.955 Td [(Market)-250(format)]TJ +0 g 0 G + [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(148)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG +/F59 9.9626 Tf -37.858 -21.918 Td [(10)-500(Preconditioner)-250(routines)]TJ +0 g 0 G + [-20696(150)]TJ +0 0 1 rg 0 0 1 RG +/F62 9.9626 Tf 14.944 -11.955 Td [(10.1)-550(init)-250(\227)-250(Initialize)-250(a)-250(pr)18(econditioner)]TJ +0 g 0 G + [-772(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(151)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.955 Td [(10.2)-550(build)-250(\227)-250(Builds)-250(a)-250(pr)18(econditioner)]TJ +0 g 0 G + [-970(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(152)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.955 Td [(10.3)-550(apply)-250(\227)-250(Pr)18(econditioner)-250(application)-250(r)18(outine)]TJ +0 g 0 G + [-421(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(154)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.955 Td [(10.4)-550(descr)-250(\227)-250(Prints)-250(a)-250(description)-250(of)-250(curr)18(ent)-250(pr)18(econditioner)]TJ +0 g 0 G + [-350(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(155)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.956 Td [(10.5)-550(clone)-250(\227)-250(clone)-250(curr)18(ent)-250(pr)18(econditioner)]TJ +0 g 0 G + [-260(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(156)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.955 Td [(10.6)-550(fr)18(ee)-250(\227)-250(Fr)18(ee)-250(a)-250(pr)18(econditioner)]TJ +0 g 0 G + [-341(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(157)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG +/F59 9.9626 Tf -14.944 -21.918 Td [(11)-500(Iterative)-250(Methods)]TJ +0 g 0 G + [-23362(158)]TJ +0 0 1 rg 0 0 1 RG +/F62 9.9626 Tf 14.944 -11.955 Td [(11.1)-550(psb)]TJ +ET +q +1 0 0 1 204.881 519.031 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 207.87 518.831 Td [(krylov)-250(\227)-250(Krylov)-250(Methods)-250(Driver)-250(Routine)]TJ +0 g 0 G + [-716(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(159)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG +/F59 9.9626 Tf -57.165 -21.918 Td [(12)-500(Extensions)]TJ +0 g 0 G + [-26557(162)]TJ +0 0 1 rg 0 0 1 RG +/F62 9.9626 Tf 14.944 -11.955 Td [(12.1)-550(Using)-250(the)-250(extensions)]TJ +0 g 0 G + [-783(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(162)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.955 Td [(12.2)-550(Extensions')-250(Data)-250(Str)8(uctur)18(es)]TJ +0 g 0 G + [-797(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(163)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.955 Td [(12.3)-550(CPU-class)-250(extensions)]TJ +0 g 0 G + [-544(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(163)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + 0 -11.955 Td [(12.4)-550(CUDA-class)-250(extensions)]TJ +0 g 0 G + [-346(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(170)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG +/F59 9.9626 Tf -14.944 -21.918 Td [(13)-500(CUDA)-250(Environment)-250(Routines)]TJ +0 g 0 G + [-17779(171)]TJ +0 0 1 rg 0 0 1 RG +/F62 9.9626 Tf 14.944 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 415.419 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 415.22 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 415.419 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 415.22 Td [(init)]TJ +0 g 0 G + [-304(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(171)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 403.464 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 403.265 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 403.464 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 403.265 Td [(exit)]TJ +0 g 0 G + [-932(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(171)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.956 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 391.509 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 391.309 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 391.509 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 391.309 Td [(DeviceSync)]TJ +0 g 0 G + [-405(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(172)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 379.554 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 379.354 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 379.554 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 379.354 Td [(getDeviceCount)]TJ +0 g 0 G + [-635(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(172)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 367.598 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 367.399 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 367.598 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 367.399 Td [(getDevice)]TJ +0 g 0 G + [-401(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(172)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 355.643 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 355.444 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 355.643 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 355.444 Td [(setDevice)]TJ +0 g 0 G + [-533(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(172)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 343.688 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 343.489 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 343.688 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 343.489 Td [(DeviceHasUV)111(A)]TJ +0 g 0 G + [-839(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(172)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 331.733 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 331.534 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 331.733 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 331.534 Td [(W)92(arpSize)]TJ +0 g 0 G + [-595(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(172)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.956 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 319.778 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 319.578 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 319.778 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 319.578 Td [(MultiPr)18(ocessors)]TJ +0 g 0 G + [-674(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(172)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 307.823 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 307.623 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 307.823 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 307.623 Td [(MaxThr)18(eadsPerMP)]TJ +0 g 0 G + [-718(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(172)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 295.867 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 295.668 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 295.867 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 295.668 Td [(MaxRegisterPerBlock)]TJ +0 g 0 G + [-538(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(173)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 283.912 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 283.713 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 283.912 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 283.713 Td [(MemoryClockRate)]TJ +0 g 0 G + [-970(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(173)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 271.957 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 271.758 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 271.957 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 271.758 Td [(MemoryBusW)55(idth)]TJ +0 g 0 G + [-346(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(173)]TJ +0 g 0 G +0 0 1 rg 0 0 1 RG + -44.393 -11.955 Td [(psb)]TJ +ET +q +1 0 0 1 181.967 260.002 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 184.956 259.803 Td [(cuda)]TJ +ET +q +1 0 0 1 207.053 260.002 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 210.042 259.803 Td [(MemoryPeakBandwidth)]TJ +0 g 0 G + [-652(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ +0 g 0 G + [-1000(173)]TJ +0 g 0 G +0 g 0 G + 108.254 -169.365 Td [(iv)]TJ +0 g 0 G +ET + +endstream +endobj +777 0 obj << -/D [636 0 R /XYZ 99.895 723.975 null] +/Type /ObjStm +/N 100 +/First 926 +/Length 16374 >> -% 635 0 obj +stream +738 0 739 152 740 304 741 456 742 608 743 760 744 912 745 1064 746 1216 747 1367 +748 1520 749 1673 750 1826 751 1979 752 2132 753 2285 754 2438 755 2585 756 2735 757 2887 +758 3039 759 3191 760 3338 761 3490 762 3642 763 3794 764 3946 765 4098 766 4250 767 4402 +768 4554 769 4706 770 4859 771 5010 776 5161 773 5218 821 5298 772 5800 778 5953 779 6105 +780 6258 781 6409 782 6562 783 6715 784 6868 785 7020 786 7172 787 7325 788 7478 789 7631 +790 7784 791 7936 792 8089 793 8235 794 8387 824 8539 795 8690 796 8842 797 8994 798 9146 +799 9298 800 9448 801 9600 802 9752 803 9905 804 10058 805 10209 806 10361 807 10514 808 10667 +809 10820 810 10973 811 11119 812 11271 813 11423 814 11575 825 11727 815 11878 816 12024 826 12176 +817 12327 827 12479 818 12630 828 12782 823 12931 820 12987 861 13067 819 13473 864 13625 829 13777 +865 13929 830 14081 866 14233 831 14385 832 14530 833 14683 834 14836 835 14989 836 15142 837 15295 +% 738 0 obj << -/Font << /F51 584 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 535.241 380.451 547.301] +/A << /S /GoTo /D (subsection.4.1) >> >> -% 694 0 obj +% 739 0 obj << -/Type /Page -/Contents 695 0 R -/Resources 693 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 587 0 R -/Annots [ 634 0 R 646 0 R 647 0 R 648 0 R 649 0 R 650 0 R 651 0 R 652 0 R 653 0 R 654 0 R 655 0 R 656 0 R 657 0 R 658 0 R 659 0 R 660 0 R 661 0 R 662 0 R 663 0 R 664 0 R 665 0 R 666 0 R 667 0 R 668 0 R 669 0 R 670 0 R 671 0 R 672 0 R 673 0 R 674 0 R 675 0 R 676 0 R 677 0 R 678 0 R 679 0 R 680 0 R 681 0 R 682 0 R 683 0 R 684 0 R 685 0 R 686 0 R 687 0 R 688 0 R 689 0 R 690 0 R 691 0 R ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 523.162 302.763 535.221] +/A << /S /GoTo /D (subsection.4.2) >> >> -% 634 0 obj +% 740 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 702.323 437.338 714.383] -/A << /S /GoTo /D (subsubsection.3.2.17) >> +/Rect [164.653 511.082 362.977 523.142] +/A << /S /GoTo /D (subsection.4.3) >> >> -% 646 0 obj +% 741 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 690.243 348.332 702.303] -/A << /S /GoTo /D (subsubsection.3.2.18) >> +/Rect [164.653 499.002 354.758 511.062] +/A << /S /GoTo /D (subsection.4.4) >> >> -% 647 0 obj +% 742 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 680.814 300.871 690.223] -/A << /S /GoTo /D (subsubsection.3.2.19) >> +/Rect [164.653 486.923 379.844 498.982] +/A << /S /GoTo /D (subsection.4.5) >> >> -% 648 0 obj +% 743 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 668.734 313.682 677.95] -/A << /S /GoTo /D (subsection.3.3) >> +/Rect [164.653 474.843 329.154 486.903] +/A << /S /GoTo /D (subsection.4.6) >> >> -% 649 0 obj +% 744 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 656.654 290.829 666.064] -/A << /S /GoTo /D (subsubsection.3.3.1) >> +/Rect [164.653 462.763 394.738 474.823] +/A << /S /GoTo /D (subsection.4.7) >> >> -% 650 0 obj +% 745 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 641.925 446.194 653.984] -/A << /S /GoTo /D (subsubsection.3.3.2) >> +/Rect [164.653 450.684 329.154 462.743] +/A << /S /GoTo /D (subsection.4.8) >> >> -% 651 0 obj +% 746 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 629.845 479.97 641.905] -/A << /S /GoTo /D (subsubsection.3.3.3) >> +/Rect [164.653 438.604 394.24 450.663] +/A << /S /GoTo /D (subsection.4.9) >> >> -% 652 0 obj +% 747 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 620.415 358.813 629.825] -/A << /S /GoTo /D (subsubsection.3.3.4) >> +/Rect [164.653 426.524 362.499 438.584] +/A << /S /GoTo /D (subsection.4.10) >> >> -% 653 0 obj +% 748 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 605.686 415.509 617.745] -/A << /S /GoTo /D (subsubsection.3.3.5) >> +/Rect [164.653 414.444 387.276 426.504] +/A << /S /GoTo /D (subsection.4.11) >> >> -% 654 0 obj +% 749 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [187.567 593.606 348.332 605.666] -/A << /S /GoTo /D (subsubsection.3.3.6) >> +/Rect [164.653 402.365 425.761 414.424] +/A << /S /GoTo /D (subsection.4.12) >> >> -% 655 0 obj +% 750 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 584.176 318.663 593.586] -/A << /S /GoTo /D (subsection.3.4) >> +/Rect [164.653 390.285 353.991 402.345] +/A << /S /GoTo /D (subsection.4.13) >> >> -% 656 0 obj +% 751 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 569.446 277.409 581.506] -/A << /S /GoTo /D (subsection.3.5) >> +/Rect [164.653 378.205 331.346 390.265] +/A << /S /GoTo /D (subsection.4.14) >> >> -% 657 0 obj +% 752 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 547.56 275.386 559.281] -/A << /S /GoTo /D (section.4) >> +/Rect [164.653 366.126 333.538 378.185] +/A << /S /GoTo /D (subsection.4.15) >> >> -% 658 0 obj +% 753 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 535.241 380.451 547.301] -/A << /S /GoTo /D (subsection.4.1) >> +/Rect [164.653 354.046 337.602 366.106] +/A << /S /GoTo /D (subsection.4.16) >> >> -% 659 0 obj +% 754 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 523.162 302.763 535.221] -/A << /S /GoTo /D (subsection.4.2) >> +/Rect [149.709 334.551 280.368 343.701] +/A << /S /GoTo /D (section.5) >> >> -% 660 0 obj +% 755 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 511.082 362.977 523.142] -/A << /S /GoTo /D (subsection.4.3) >> +/Rect [164.653 319.841 362.031 331.9] +/A << /S /GoTo /D (subsection.5.1) >> >> -% 661 0 obj +% 756 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 499.002 354.758 511.062] -/A << /S /GoTo /D (subsection.4.4) >> +/Rect [164.653 307.761 313.065 319.821] +/A << /S /GoTo /D (subsection.5.2) >> >> -% 662 0 obj +% 757 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 486.923 379.844 498.982] -/A << /S /GoTo /D (subsection.4.5) >> +/Rect [164.653 295.681 376.127 307.741] +/A << /S /GoTo /D (subsection.5.3) >> >> -% 663 0 obj +% 758 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 474.843 329.154 486.903] -/A << /S /GoTo /D (subsection.4.6) >> +/Rect [164.653 283.602 376.187 295.661] +/A << /S /GoTo /D (subsection.5.4) >> >> -% 664 0 obj +% 759 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 462.763 394.738 474.823] -/A << /S /GoTo /D (subsection.4.7) >> +/Rect [149.709 261.636 289.504 273.257] +/A << /S /GoTo /D (section.6) >> >> -% 665 0 obj +% 760 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 450.684 329.154 462.743] -/A << /S /GoTo /D (subsection.4.8) >> +/Rect [164.653 249.397 412.092 261.456] +/A << /S /GoTo /D (subsection.6.1) >> >> -% 666 0 obj +% 761 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 438.604 394.24 450.663] -/A << /S /GoTo /D (subsection.4.9) >> +/Rect [164.653 237.317 428.052 249.377] +/A << /S /GoTo /D (subsection.6.2) >> >> -% 667 0 obj +% 762 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 426.524 362.499 438.584] -/A << /S /GoTo /D (subsection.4.10) >> +/Rect [164.653 225.237 445.915 237.297] +/A << /S /GoTo /D (subsection.6.3) >> >> -% 668 0 obj +% 763 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 414.444 387.276 426.504] -/A << /S /GoTo /D (subsection.4.11) >> +/Rect [164.653 213.158 407.011 225.217] +/A << /S /GoTo /D (subsection.6.4) >> >> -% 669 0 obj +% 764 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 402.365 425.761 414.424] -/A << /S /GoTo /D (subsection.4.12) >> +/Rect [164.653 201.078 400.356 213.138] +/A << /S /GoTo /D (subsection.6.5) >> >> -% 670 0 obj +% 765 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 390.285 353.991 402.345] -/A << /S /GoTo /D (subsection.4.13) >> +/Rect [164.653 188.998 461.277 201.058] +/A << /S /GoTo /D (subsection.6.6) >> >> -% 671 0 obj +% 766 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 378.205 331.346 390.265] -/A << /S /GoTo /D (subsection.4.14) >> +/Rect [164.653 176.918 355.017 188.978] +/A << /S /GoTo /D (subsection.6.7) >> >> -% 672 0 obj +% 767 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 366.126 333.538 378.185] -/A << /S /GoTo /D (subsection.4.15) >> +/Rect [164.653 164.839 446.841 176.898] +/A << /S /GoTo /D (subsection.6.8) >> >> -% 673 0 obj +% 768 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 354.046 337.602 366.106] -/A << /S /GoTo /D (subsection.4.16) >> +/Rect [164.653 152.759 387.206 164.819] +/A << /S /GoTo /D (subsection.6.9) >> >> -% 674 0 obj +% 769 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 334.551 280.368 343.701] -/A << /S /GoTo /D (section.5) >> +/Rect [164.653 140.679 343.281 152.739] +/A << /S /GoTo /D (subsection.6.10) >> >> -% 675 0 obj +% 770 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 319.841 362.031 331.9] -/A << /S /GoTo /D (subsection.5.1) >> +/Rect [164.653 128.6 460.789 140.659] +/A << /S /GoTo /D (subsection.6.11) >> >> -% 676 0 obj +% 771 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 307.761 313.065 319.821] -/A << /S /GoTo /D (subsection.5.2) >> +/Rect [164.653 116.52 352.646 128.58] +/A << /S /GoTo /D (subsection.6.12) >> >> -% 677 0 obj +% 776 0 obj +<< +/D [774 0 R /XYZ 149.705 753.953 null] +>> +% 773 0 obj +<< +/Font << /F62 667 0 R /F59 665 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 821 0 obj +<< +/Type /Page +/Contents 822 0 R +/Resources 820 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 668 0 R +/Annots [ 772 0 R 778 0 R 779 0 R 780 0 R 781 0 R 782 0 R 783 0 R 784 0 R 785 0 R 786 0 R 787 0 R 788 0 R 789 0 R 790 0 R 791 0 R 792 0 R 793 0 R 794 0 R 824 0 R 795 0 R 796 0 R 797 0 R 798 0 R 799 0 R 800 0 R 801 0 R 802 0 R 803 0 R 804 0 R 805 0 R 806 0 R 807 0 R 808 0 R 809 0 R 810 0 R 811 0 R 812 0 R 813 0 R 814 0 R 825 0 R 815 0 R 816 0 R 826 0 R 817 0 R 827 0 R 818 0 R 828 0 R ] +>> +% 772 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 295.681 376.127 307.741] -/A << /S /GoTo /D (subsection.5.3) >> +/Rect [113.843 702.323 330.917 714.383] +/A << /S /GoTo /D (subsection.6.13) >> >> -% 678 0 obj +% 778 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 283.602 376.187 295.661] -/A << /S /GoTo /D (subsection.5.4) >> +/Rect [113.843 690.229 308.85 702.289] +/A << /S /GoTo /D (subsection.6.14) >> >> -% 679 0 obj +% 779 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 261.636 289.504 273.257] -/A << /S /GoTo /D (section.6) >> +/Rect [113.843 678.135 290.101 690.194] +/A << /S /GoTo /D (subsection.6.15) >> >> -% 680 0 obj +% 780 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 249.397 412.092 261.456] -/A << /S /GoTo /D (subsection.6.1) >> +/Rect [113.843 666.041 386.806 678.1] +/A << /S /GoTo /D (subsection.6.16) >> >> -% 681 0 obj +% 781 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 237.317 428.052 249.377] -/A << /S /GoTo /D (subsection.6.2) >> +/Rect [113.843 653.946 368.116 666.006] +/A << /S /GoTo /D (subsection.6.17) >> >> -% 682 0 obj +% 782 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 225.237 445.915 237.297] -/A << /S /GoTo /D (subsection.6.3) >> +/Rect [113.843 641.852 370.219 653.912] +/A << /S /GoTo /D (subsection.6.18) >> >> -% 683 0 obj +% 783 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 213.158 407.011 225.217] -/A << /S /GoTo /D (subsection.6.4) >> +/Rect [113.843 629.758 214.116 641.818] +/A << /S /GoTo /D (subsection.6.19) >> >> -% 684 0 obj +% 784 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 201.078 400.356 213.138] -/A << /S /GoTo /D (subsection.6.5) >> +/Rect [113.843 617.664 231.69 629.724] +/A << /S /GoTo /D (subsection.6.20) >> >> -% 685 0 obj +% 785 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 188.998 461.277 201.058] -/A << /S /GoTo /D (subsection.6.6) >> +/Rect [113.843 605.57 204.353 617.629] +/A << /S /GoTo /D (subsection.6.21) >> >> -% 686 0 obj +% 786 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 176.918 355.017 188.978] -/A << /S /GoTo /D (subsection.6.7) >> +/Rect [113.843 593.476 221.927 605.535] +/A << /S /GoTo /D (subsection.6.22) >> >> -% 687 0 obj +% 787 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 164.839 446.841 176.898] -/A << /S /GoTo /D (subsection.6.8) >> +/Rect [113.843 581.381 379.076 593.441] +/A << /S /GoTo /D (subsection.6.23) >> >> -% 688 0 obj +% 788 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 152.759 387.206 164.819] -/A << /S /GoTo /D (subsection.6.9) >> +/Rect [113.843 569.287 359.768 581.347] +/A << /S /GoTo /D (subsection.6.24) >> >> -% 689 0 obj +% 789 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 140.679 343.281 152.739] -/A << /S /GoTo /D (subsection.6.10) >> +/Rect [113.843 557.193 373.158 569.253] +/A << /S /GoTo /D (subsection.6.25) >> >> -% 690 0 obj +% 790 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 128.6 460.789 140.659] -/A << /S /GoTo /D (subsection.6.11) >> +/Rect [113.843 545.099 287.68 557.158] +/A << /S /GoTo /D (subsection.6.26) >> >> -% 691 0 obj +% 791 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 116.52 352.646 128.58] -/A << /S /GoTo /D (subsection.6.12) >> ->> -% 696 0 obj -<< -/D [694 0 R /XYZ 149.705 753.953 null] ->> -% 693 0 obj -<< -/Font << /F54 586 0 R /F51 584 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 740 0 obj -<< -/Type /Page -/Contents 741 0 R -/Resources 739 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 587 0 R -/Annots [ 692 0 R 697 0 R 698 0 R 699 0 R 700 0 R 701 0 R 702 0 R 703 0 R 704 0 R 705 0 R 706 0 R 707 0 R 708 0 R 709 0 R 710 0 R 711 0 R 712 0 R 713 0 R 743 0 R 714 0 R 715 0 R 716 0 R 717 0 R 718 0 R 719 0 R 720 0 R 721 0 R 722 0 R 723 0 R 724 0 R 725 0 R 726 0 R 727 0 R 728 0 R 729 0 R 730 0 R 731 0 R 732 0 R 733 0 R 744 0 R 734 0 R 735 0 R 745 0 R 736 0 R 746 0 R 737 0 R 747 0 R ] +/Rect [113.843 533.005 221.369 545.064] +/A << /S /GoTo /D (subsection.6.27) >> >> -% 692 0 obj +% 792 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 702.323 330.917 714.383] -/A << /S /GoTo /D (subsection.6.13) >> +/Rect [98.899 513.485 251.974 522.815] +/A << /S /GoTo /D (section.7) >> >> -% 697 0 obj +% 793 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 690.229 308.85 702.289] -/A << /S /GoTo /D (subsection.6.14) >> +/Rect [113.843 498.761 364.739 510.821] +/A << /S /GoTo /D (subsection.7.1) >> >> -% 698 0 obj +% 794 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 678.135 290.101 690.194] -/A << /S /GoTo /D (subsection.6.15) >> ->> - -endstream -endobj -762 0 obj -<< -/Length 4437 +/Rect [113.843 486.667 444.603 498.726] +/A << /S /GoTo /D (subsection.7.2) >> >> -stream -0 g 0 G -0 g 0 G -0 0 1 rg 0 0 1 RG -BT -/F54 9.9626 Tf 165.649 706.129 Td [(9.4)-1050(mm)]TJ -ET -q -1 0 0 1 206.755 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 209.743 706.129 Td [(array)]TJ -ET -q -1 0 0 1 233.713 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 236.702 706.129 Td [(r)18(ead)-281(\227)-281(Read)-281(a)-281(dense)-281(array)-281(fr)18(om)-281(a)-281(\002le)-281(in)-281(the)-281(Matrix-)]TJ -48.139 -11.955 Td [(Market)-250(format)]TJ -0 g 0 G - [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(146)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -22.914 -11.955 Td [(9.5)-1050(mm)]TJ -ET -q -1 0 0 1 206.755 682.418 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 209.743 682.219 Td [(mat)]TJ -ET -q -1 0 0 1 227.367 682.418 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 230.356 682.219 Td [(write)-333(\227)-333(W)74(rite)-334(a)-333(sparse)-333(matrix)-333(to)-333(a)-334(\002le)-333(in)-333(the)-333(Matrix-)]TJ -41.793 -11.956 Td [(Market)-250(format)]TJ -0 g 0 G - [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(147)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - -22.914 -11.955 Td [(9.6)-1050(mm)]TJ -ET -q -1 0 0 1 206.755 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 209.743 658.308 Td [(array)]TJ -ET -q -1 0 0 1 233.713 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 236.702 658.308 Td [(write)-234(\227)-234(W)74(rite)-234(a)-234(dense)-234(array)-234(fr)18(om)-234(a)-234(\002le)-234(in)-234(the)-234(Matrix-)]TJ -48.139 -11.955 Td [(Market)-250(format)]TJ -0 g 0 G - [-515(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(148)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -37.858 -21.918 Td [(10)-500(Preconditioner)-250(routines)]TJ -0 g 0 G - [-20696(150)]TJ -0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -11.955 Td [(10.1)-550(init)-250(\227)-250(Initialize)-250(a)-250(pr)18(econditioner)]TJ -0 g 0 G - [-772(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(151)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - 0 -11.955 Td [(10.2)-550(build)-250(\227)-250(Builds)-250(a)-250(pr)18(econditioner)]TJ -0 g 0 G - [-970(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(152)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - 0 -11.955 Td [(10.3)-550(apply)-250(\227)-250(Pr)18(econditioner)-250(application)-250(r)18(outine)]TJ -0 g 0 G - [-421(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(154)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - 0 -11.955 Td [(10.4)-550(descr)-250(\227)-250(Prints)-250(a)-250(description)-250(of)-250(curr)18(ent)-250(pr)18(econditioner)]TJ -0 g 0 G - [-350(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(155)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - 0 -11.956 Td [(10.5)-550(clone)-250(\227)-250(clone)-250(curr)18(ent)-250(pr)18(econditioner)]TJ -0 g 0 G - [-260(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(156)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG - 0 -11.955 Td [(10.6)-550(fr)18(ee)-250(\227)-250(Fr)18(ee)-250(a)-250(pr)18(econditioner)]TJ -0 g 0 G - [-341(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(157)]TJ -0 g 0 G -0 0 1 rg 0 0 1 RG -/F51 9.9626 Tf -14.944 -21.918 Td [(11)-500(Iterative)-250(Methods)]TJ -0 g 0 G - [-23362(158)]TJ -0 0 1 rg 0 0 1 RG -/F54 9.9626 Tf 14.944 -11.955 Td [(11.1)-550(psb)]TJ -ET -q -1 0 0 1 204.881 519.031 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 207.87 518.831 Td [(krylov)-250(\227)-250(Krylov)-250(Methods)-250(Driver)-250(Routine)]TJ -0 g 0 G - [-716(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)-500(.)]TJ -0 g 0 G - [-1000(159)]TJ -0 g 0 G -0 g 0 G - 110.426 -428.393 Td [(iv)]TJ -0 g 0 G -ET - -endstream -endobj -779 0 obj +% 824 0 obj << -/Length 8044 +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [98.899 477.362 161.365 486.467] +/A << /S /GoTo /D (subsection.7.2) >> >> -stream -0 g 0 G -0 g 0 G -BT -/F51 14.3462 Tf 99.895 705.784 Td [(1)-1000(Introduction)]TJ/F54 9.9626 Tf 0 -22.913 Td [(The)-272(PSBLAS)-271(library)111(,)-277(developed)-272(with)-272(t)1(he)-272(aim)-272(to)-271(facilitate)-272(the)-272(parallelization)-271(of)]TJ 0 -11.955 Td [(computationally)-348(intensive)-347(scienti\002c)-348(applications,)-372(is)-347(designed)-348(to)-348(addr)18(ess)-347(par)18(-)]TJ 0 -11.955 Td [(allel)-282(implementation)-283(of)-282(iterative)-282(solvers)-283(for)-282(sparse)-282(linear)-283(systems)-282(thr)18(ough)-282(the)]TJ 0 -11.955 Td [(distributed)-232(memory)-232(paradigm.)-304(It)-232(includes)-233(r)18(outines)-232(for)-232(multiplying)-232(sparse)-232(ma-)]TJ 0 -11.955 Td [(trices)-211(by)-211(dense)-211(matrices,)-219(solving)-211(block)-211(diagonal)-211(systems)-211(with)-211(triangular)-211(diago-)]TJ 0 -11.956 Td [(nal)-229(entries,)-233(pr)18(epr)18(ocessing)-228(sparse)-229(matrices,)-233(and)-228(contains)-229(additional)-229(r)18(outines)-228(for)]TJ 0 -11.955 Td [(dense)-292(matrix)-292(operations.)-436(The)-292(curr)18(ent)-292(implementation)-292(of)-292(PSBLAS)-292(addr)18(esses)-292(a)]TJ 0 -11.955 Td [(distributed)-250(memory)-250(execution)-250(model)-250(operating)-250(with)-250(message)-250(passing.)]TJ 14.944 -12.064 Td [(The)-267(PSBLAS)-267(library)-268(version)-267(3)-267(is)-267(implemented)-267(in)-267(the)-268(Fortran)-267(2003)-267([)]TJ -1 0 0 rg 1 0 0 RG - [(17)]TJ -0 g 0 G - [(])-267(pr)18(o-)]TJ -14.944 -11.955 Td [(gramming)-278(language,)-284(with)-277(r)18(euse)-278(and/or)-278(adaptation)-277(of)-278(existing)-277(Fortran)-278(77)-277(and)]TJ 0 -11.955 Td [(Fortran)-250(95)-250(softwar)18(e,)-250(plus)-250(a)-250(handful)-250(of)-250(C)-250(r)18(outines.)]TJ 14.944 -12.064 Td [(The)-391(use)-392(of)-391(Fortran)-392(2003)-391(of)18(fers)-392(a)-391(number)-391(of)-392(advantages)-391(over)-392(Fortran)-391(95,)]TJ -14.944 -11.955 Td [(mostly)-385(in)-385(the)-385(handling)-385(of)-385(r)18(equir)18(ements)-385(for)-385(evolution)-385(and)-385(adaptation)-385(of)-385(the)]TJ 0 -11.956 Td [(library)-431(to)-432(new)-431(computing)-432(ar)18(chitectur)18(es)-431(and)-432(integration)-431(of)-432(new)-431(algorithms.)]TJ 0 -11.955 Td [(For)-365(a)-365(detail)1(ed)-365(discussion)-365(of)-365(our)-364(design)-365(see)-365([)]TJ -1 0 0 rg 1 0 0 RG - [(11)]TJ -0 g 0 G - [(];)-422(other)-365(works)-364(discussing)-365(ad-)]TJ 0 -11.955 Td [(vanced)-213(pr)18(ogramming)-214(in)-213(Fortran)-213(2003)-213(include)-214([)]TJ -1 0 0 rg 1 0 0 RG - [(1)]TJ -0 g 0 G - [(,)]TJ -1 0 0 rg 1 0 0 RG - [-213(18)]TJ -0 g 0 G - [(];)-225(suf)18(\002cient)-214(support)-213(for)-213(For)18(-)]TJ 0 -11.955 Td [(tran)-315(2003)-314(is)-315(now)-314(available)-315(fr)18(om)-314(many)-315(compilers,)-331(including)-314(the)-315(GNU)-314(Fortran)]TJ 0 -11.955 Td [(compiler)-250(fr)18(om)-250(the)-250(Fr)18(ee)-250(Softwar)18(e)-250(Foundation)-250(\050as)-250(of)-250(version)-250(4.8\051.)]TJ 14.944 -12.064 Td [(Pr)18(evious)-311(appr)18(oaches)-312(have)-311(been)-311(based)-311(on)-312(mixing)-311(Fortran)-311(95,)-327(with)-311(its)-311(sup-)]TJ -14.944 -11.955 Td [(port)-249(for)-249(object-based)-249(design,)-249(with)-249(other)-249(languages;)-249(these)-249(have)-249(been)-249(advocated)]TJ 0 -11.956 Td [(by)-346(a)-346(number)-346(of)-347(authors,)-370(e.g.)-346([)]TJ -1 0 0 rg 1 0 0 RG - [(16)]TJ -0 g 0 G - [(].)-598(Mor)18(eover)74(,)-371(the)-346(Fortran)-346(95)-346(facilities)-346(for)-346(dy-)]TJ 0 -11.955 Td [(namic)-411(memory)-410(management)-411(and)-410(interface)-411(overloading)-410(gr)18(eatly)-411(enhance)-410(the)]TJ 0 -11.955 Td [(usability)-397(of)-398(the)-397(PSBLAS)-398(subr)18(outines.)-752(In)-398(this)-397(way)111(,)-434(the)-398(library)-397(can)-398(take)-397(car)18(e)]TJ 0 -11.955 Td [(of)-267(r)8(untime)-266(memory)-267(r)18(equir)18(ements)-266(that)-267(ar)18(e)-266(quite)-267(dif)18(\002cult)-267(or)-266(even)-267(impossible)-266(to)]TJ 0 -11.955 Td [(pr)18(edict)-250(at)-250(implementation)-250(or)-250(compilation)-250(time.)]TJ 14.944 -12.064 Td [(The)-249(pr)18(esentation)-250(of)-249(the)-250(PSBLAS)-249(library)-249(follows)-250(the)-249(general)-249(str)8(uctur)18(e)-250(of)-249(the)]TJ -14.944 -11.955 Td [(pr)18(oposal)-207(for)-206(serial)-207(Sparse)-207(BLAS)-207([)]TJ -1 0 0 rg 1 0 0 RG - [(8)]TJ -0 g 0 G - [(,)]TJ -1 0 0 rg 1 0 0 RG - [-206(9)]TJ -0 g 0 G - [(],)-216(which)-206(in)-207(its)-207(turn)-206(is)-207(based)-207(on)-207(t)1(he)-207(pr)18(oposal)]TJ 0 -11.956 Td [(for)-250(BLAS)-250(on)-250(dense)-250(matrices)-250([)]TJ -1 0 0 rg 1 0 0 RG - [(15)]TJ -0 g 0 G - [(,)]TJ -1 0 0 rg 1 0 0 RG - [-250(5)]TJ -0 g 0 G - [(,)]TJ -1 0 0 rg 1 0 0 RG - [-250(6)]TJ -0 g 0 G - [(].)]TJ 14.944 -12.063 Td [(The)-297(applicability)-297(of)-298(sparse)-297(iterative)-297(solvers)-297(to)-297(many)-298(dif)18(fer)18(ent)-297(ar)18(eas)-297(causes)]TJ -14.944 -11.956 Td [(some)-190(terminology)-190(pr)18(oblems)-190(because)-190(the)-190(same)-190(concept)-190(may)-190(be)-190(denoted)-190(thr)18(ough)]TJ 0 -11.955 Td [(dif)18(fer)18(ent)-271(names)-271(depending)-272(on)-271(the)-271(application)-271(ar)18(ea.)-374(The)-271(PSBLAS)-271(featur)18(es)-271(pr)18(e-)]TJ 0 -11.955 Td [(sented)-332(in)-333(this)-332(document)-332(will)-333(be)-332(discussed)-332(r)18(eferring)-333(to)-332(a)-333(\002ni)1(te)-333(dif)18(fer)18(ence)-332(dis-)]TJ 0 -11.955 Td [(cr)18(etization)-284(of)-285(a)-284(Partial)-285(Dif)18(fer)18(ential)-284(Equation)-284(\050PDE\051.)-285(However)74(,)-293(the)-284(scope)-285(of)-284(the)]TJ 0 -11.955 Td [(library)-283(is)-283(wider)-283(than)-284(that:)-376(for)-283(example,)-291(it)-283(can)-284(be)-283(applied)-283(to)-283(\002nite)-283(element)-283(dis-)]TJ 0 -11.956 Td [(cr)18(etizations)-267(of)-267(PDEs,)-271(and)-267(even)-266(to)-267(dif)18(fer)18(ent)-267(classes)-267(of)-267(pr)18(oblems)-267(such)-266(as)-267(nonlin-)]TJ 0 -11.955 Td [(ear)-250(optimization,)-250(for)-250(example)-250(in)-250(optimal)-250(contr)18(ol)-250(pr)18(oblems.)]TJ 14.944 -12.064 Td [(The)-383(design)-383(of)-383(a)-383(solver)-383(for)-384(sparse)-383(linear)-383(systems)-383(is)-383(driven)-383(by)-383(many)-383(con-)]TJ -14.944 -11.955 Td [(\003icting)-271(objectives,)-277(such)-272(as)-271(limiting)-271(occupation)-272(of)-271(storage)-271(r)18(esour)18(ces,)-277(exploiting)]TJ 0 -11.955 Td [(r)18(egularities)-274(in)-274(the)-275(input)-274(data,)-280(exploiting)-274(har)18(dwar)18(e)-275(characteristi)1(cs)-275(of)-274(the)-274(paral-)]TJ 0 -11.955 Td [(lel)-350(platform.)-610(T)92(o)-350(achieve)-350(an)-350(optimal)-350(communication)-350(to)-350(computation)-350(ratio)-350(on)]TJ 0 -11.955 Td [(distributed)-379(memory)-378(machines)-379(it)-378(is)-379(essential)-379(to)-378(keep)-379(the)]TJ/F52 9.9626 Tf 256.501 0 Td [(data)-379(locality)]TJ/F54 9.9626 Tf 54.198 0 Td [(as)-379(high)]TJ -310.699 -11.956 Td [(as)-315(possible;)-346(this)-315(can)-314(be)-315(done)-315(thr)18(ough)-314(an)-315(appr)18(opriate)-314(data)-315(allocation)-314(strategy)111(.)]TJ 0 -11.955 Td [(The)-323(choice)-323(of)-324(the)-323(pr)18(econditioner)-323(is)-323(another)-323(very)-324(important)-323(factor)-323(that)-323(af)18(fects)]TJ 0 -11.955 Td [(ef)18(\002ciency)-300(of)-300(the)-300(im)1(plemented)-300(application.)-460(Optimal)-300(data)-299(distribution)-300(r)18(equir)18(e-)]TJ 0 -11.955 Td [(ments)-300(for)-299(a)-300(given)-300(pr)18(econditioner)-299(may)-300(con\003ict)-300(with)-300(distribution)-299(r)18(equir)18(ements)]TJ 0 -11.955 Td [(of)-356(the)-356(r)18(est)-356(of)-357(the)-356(solver)74(.)-628(Finding)-356(the)-357(o)1(ptimal)-357(trade-of)18(f)-356(may)-356(be)-356(very)-356(dif)18(\002cult)]TJ 0 -11.955 Td [(because)-292(it)-291(is)-292(application)-291(dependent.)-435(Possible)-292(solutions)-291(to)-292(these)-292(pr)18(oblems)-291(and)]TJ 0 -11.956 Td [(other)-342(important)-342(inputs)-342(to)-342(the)-342(development)-342(of)-341(the)-342(PSBLAS)-342(softwar)18(e)-342(package)]TJ -0 g 0 G - 169.365 -29.887 Td [(1)]TJ -0 g 0 G -ET - -endstream -endobj -798 0 obj +% 795 0 obj << -/Length 5269 +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 462.617 364.172 474.677] +/A << /S /GoTo /D (subsection.7.3) >> >> -stream -0 g 0 G -0 g 0 G -BT -/F54 9.9626 Tf 150.705 706.129 Td [(have)-292(come)-291(fr)18(om)-292(an)-292(established)-291(experience)-292(in)-292(applying)-291(the)-292(PSBLAS)-291(solvers)-292(to)]TJ 0 -11.955 Td [(computational)-250(\003uid)-250(dynamics)-250(applications.)]TJ/F51 14.3462 Tf 0 -33.474 Td [(2)-1000(General)-250(overview)]TJ/F54 9.9626 Tf 0 -22.702 Td [(The)-190(PSBLAS)-190(library)-190(is)-190(designed)-190(to)-190(handle)-190(the)-190(implementation)-190(of)-190(iterative)-190(solvers)]TJ 0 -11.955 Td [(for)-275(sparse)-275(linear)-275(systems)-275(on)-275(distributed)-275(memory)-274(parallel)-275(computers.)-385(The)-275(sys-)]TJ 0 -11.955 Td [(tem)-307(coef)18(\002cient)-308(matrix)]TJ/F52 9.9626 Tf 100.571 0 Td [(A)]TJ/F54 9.9626 Tf 10.381 0 Td [(must)-307(be)-308(squar)18(e;)-336(it)-308(may)-307(be)-308(r)18(eal)-307(or)-307(complex,)-322(nonsym-)]TJ -110.952 -11.955 Td [(metric,)-301(and)-291(its)-291(sparsity)-291(pattern)-291(needs)-291(not)-291(to)-291(be)-291(symmetric.)-433(The)-291(serial)-291(compu-)]TJ 0 -11.955 Td [(tation)-240(parts)-239(ar)18(e)-240(based)-240(on)-239(the)-240(serial)-240(sparse)-239(BLAS,)-240(so)-240(that)-239(any)-240(extension)-239(made)-240(to)]TJ 0 -11.956 Td [(the)-258(data)-258(str)8(uctur)18(es)-259(of)-258(the)-258(serial)-258(kernels)-258(is)-259(available)-258(to)-258(the)-258(parallel)-258(version.)-335(The)]TJ 0 -11.955 Td [(overall)-294(design)-294(and)-294(parallelization)-294(strategy)-294(have)-294(been)-294(in\003uenced)-294(by)-294(the)-294(str)8(uc-)]TJ 0 -11.955 Td [(tur)18(e)-306(of)-307(the)-306(ScaLAP)92(ACK)-306(parallel)-307(library)111(.)-479(The)-306(layer)18(ed)-306(str)8(uctur)18(e)-306(of)-307(the)-306(PSBLAS)]TJ 0 -11.955 Td [(library)-349(is)-349(shown)-348(in)-349(\002gur)18(e)]TJ -0 0 1 rg 0 0 1 RG - [-349(1)]TJ -0 g 0 G - [(;)-398(lower)-349(layers)-349(of)-349(the)-349(library)-349(in)1(dicate)-349(an)-349(encapsu-)]TJ 0 -11.955 Td [(lation)-314(r)18(elationship)-314(with)-313(upper)-314(layers.)-502(The)-314(ongoing)-314(discussion)-313(focuses)-314(on)-314(the)]TJ 0 -11.955 Td [(Fortran)-244(2003)-244(layer)-245(immediately)-244(below)-244(the)-244(application)-244(layer)74(.)-308(The)-245(serial)-244(parts)-244(of)]TJ 0 -11.956 Td [(the)-230(computation)-230(on)-230(each)-230(pr)18(ocess)-230(ar)18(e)-230(executed)-230(thr)18(ough)-230(calls)-230(to)-230(the)-230(serial)-230(sparse)]TJ 0 -11.955 Td [(BLAS)-307(subr)18(outines.)-482(In)-307(a)-307(similar)-308(way)111(,)-321(the)-307(inter)18(-pr)18(ocess)-308(message)-307(exchanges)-307(ar)18(e)]TJ 0 -11.955 Td [(encapsulated)-244(in)-243(an)-244(applicaiton)-244(layer)-243(that)-244(has)-244(been)-243(str)18(ongly)-244(inspir)18(ed)-244(by)-243(the)-244(Ba-)]TJ 0 -11.955 Td [(sic)-314(Linear)-313(Algebra)-314(Communication)-313(Subr)18(outines)-314(\050BLACS\051)-314(library)-313([)]TJ -1 0 0 rg 1 0 0 RG - [(7)]TJ -0 g 0 G - [(].)-501(Usually)]TJ 0 -11.955 Td [(ther)18(e)-315(is)-314(no)-315(need)-315(to)-314(deal)-315(dir)18(ectly)-314(with)-315(MPI;)-315(however)74(,)-330(in)-315(some)-315(cases,)-331(MPI)-314(r)18(ou-)]TJ 0 -11.955 Td [(tines)-219(ar)18(e)-219(used)-218(dir)18(ectly)-219(to)-219(impr)18(ove)-219(ef)18(\002ciency)111(.)-299(For)-219(further)-219(details)-219(on)-218(our)-219(commu-)]TJ 0 -11.956 Td [(nication)-250(layer)-250(see)-250(Sec.)]TJ -0 0 1 rg 0 0 1 RG - [-250(7)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -ET -1 0 0 1 258.536 281.98 cm -q -.65 0 0 .65 0 0 cm -q -1 0 0 1 0 0 cm -/Im2 Do -Q -Q -0 g 0 G -1 0 0 1 -258.536 -281.98 cm -BT -/F54 9.9626 Tf 216.385 250.1 Td [(Figur)18(e)-250(1:)-310(PSBLAS)-250(library)-250(components)-250(hierar)18(chy)111(.)]TJ -0 g 0 G -0 g 0 G - -50.736 -22.178 Td [(The)-370(type)-369(of)-370(linear)-369(system)-370(matrices)-370(that)-369(we)-370(addr)18(ess)-369(typically)-370(arise)-370(in)-369(the)]TJ -14.944 -11.955 Td [(numerical)-260(solution)-261(of)-260(PDEs;)-266(in)-260(such)-260(a)-261(context,)-263(it)-260(is)-261(necessary)-260(to)-260(pay)-261(special)-260(at-)]TJ 0 -11.955 Td [(tention)-297(to)-298(the)-297(str)8(uctur)18(e)-298(of)-297(the)-298(pr)18(oblem)-297(fr)18(om)-298(which)-297(the)-298(application)-297(originates.)]TJ 0 -11.955 Td [(The)-277(nonzer)18(o)-276(pattern)-277(of)-277(a)-276(matrix)-277(arising)-277(fr)18(om)-276(the)-277(discr)18(etization)-276(of)-277(a)-277(PDE)-276(is)-277(in-)]TJ 0 -11.956 Td [(\003uenced)-232(by)-232(various)-231(factors,)-236(such)-232(as)-232(the)-231(shape)-232(of)-232(the)-232(domain,)-235(the)-232(discr)18(etization)]TJ 0 -11.955 Td [(strategy)111(,)-313(and)-301(the)-300(equation/unknown)-301(or)18(dering.)-461(The)-301(matrix)-301(it)1(self)-301(can)-301(be)-300(inter)18(-)]TJ 0 -11.955 Td [(pr)18(eted)-291(as)-291(the)-291(adjacency)-291(matrix)-292(of)-291(the)-291(graph)-291(associated)-291(with)-291(the)-291(discr)18(etization)]TJ 0 -11.955 Td [(mesh.)]TJ 14.944 -11.955 Td [(The)-308(distribution)-308(of)-308(the)-309(coef)18(\002cient)-308(matrix)-308(for)-308(the)-308(linear)-309(system)-308(is)-308(based)-308(on)]TJ -14.944 -11.955 Td [(the)-314(\223owner)-314(computes\224)-314(r)8(ule:)-438(the)-314(variable)-314(associated)-314(to)-314(each)-314(mesh)-314(point)-314(is)-314(as-)]TJ -0 g 0 G - 169.365 -29.888 Td [(2)]TJ -0 g 0 G -ET - -endstream -endobj -794 0 obj +% 796 0 obj << -/Type /XObject -/Subtype /Form -/FormType 1 -/PTEX.FileName (./figures/psblas.pdf) -/PTEX.PageNumber 1 -/PTEX.InfoDict 800 0 R -/BBox [0 0 197 215] -/Resources << -/ProcSet [ /PDF /Text ] -/ExtGState << -/R7 801 0 R ->>/Font << /R8 802 0 R>> +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 450.523 358.772 462.583] +/A << /S /GoTo /D (subsection.7.4) >> >> -/Length 898 -/Filter /FlateDecode +% 797 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 438.429 307.635 450.489] +/A << /S /GoTo /D (subsection.7.5) >> >> -stream -xœµVM7 ½ëWèÖ4€Y‘ú>&@[HMl ‡¢câu·;{þý’#QvvÝKÐÂÏ{#=RüÐð«u€ÖɯÿOóÓÇl÷g³°öã¯ýá´7_ ¶çþ7ìÛ // ¢Ãl7÷¦É E„ŒŽ,E(%’ÝÌ«õîô°íúq{:ï~Üüe0ƒsµØÀíæ³yõöý›µ¼ûyc>Ÿ(Ú¿M, -¯ ¼”ìá3›"dQžÙ7_Û³ËXm2‚0— -¼î(:HhU3vܪÅ“Aª f6è=A΃AŸÄ!9`³Ç -PwæAt_UOÏ¡OæO³æl8f¢ãPHÄY¥æ«‡åÕÌ+k(*«¦XÁI䓉uyQ¥ -Åá ¢ï¹Éø*Þcv ( ñÌÿôÚÇJÊBt«FpÍ©vvË×VþÕ‘” -„rÓ‘û×&@^ -éÿ2!)Õš\‘Œ÷¼ža®8Î7L¤ÂõŸÇ )]aôà¥`;¡vç ãp½ªba=WY³¨Hj.8‰µ2Rt%ùÁ` R=EÝ 4¢{4_póXÕFwt{Š[Íf.óÈv¾†Ê‘yÎÌÆGÞ§Áxq€Èúš‹ú‚c—++SJëgÅ¡Br5XUäÛFZYMv8™\øH”˜M!‚¢%rl9ª§¸Ù›Œbõh¾0ÝgÔ3©EÅ-:º‹¼i¾>9ÎßÑò…/aëUËv±Òu‹ß`ú®›Lk„¡ÀtÕ—ßö¼E÷õ¶Ž¿áæsù—¼0ȽýßI -~·œíÕÏãÎh§ÀP‰ó+‰I‰Ë`ä8Äwsâ~L¥¼–U9‰–2ó¢ß”ÇM®F‘ -ÁóÕÞ²þ-ÃÉu1 ƒéuÄóŠ -}¦öqëXË|Vb´A—Ó6QƒŠY¡8Õ†% -Ûæ“Ž=…åËE²A5) -í}óiV<\îrãDÝ âÖß7x¤U -«Ͻ'ƒÇ›ÇÇùaÚ>=|9Êh±Büuæ¹£$îËLËàq·–ÑÃÊŸ5×k^Þ½;>íN÷Ûi™ZŠ\V+9D£­8îNËLÓG™÷×»~0+¾’”àŠ'¢ˆ±íúmw>o÷;{·=ŸŽûEý—»a¥ÃѲîîÝâ8SË4Â%ÕÇ¥_¾œžNÛ#OαéƒùüÐ -endstream -endobj -804 0 obj +% 798 0 obj << -/Filter /FlateDecode -/Subtype /Type1C -/Length 2887 +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 426.335 277.737 438.395] +/A << /S /GoTo /D (subsection.7.6) >> >> -stream -xœe–iXSWÇo É=Z¤-iʦ µ­m]‘Ö*ÕVYÔZQU (! KBI ,A8,!, Ö„M‚T¥*(¶ÖQ¬ ÖœÇn:Jg9—¹|˜‹vž~˜/÷¹÷žåyß÷ÿ?¿÷Ð0‡%Fs ¤ðÓ׋SbE‹ßë±b ±’IÑü?æw0VbôåБšV82ÑÔ«Èò2*x£Óhr}›¿X’•&HHÌð|÷ppø{k×®ûóÏ&ÏãYÿñ ৠDž«©_(–¤ðEÛ<ý©ÙB¡à„g‚0K’˜îÇ[\+ä'{î‰Xæù®ÿ{žÞ^^›ÖSÍ)Ç¥éžÏ#÷< öñ ô æ'H…±iÿ?‚a˜›¯ÈO,IÛž±76ðÄ~~|P‚ Dèéåý>†­ÂbobAØ!,{ Åck0?l=€mÀvc{°MاØgØØ~Œ†½Š±°×0wªx˜ÆÇîѲiW—ì]ÒA~Êá‡}¿2ü&&ÎŒg^Æ·g YúêÒK-=½ ,Ó'æã¡E oXhóafö±rMMⲌ¸îšÉ$ãrö‘«s²£‹Ýä(ÊŒGgm†ò:‘{-e sp­A¢O…*7˜¦È+rT…/¨Q áȆic8'¡‰p¿DûvÌÐë‰UìÊòšÒZlÕÊ(.YŠÃà\UDP¢DQ¡4@;@§ðÏGš =œkîå’|ª‘‹“âLx”.ÇÏ”…ÿ»ì{T¨Ø·‡ó/(‹Êä†f»9Ío„fy1Û'7;#ÆÒÎ}„pVRÃü6Ë$Çg´†\è ‚pè«Vo= XIrôW|N5õ²w[Fàçp8k@Ø‘b•>«36Õ´6–°­ÙÔ7莼'H_Žù˜ý7xVkWRËÇÓ­'ÝáaY܉Œä¬ãù{!ˆRUwsÑ e“¾ÚÄf²„ŸŒöþ8Œ·žKUEc#¼m´ŽPÝtTNÄ°É×½Ö’ÒãçÕÈ9?ýâ —OÈ׸j1{æÊZr%É8¶gg\|뀌+˾§À“ÑkSj/2¥Aј…R4Ý„ÓUç»3HúÈ…¥­&V²«Kõ°‚SzÍîB"Ãssƒ -kµiqÖ±F|g©º~ Ð&¼7©U3Zör@o£Wv=^ÂK;ÌãÞÇ ËËÕ¡ -7´f'»ó²ýìs;H@Ò£vú ¶wrœÐ#Jã-´{3t=áή+¯.5@ÐVÍã’å8Œ,(Ø—CiW`Â÷”+Lp )Þ}æ¼µ²Z«©åÔ«j ´šl=2‹ V”Åý ×–…IÔAr7'â hF“ƒ¨ºŸRôïs.¬D"e³‘2ÏÙ–¥Üù6JcÚt•VZÁD جlØÚ˜'Ñ›Œ“u_Žß)4ªà.°° ‡[sÕ;´”U{Ìø'ª:x z”†€á¦iα,êfç ×Ï-Ö:ôtƒ]„V0r™ÅÅy%%EЭjÊru€ÕP%•V¤zlŽðé>1ÍHêÍlH‡B7^²8B 46Ë9²¶¼õ8È&½ŒL}MEUV`5¬+n¡Äh(´Y‹=î5që|zÏA;—ľ×+Û Õm Ëvv¼SàÝBÕ¸K3F8v;‘–Ü?ãšEUD›us5Ìš’òBNA^a^†]X³¹…Zwx²´¸¼°n~A^dn錹r®¿u¬‡£®•¥)4R说~ÉE§qÖì3梌æy3 ástêÓØp§:gËIªTç͸OyvüÔ©ù¦Î8SÌäy9>ûâ,9Ašñà µÕùò÷è݇.¬âÀ×l…*G«„ £ n˜‹Âq8¦íP¶Jí1æ ÖûFfX2[Z›,Í%º=·¸º¤ -êµ»yàœUt˜s'7ìWäÇð¥™*L¬ˆÀaÞäð`ÓØ$‡e ×5Ê=NÁ&Co?5Dnüž …ÚÕ7 'a«Øøx‡wºQ¬öû ]£\K¾L™vÂÜÕßT§5rêS šZÌ-v^ûÁýa’£©ÜT^~BÉ6à“üyƒ ýrsüño4QFÌ­…wºœðhh@çaÿÛÊF4 -´ÇÐ0»½¸¡¤NË5gZ¯vÛ'á×pHÕ/é81üm=…Œ&«Ò7¿FýŸsaK¾fGÁ‹ân/W.š%$dV½ÞŠ<®ykMP$‰gªJË2ž3W4ŽÞ· —6”tZg£Ùn¡ìûVH"\Ù§?Á3¡²G»‡þšÜÝ‹ºhLø^]~Í"KÎ""Œ-ÉÈ‹›3l-ͶŒÕô¾PÙ¥‚Xœ§!Õ3Ö -"d~+[Æ,ÊÈÍU@ TT(ª«§.>¾*Ù#òI¢DaV, ‡Û†CCÐmþØñ6~µZ'ƒéàÓÈ£~Þ—S$'ƒÙI&2š™¬@s©¡ÎäQ ŠÌÔNÊþü„?~ðûw¡×I×)Øax\ê¿62$Oèåt%×¥ÖQíâyú´_gèæy:»’Y[j(­‚ »FEõÎTV©µT®¹&<°"§^Ä/xQY¸‚¯Ù­psBå”+TíóÎí´Ö§Èø”ŽôTj¡0VŸx(„Oâ\ -IZ¹t4¤w!}^€gì½—ú€h9@±^è Ò…s2•=3B2H>ÉçynÞÌû7ŠC‰#ˆþhñÚï-whèÇGtBŒV²Õºø`uÞ1èFæ3ÑvðøçÓ÷à´ÛoÝ34\z"ž“,P&+š´®¿Ú»oB03¼å㣶xsÉÝä!FáŽ/†m'~¼êŒn?àßvaÝDëÑ÷ì+°¥¸5ïšôt0Ü ¶æ+ÉÔ·ÆsRz…t¥J–xFzëþƒöÁQîè`û¼/dŠº2›åž™j@7GÛú.º?Üwyc$O‘œÀI‘(S¥ME®ƒßŒô| Áõ^`‚2!-›’"VîIN™"Ì‚B-ÌÎe³/uV9:Î6:.Ç°ÿMɪH -endstream -endobj -810 0 obj +% 799 0 obj << -/Length 8252 +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 414.241 393.262 426.3] +/A << /S /GoTo /D (subsection.7.7) >> >> -stream -0 g 0 G -0 g 0 G -BT -/F54 9.9626 Tf 99.895 706.129 Td [(signed)-263(to)-264(a)-263(pr)18(ocess)-263(that)-263(will)-264(own)-263(the)-263(corr)18(esponding)-263(r)18(ow)-263(in)-264(the)-263(coef)18(\002cient)-263(ma-)]TJ 0 -11.955 Td [(trix)-406(and)-406(will)-406(carry)-405(out)-406(all)-406(r)18(elated)-406(computations.)-778(This)-406(all)1(ocation)-406(strategy)-406(is)]TJ 0 -11.955 Td [(equivalent)-353(to)-353(a)-353(partition)-353(of)-353(the)-353(discr)18(etization)-353(mesh)-353(in)1(to)]TJ/F52 9.9626 Tf 253.543 0 Td [(sub-domains)]TJ/F54 9.9626 Tf 51.107 0 Td [(.)-619(Our)-353(li-)]TJ -304.65 -11.956 Td [(brary)-220(supports)-220(any)-220(distribution)-220(that)-220(keeps)-220(together)-220(the)-220(coef)18(\002cients)-220(of)-220(each)-220(ma-)]TJ 0 -11.955 Td [(trix)-244(r)18(ow;)-245(ther)18(e)-244(ar)18(e)-243(no)-244(other)-243(constraints)-244(on)-243(the)-243(variable)-244(assignment.)-308(This)-243(choice)]TJ 0 -11.955 Td [(is)-324(consistent)-324(with)-324(simple)-324(data)-325(distributions)-324(such)-324(as)]TJ/F59 9.9626 Tf 232.237 0 Td [(CYCLIC\050N\051)]TJ/F54 9.9626 Tf 50.302 0 Td [(and)]TJ/F59 9.9626 Tf 20.095 0 Td [(BLOCK)]TJ/F54 9.9626 Tf 26.152 0 Td [(,)-324(as)]TJ -328.786 -11.955 Td [(well)-310(as)-309(completely)-310(arbitrary)-310(assignments)-309(of)-310(equation)-310(indices)-309(to)-310(pr)18(ocesses.)-489(In)]TJ 0 -11.955 Td [(particular)-250(it)-250(is)-251(consistent)-250(with)-250(the)-250(usage)-250(of)-251(graph)-250(partitioning)-250(tools)-250(commonly)]TJ 0 -11.956 Td [(available)-333(in)-332(the)-333(literatur)18(e,)-353(e.g.)-558(METIS)-332([)]TJ -1 0 0 rg 1 0 0 RG - [(14)]TJ -0 g 0 G - [(].)-558(Dense)-333(ve)1(ctors)-333(conform)-333(to)-332(sparse)]TJ 0 -11.955 Td [(matrices,)-257(that)-255(is,)-257(the)-255(entries)-255(of)-256(a)-255(vector)-255(follow)-256(the)-255(same)-255(distribution)-256(of)-255(the)-255(ma-)]TJ 0 -11.955 Td [(trix)-250(r)18(ows.)]TJ 14.944 -12.648 Td [(W)92(e)-343(assume)-344(that)-343(the)-344(sparse)-343(matrix)-343(is)-344(built)-343(in)-344(parallel,)-366(wher)18(e)-344(each)-343(pr)18(ocess)]TJ -14.944 -11.955 Td [(generates)-254(its)-254(own)-255(portion.)-322(W)92(e)-255(never)-254(r)18(equir)18(e)-254(that)-254(the)-255(entir)18(e)-254(matrix)-254(be)-254(available)]TJ 0 -11.955 Td [(on)-288(a)-288(single)-288(node.)-423(However)74(,)-298(it)-287(is)-288(possible)-288(to)-288(hold)-288(the)-288(entir)18(e)-288(matrix)-287(in)-288(one)-288(pr)18(o-)]TJ 0 -11.955 Td [(cess)-241(and)-242(distribute)-241(it)-241(explicitly)]TJ -0 0 1 rg 0 0 1 RG -/F54 7.5716 Tf 133.807 3.616 Td [(1)]TJ -0 g 0 G -/F54 9.9626 Tf 4.284 -3.616 Td [(,)-243(even)-241(though)-242(the)-241(r)18(esulting)-241(memory)-241(bottleneck)]TJ -138.091 -11.955 Td [(would)-250(make)-250(this)-250(option)-250(unattractive)-250(in)-250(most)-250(cases.)]TJ/F51 11.9552 Tf 0 -33.074 Td [(2.1)-1000(Basic)-250(Nomenclature)]TJ/F54 9.9626 Tf 0 -20.306 Td [(Our)-301(computational)-301(model)-301(implies)-301(that)-301(the)-301(data)-301(al)1(location)-301(on)-301(the)-301(parallel)-301(dis-)]TJ 0 -11.955 Td [(tributed)-370(memory)-369(machine)-370(is)-370(guided)-370(by)-369(the)-370(str)8(uctur)18(e)-370(of)-370(the)-369(physical)-370(model,)]TJ 0 -11.955 Td [(and)-250(speci\002cally)-250(by)-250(the)-250(discr)18(etization)-250(mesh)-250(of)-250(the)-250(PDE.)]TJ 14.944 -12.648 Td [(Each)-400(point)-400(of)-400(the)-399(discr)18(etization)-400(mesh)-400(will)-400(have)-400(\050at)-400(least)1(\051)-400(one)-400(associated)]TJ -14.944 -11.955 Td [(equation/variable,)-416(and)-384(ther)18(efor)18(e)-383(one)-383(index.)-710(W)92(e)-383(say)-383(that)-384(point)]TJ/F52 9.9626 Tf 289.765 0 Td [(i)-403(depends)]TJ/F54 9.9626 Tf 42.709 0 Td [(on)]TJ -332.474 -11.955 Td [(point)]TJ/F52 9.9626 Tf 26.955 0 Td [(j)]TJ/F54 9.9626 Tf 6.004 0 Td [(if)-312(the)-312(equation)-312(for)-312(a)-312(variable)-313(associated)-312(with)]TJ/F52 9.9626 Tf 202.502 0 Td [(i)]TJ/F54 9.9626 Tf 6.074 0 Td [(contains)-312(a)-312(term)-312(in)]TJ/F52 9.9626 Tf 84.153 0 Td [(j)]TJ/F54 9.9626 Tf 2.894 0 Td [(,)-328(or)]TJ -328.582 -11.955 Td [(equivalently)-291(if)]TJ/F52 9.9626 Tf 67.321 0 Td [(a)]TJ/F52 7.5716 Tf 4.59 -1.96 Td [(i)-67(j)]TJ/F83 10.3811 Tf 8.967 1.96 Td [(6)]TJ/F85 10.3811 Tf 0.249 0 Td [(=)]TJ/F54 9.9626 Tf 11.726 0 Td [(0.)-434(After)-292(the)-291(partition)-292(of)-291(the)-292(discr)18(etization)-291(mesh)-292(into)]TJ/F52 9.9626 Tf 233.514 0 Td [(sub-)]TJ -326.367 -11.955 Td [(domains)]TJ/F54 9.9626 Tf 37.559 0 Td [(assigned)-381(to)-381(the)-381(parallel)-381(pr)18(ocesses,)-413(we)-381(classify)-381(the)-381(points)-381(of)-381(a)-381(given)]TJ -37.559 -11.955 Td [(sub-domain)-250(as)-250(following.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -22.003 Td [(Internal.)]TJ -0 g 0 G -/F54 9.9626 Tf 43.995 0 Td [(An)-359(internal)-359(poi)1(nt)-359(of)-359(a)-359(given)-359(domain)]TJ/F52 9.9626 Tf 168.65 0 Td [(depends)]TJ/F54 9.9626 Tf 35.684 0 Td [(only)-359(on)-359(points)-358(of)-359(the)]TJ -223.422 -11.955 Td [(same)-264(domain.)-351(If)-264(all)-264(points)-264(of)-264(a)-264(domain)-263(ar)18(e)-264(assigned)-264(to)-264(one)-264(pr)18(ocess,)-267(then)]TJ 0 -11.956 Td [(a)-196(computational)-196(step)-195(\050e.g.,)-207(a)-196(matrix-vector)-196(pr)18(oduct\051)-196(of)-195(the)-196(equations)-196(asso-)]TJ 0 -11.955 Td [(ciated)-214(with)-213(the)-214(internal)-214(points)-214(r)18(equir)18(es)-213(no)-214(data)-214(items)-214(fr)18(om)-213(other)-214(domains)]TJ 0 -11.955 Td [(and)-250(no)-250(communications.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -22.695 Td [(Boundary)92(.)]TJ -0 g 0 G -/F54 9.9626 Tf 51.397 0 Td [(A)-192(point)-191(of)-192(a)-192(given)-191(domain)-192(is)-192(a)-191(boundary)-192(point)-192(if)-191(it)]TJ/F52 9.9626 Tf 217.552 0 Td [(depends)]TJ/F54 9.9626 Tf 34.019 0 Td [(on)-192(points)]TJ -278.061 -11.955 Td [(belonging)-250(to)-250(other)-250(domains.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -22.696 Td [(Halo.)]TJ -0 g 0 G -/F54 9.9626 Tf 29.609 0 Td [(A)-389(halo)-389(point)-389(for)-389(a)-389(given)-389(domain)-389(i)1(s)-389(a)-389(point)-389(belonging)-389(to)-389(another)-389(do-)]TJ -4.702 -11.955 Td [(main)-267(such)-267(that)-266(ther)18(e)-267(is)-267(a)-267(boundary)-267(point)-266(which)]TJ/F52 9.9626 Tf 212.474 0 Td [(depends)]TJ/F54 9.9626 Tf 34.767 0 Td [(on)-267(it.)-360(Whenever)]TJ -247.241 -11.955 Td [(performing)-360(a)-361(computational)-360(step,)-388(such)-361(as)-360(a)-361(matrix-vector)-360(pr)18(oduct,)-388(the)]TJ 0 -11.955 Td [(values)-274(associated)-273(with)-274(halo)-274(points)-274(ar)18(e)-274(r)18(equested)-273(fr)18(om)-274(other)-274(domains.)-381(A)]TJ 0 -11.955 Td [(boundary)-259(point)-258(of)-259(a)-258(given)-259(domain)-258(is)-259(usually)-258(a)-259(halo)-258(point)-259(for)-259(some)-258(other)]TJ 0 -11.956 Td [(domain)]TJ -0 0 1 rg 0 0 1 RG -/F54 7.5716 Tf 34.002 3.617 Td [(2)]TJ -0 g 0 G -/F54 9.9626 Tf 4.284 -3.617 Td [(;)-238(ther)18(efor)18(e)-232(the)-233(car)18(dinality)-232(of)-232(the)-232(boundary)-233(p)1(oints)-233(set)-232(denotes)-232(the)]TJ -38.286 -11.955 Td [(amount)-250(of)-250(data)-250(sent)-250(to)-250(other)-250(domains.)]TJ -0 g 0 G -ET -q -1 0 0 1 99.895 168.389 cm -[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S -Q -BT -/F54 5.9776 Tf 110.755 161.427 Td [(1)]TJ/F54 7.9701 Tf 3.487 -2.893 Td [(In)-250(our)-250(pr)18(ototype)-250(implementation)-250(we)-250(pr)18(ovide)-250(sample)-250(scatter/gather)-250(r)18(outines.)]TJ/F54 5.9776 Tf -3.487 -6.922 Td [(2)]TJ/F54 7.9701 Tf 3.487 -2.893 Td [(This)-401(is)-402(the)-401(normal)-402(situation)-401(when)-402(the)-401(pattern)-402(of)-401(the)-402(sparse)-401(matrix)-402(is)-401(symmetric,)-440(which)-401(is)]TJ -14.347 -9.464 Td [(equivalent)-358(to)-358(say)-358(that)-358(the)-357(interaction)-358(between)-358(two)-358(variables)-358(is)-358(r)18(ecipr)18(ocal.)-634(If)-357(the)-358(matrix)-358(pattern)]TJ 0 -9.465 Td [(is)-241(non-symmetric)-241(we)-242(may)-241(have)-241(one-way)-241(interactions,)-243(and)-241(these)-241(could)-241(cause)-242(a)-241(situation)-241(in)-241(which)-241(a)]TJ 0 -9.464 Td [(boundary)-250(point)-250(is)-250(not)-250(a)-250(halo)-250(point)-250(for)-250(its)-250(neighbour)74(.)]TJ -0 g 0 G -0 g 0 G -/F54 9.9626 Tf 169.365 -29.888 Td [(3)]TJ -0 g 0 G -ET - -endstream -endobj -823 0 obj +% 800 0 obj << -/Length 4830 +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 402.147 288.227 414.206] +/A << /S /GoTo /D (subsection.7.8) >> >> -stream -0 g 0 G -0 g 0 G -0 g 0 G -BT -/F51 9.9626 Tf 150.705 706.129 Td [(Overlap.)]TJ -0 g 0 G -/F54 9.9626 Tf 44.553 0 Td [(An)-245(overlap)-244(point)-245(is)-245(a)-245(boundary)-244(point)-245(assigned)-245(to)-244(multiple)-245(domains.)]TJ -19.647 -11.955 Td [(Any)-204(operation)-204(that)-204(involves)-204(an)-204(overlap)-204(point)-204(has)-204(to)-204(be)-204(r)18(eplicated)-204(for)-204(each)]TJ 0 -11.955 Td [(assignment.)]TJ -24.906 -18.943 Td [(Overlap)-358(points)-359(do)-358(not)-358(usually)-359(exist)-358(in)-359(the)-358(basic)-358(data)-359(distributions;)-412(however)]TJ 0 -11.955 Td [(they)-325(ar)18(e)-326(a)-325(featur)18(e)-326(of)-325(Domain)-326(Decomposition)-325(Schwarz)-326(pr)18(econditioners)-325(which)]TJ 0 -11.956 Td [(ar)18(e)-250(the)-250(subject)-250(of)-250(r)18(elated)-250(r)18(esear)18(ch)-250(work)-250([)]TJ -1 0 0 rg 1 0 0 RG - [(4)]TJ -0 g 0 G - [(,)]TJ -1 0 0 rg 1 0 0 RG - [-250(3)]TJ -0 g 0 G - [(].)]TJ 14.944 -11.955 Td [(W)92(e)-225(denote)-225(the)-225(sets)-225(of)-225(internal,)-230(boundary)-225(and)-225(halo)-225(points)-225(for)-225(a)-225(given)-225(subdo-)]TJ -14.944 -11.955 Td [(main)-251(by)]TJ/F83 10.3811 Tf 38.66 0 Td [(I)]TJ/F54 9.9626 Tf 6.53 0 Td [(,)]TJ/F83 10.3811 Tf 5.125 0 Td [(B)]TJ/F54 9.9626 Tf 9.753 0 Td [(and)]TJ/F83 10.3811 Tf 19.497 0 Td [(H)]TJ/F54 9.9626 Tf 8.972 0 Td [(.)-314(Each)-252(subdomain)-251(is)-252(assigned)-251(to)-252(one)-251(pr)18(ocess;)-253(each)-251(pr)18(ocess)]TJ -88.537 -11.955 Td [(usually)-346(owns)-346(one)-346(su)1(bdomain,)-370(although)-346(the)-346(user)-346(may)-346(choose)-345(to)-346(assign)-346(mor)18(e)]TJ 0 -11.955 Td [(than)-302(one)-301(subdomain)-302(to)-301(a)-302(pr)18(ocess.)-465(If)-302(each)-301(pr)18(ocess)]TJ/F52 9.9626 Tf 222.767 0 Td [(i)]TJ/F54 9.9626 Tf 5.968 0 Td [(owns)-302(one)-301(subdomain,)-315(the)]TJ -228.735 -11.956 Td [(number)-221(of)-221(r)18(ows)-221(in)-221(the)-221(local)-221(sparse)-221(matrix)-221(is)]TJ/F83 10.3811 Tf 192.655 0 Td [(j)-24(I)]TJ/F52 7.5716 Tf 8.943 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F85 10.3811 Tf 4.799 0 Td [(+)]TJ/F83 10.3811 Tf 9.989 0 Td [(j)-24(B)]TJ/F52 7.5716 Tf 10.108 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.876 1.96 Td [(j)]TJ/F54 9.9626 Tf 3.003 0 Td [(,)-227(and)-221(the)-221(number)-221(of)-221(local)]TJ -235.248 -11.955 Td [(columns)-207(\050i.e.)-296(those)-207(for)-207(which)-207(ther)18(e)-208(exists)-207(at)-207(least)-207(one)-207(non-zer)18(o)-208(entry)-207(in)-207(the)-207(local)]TJ 0 -11.955 Td [(r)18(ows\051)-250(is)]TJ/F83 10.3811 Tf 37.275 0 Td [(j)-24(I)]TJ/F52 7.5716 Tf 8.943 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F85 10.3811 Tf 5.066 0 Td [(+)]TJ/F83 10.3811 Tf 10.255 0 Td [(j)-24(B)]TJ/F52 7.5716 Tf 10.109 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F85 10.3811 Tf 5.066 0 Td [(+)]TJ/F83 10.3811 Tf 10.256 0 Td [(j)-24(H)]TJ/F52 7.5716 Tf 12.051 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F54 9.9626 Tf 3.004 0 Td [(.)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -ET -1 0 0 1 222.462 541.675 cm -q -0 -1 1 0 0 0 cm -q -.65 0 0 .65 0 0 cm -q -1 0 0 1 0 0 cm -/Im3 Do -Q -Q -Q -0 g 0 G -1 0 0 1 -222.462 -541.675 cm -BT -/F54 9.9626 Tf 260.803 335.398 Td [(Figur)18(e)-250(2:)-310(Point)-250(class\002cation.)]TJ -0 g 0 G -0 g 0 G - -95.154 -23.688 Td [(This)-190(classi\002cation)-190(of)-190(mesh)-190(points)-190(guides)-190(the)-190(naming)-190(scheme)-190(that)-190(we)-190(adopted)]TJ -14.944 -11.956 Td [(in)-190(the)-190(library)-190(internals)-190(and)-190(in)-190(the)-190(data)-190(str)8(uctur)18(es.)-290(W)92(e)-190(explicitly)-190(note)-190(that)-190(\223Halo\224)]TJ 0 -11.955 Td [(points)-250(ar)18(e)-250(also)-250(often)-250(called)-250(\223ghost\224)-250(points)-250(in)-250(the)-250(literatur)18(e.)]TJ/F51 11.9552 Tf 0 -28.902 Td [(2.2)-1000(Library)-250(contents)]TJ/F54 9.9626 Tf 0 -18.964 Td [(The)-250(PSBLAS)-250(library)-250(consists)-250(of)-250(various)-250(classes)-250(of)-250(subr)18(outines:)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -18.943 Td [(Computational)-250(routines)]TJ -0 g 0 G -/F54 9.9626 Tf 113.723 0 Td [(comprising:)]TJ -0 g 0 G - -77.917 -19.434 Td [(\225)]TJ -0 g 0 G - [-500(Sparse)-250(matrix)-250(by)-250(dense)-250(matrix)-250(pr)18(oduct;)]TJ -0 g 0 G - 0 -15.449 Td [(\225)]TJ -0 g 0 G - [-500(Sparse)-250(triangular)-250(systems)-250(solution)-250(for)-250(block)-250(diagonal)-250(matrices;)]TJ -0 g 0 G - 0 -15.449 Td [(\225)]TJ -0 g 0 G - [-500(V)111(ector)-250(and)-250(matrix)-250(norms;)]TJ -0 g 0 G - 0 -15.449 Td [(\225)]TJ -0 g 0 G - [-500(Dense)-250(matrix)-250(sums;)]TJ -0 g 0 G - 0 -15.449 Td [(\225)]TJ -0 g 0 G - [-500(Dot)-250(pr)18(oducts.)]TJ -0 g 0 G -/F51 9.9626 Tf -35.806 -19.434 Td [(Communication)-250(routines)]TJ -0 g 0 G -/F54 9.9626 Tf 118.704 0 Td [(handling)-250(halo)-250(and)-250(overlap)-250(communications;)]TJ -0 g 0 G - 50.661 -29.888 Td [(4)]TJ -0 g 0 G -ET - -endstream -endobj -820 0 obj +% 801 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 390.052 260.403 402.112] +/A << /S /GoTo /D (subsection.7.9) >> +>> +% 802 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 377.958 242.878 390.018] +/A << /S /GoTo /D (subsection.7.10) >> +>> +% 803 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 365.864 269.159 377.924] +/A << /S /GoTo /D (subsection.7.11) >> +>> +% 804 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 353.77 266.31 365.829] +/A << /S /GoTo /D (subsection.7.12) >> +>> +% 805 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 341.676 335.51 353.735] +/A << /S /GoTo /D (subsection.7.13) >> +>> +% 806 0 obj << -/Type /XObject -/Subtype /Form -/FormType 1 -/PTEX.FileName (./figures/points.pdf) -/PTEX.PageNumber 1 -/PTEX.InfoDict 826 0 R -/BBox [0 0 274 308] -/Resources << -/ProcSet [ /PDF /Text ] -/ExtGState << -/R7 827 0 R ->>/Font << /R8 828 0 R>> +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 329.581 334.742 341.641] +/A << /S /GoTo /D (subsection.7.14) >> >> -/Length 1397 -/Filter /FlateDecode +% 807 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 317.487 305.523 329.547] +/A << /S /GoTo /D (subsection.7.15) >> >> -stream -xœÝYËŽ5Ý÷Wô’ q±Ën»ï6 -– 󣄠¹3ÊBü=®§«æ±bA‡HŒ}Ï©c·í²»?­é”ׄÿäïÍeùö]_?ü¾¤Ó©d êwßGüðaù´d"®òçæ²¾¾ä}ÍíëÕûe4­ß ,äýÔ×sÿ»º,_ýx÷Ç/w×·¯®~[¾»ZÞ.ø›Œ1¸ð™âuóâ¯ïÿ¼ûùúáoO*žþx/þÃõí½Î22Tø<ᜇd†&Âoî/×ïV˜âÿõèCê1V^õd¨æõãR ¬Û9ŸÎç¶^–ºµÓ¾ÍšÚýÝz¦zõ¯7‹!€S®ûj짔êJÚR¿–ðWZSöN•m˜´ ide«3çûfyÿõROÛú×|J_F¿~]~z2ò–}×òVÐÕämë¦Î€sQ<I<³¦uiüd¸r͵9.Ö¤¢ÆR’ÉÑãY~ОÐCÑÝ¥Ÿ}öçÙ^â<3LA ‰c‹YÒ¶®ôçY¯qž&mCÙØâÌû懣ç—Ñ#|H–_rƧšÇÒ³,wš0s>}yüÇ5ÒNóË p%U¤ –ðW@E’§$§•|¡pxõE`&ÆøåU ™¤ó«›%AÝIUÍ0Gš]ý‘&ûÖM’ î Jšx÷¬…T.ù)~¼C²8˜}~‚­ÛÍWÛ¢íÁvKÑö¶K,8ÛÍ—&†`[C*—ü¨ONÔÇs­ƒ ½m‚ê ò9؆Áu¶!×`{P9¦m‚êKI7oÛB*—ü¨O샹~ñ̳·Ç'­¡Á^ÝIaÏvRy!œzw'ó¤`Íx"0.Ѥb'…iÄù|ùÌs¼žP:-%X/[´^º“#Àa°há…dÞPÓY/)Z‡Ýqˆ&-VŠÖ½ON¬Çtnƒ®G±À¹ÍY–& é›Ë’וB¿Ìœ¤¡¹M…ÁnngäŽ%¤Ò#ØœÃÉÙÇ‚"d;’Àô)ùÃ(˜\X‹³Ž¥²£0}Z¡pø#`Ó†Sò‹%Hvt§Ð̧f£`ú`-Î+”ÐŽQ4ó9ƒ…Ç,x›O/,îf,z»âißn«ªÝìv«$½úæ-ÜŒå`?›“禩™|,ˆ7cïó™;Ìñº@!osõé]Š¦?ݲta0€yýÒ¥¤Zdy›«OïRÜ<%9­äƒ€[}拇ú6m8uõIPžþhǃf>m))…YÞæê“ Ò<%9­äƒ€[}ækçÿÜæ“WO’rõ= A} £ Ñ0'Ë 9‘S,irêÕ÷+\_ã­uâÝ¿›ÑÆE?æóé{¦ƒÙÇá'È‹ÎB#4_²$&†`[–’qq‘‘&/> Mõ5^_'†`[Bý˜OõºÖÁ–%©¡ ª/]07o[šqq ’&/M Íõ5^_'nÞ¶†4.ú1Ÿ6ØsýÜ¥%]Š!ƒCÞgVe@Ù–‹’…$)š5-ƒÃØ5}‡ä²?ÖLg+‡ |>{é>hO‘jøX5~,ê>–0àxÕ},1’š¬ác ”ø±ŠûX€5‹ûXb$3òø³ Ú…t¡í¡=Å>tpº8Õ‡’Ô$iÎ>´-ö¡Ç%ÀšTÔXJR#ÞgL¼í“-J/0®jãȶw.Þâªick£Z,”Ô¤š^”Ñk·ì«éUÝ ‹¯WjÇ‚µÛçƒ.ÁºUE³zÉgýãPˆ,é"›Ñe±ûÌ‹:t˜!*%~ Ö *«QÊÒ@emPMÓ1:¾Þ’àX¼÷(˜®4æ ¤Nƒ¾]þÎJ¦' -endstream -endobj -748 0 obj +% 808 0 obj << -/Type /ObjStm -/N 100 -/First 919 -/Length 15283 ->> -stream -699 0 700 151 701 304 702 457 703 610 704 762 705 914 706 1067 707 1220 708 1373 -709 1526 710 1678 711 1831 712 1977 713 2129 743 2281 714 2432 715 2584 716 2736 717 2888 -718 3040 719 3190 720 3342 721 3494 722 3647 723 3800 724 3951 725 4103 726 4256 727 4409 -728 4562 729 4715 730 4861 731 5013 732 5165 733 5317 744 5469 734 5620 735 5766 745 5918 -736 6069 746 6221 737 6372 747 6524 742 6673 739 6729 761 6809 738 7055 764 7207 749 7359 -765 7511 750 7663 766 7815 751 7967 752 8112 753 8265 754 8418 755 8571 756 8724 757 8877 -758 9030 759 9177 763 9330 760 9387 778 9467 767 9673 768 9823 769 9974 770 10125 771 10279 -772 10429 773 10579 774 10729 775 10877 776 11025 7 11173 777 11227 797 11320 800 11470 801 11711 -802 11753 803 12139 791 12439 792 12585 793 12732 11 12879 799 12935 796 12992 809 13113 795 13263 -806 13411 807 13560 811 13709 15 13765 815 13820 816 13877 808 13934 822 14066 826 14208 827 14322 -% 699 0 obj +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [113.843 305.393 232.786 317.453] +/A << /S /GoTo /D (subsection.7.16) >> +>> +% 809 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 666.041 386.806 678.1] -/A << /S /GoTo /D (subsection.6.16) >> +/Rect [113.843 293.299 242.519 305.359] +/A << /S /GoTo /D (subsection.7.17) >> >> -% 700 0 obj +% 810 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 653.946 368.116 666.006] -/A << /S /GoTo /D (subsection.6.17) >> +/Rect [98.899 271.309 183.083 283.109] +/A << /S /GoTo /D (section.8) >> >> -% 701 0 obj +% 811 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 641.852 370.219 653.912] -/A << /S /GoTo /D (subsection.6.18) >> +/Rect [113.843 259.055 387.474 271.115] +/A << /S /GoTo /D (subsection.8.1) >> >> -% 702 0 obj +% 812 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 629.758 214.116 641.818] -/A << /S /GoTo /D (subsection.6.19) >> +/Rect [113.843 246.961 415.897 259.021] +/A << /S /GoTo /D (subsection.8.2) >> >> -% 703 0 obj +% 813 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 617.664 231.69 629.724] -/A << /S /GoTo /D (subsection.6.20) >> +/Rect [113.843 234.867 399.738 246.926] +/A << /S /GoTo /D (subsection.8.3) >> >> -% 704 0 obj +% 814 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 605.57 204.353 617.629] -/A << /S /GoTo /D (subsection.6.21) >> +/Rect [113.843 222.773 444.603 234.832] +/A << /S /GoTo /D (subsection.8.4) >> >> -% 705 0 obj +% 825 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 593.476 221.927 605.535] -/A << /S /GoTo /D (subsection.6.22) >> +/Rect [98.899 213.468 180.781 222.877] +/A << /S /GoTo /D (subsection.8.4) >> >> -% 706 0 obj +% 815 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 581.381 379.076 593.441] -/A << /S /GoTo /D (subsection.6.23) >> +/Rect [98.899 191.298 152.896 200.628] +/A << /S /GoTo /D (section.9) >> >> -% 707 0 obj +% 816 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 569.287 359.768 581.347] -/A << /S /GoTo /D (subsection.6.24) >> +/Rect [113.843 176.574 444.603 188.633] +/A << /S /GoTo /D (subsection.9.1) >> >> -% 708 0 obj +% 826 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 557.193 373.158 569.253] -/A << /S /GoTo /D (subsection.6.25) >> +/Rect [98.899 164.619 201.494 176.678] +/A << /S /GoTo /D (subsection.9.1) >> >> -% 709 0 obj +% 817 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 545.099 287.68 557.158] -/A << /S /GoTo /D (subsection.6.26) >> +/Rect [113.843 152.524 444.603 164.584] +/A << /S /GoTo /D (subsection.9.2) >> >> -% 710 0 obj +% 827 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 533.005 221.369 545.064] -/A << /S /GoTo /D (subsection.6.27) >> +/Rect [98.899 143.219 168.468 152.629] +/A << /S /GoTo /D (subsection.9.2) >> >> -% 711 0 obj +% 818 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 513.485 251.974 522.815] -/A << /S /GoTo /D (section.7) >> +/Rect [113.843 128.475 444.603 140.535] +/A << /S /GoTo /D (subsection.9.3) >> >> -% 712 0 obj +% 828 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 498.761 364.739 510.821] -/A << /S /GoTo /D (subsection.7.1) >> +/Rect [98.899 119.17 202.859 128.58] +/A << /S /GoTo /D (subsection.9.3) >> >> -% 713 0 obj +% 823 0 obj +<< +/D [821 0 R /XYZ 98.895 753.953 null] +>> +% 820 0 obj +<< +/Font << /F62 667 0 R /F59 665 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 861 0 obj +<< +/Type /Page +/Contents 862 0 R +/Resources 860 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 668 0 R +/Annots [ 819 0 R 864 0 R 829 0 R 865 0 R 830 0 R 866 0 R 831 0 R 832 0 R 833 0 R 834 0 R 835 0 R 836 0 R 837 0 R 838 0 R 839 0 R 840 0 R 841 0 R 842 0 R 843 0 R 844 0 R 845 0 R 846 0 R 847 0 R 848 0 R 849 0 R 850 0 R 851 0 R 852 0 R 853 0 R 854 0 R 855 0 R 856 0 R 857 0 R 858 0 R 859 0 R ] +>> +% 819 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 486.667 444.603 498.726] -/A << /S /GoTo /D (subsection.7.2) >> +/Rect [164.653 702.323 495.412 714.383] +/A << /S /GoTo /D (subsection.9.4) >> >> -% 743 0 obj +% 864 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 477.362 161.365 486.467] -/A << /S /GoTo /D (subsection.7.2) >> +/Rect [149.709 693.018 253.668 702.428] +/A << /S /GoTo /D (subsection.9.4) >> >> -% 714 0 obj +% 829 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 462.617 364.172 474.677] -/A << /S /GoTo /D (subsection.7.3) >> +/Rect [164.653 678.413 495.412 690.472] +/A << /S /GoTo /D (subsection.9.5) >> >> -% 715 0 obj +% 865 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 450.523 358.772 462.583] -/A << /S /GoTo /D (subsection.7.4) >> +/Rect [149.709 669.108 253.668 678.517] +/A << /S /GoTo /D (subsection.9.5) >> >> -% 716 0 obj +% 830 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 438.429 307.635 450.489] -/A << /S /GoTo /D (subsection.7.5) >> +/Rect [164.653 654.503 495.412 666.562] +/A << /S /GoTo /D (subsection.9.6) >> >> -% 717 0 obj +% 866 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 426.335 277.737 438.395] -/A << /S /GoTo /D (subsection.7.6) >> +/Rect [149.709 645.197 253.668 654.607] +/A << /S /GoTo /D (subsection.9.6) >> >> -% 718 0 obj +% 831 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 414.241 393.262 426.3] -/A << /S /GoTo /D (subsection.7.7) >> +/Rect [149.709 623.26 274.28 632.59] +/A << /S /GoTo /D (section.10) >> >> -% 719 0 obj +% 832 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 402.147 288.227 414.206] -/A << /S /GoTo /D (subsection.7.8) >> +/Rect [164.653 608.674 333.298 620.734] +/A << /S /GoTo /D (subsection.10.1) >> >> -% 720 0 obj +% 833 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 390.052 260.403 402.112] -/A << /S /GoTo /D (subsection.7.9) >> +/Rect [164.653 596.719 331.326 608.779] +/A << /S /GoTo /D (subsection.10.2) >> >> -% 721 0 obj +% 834 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 377.958 242.878 390.018] -/A << /S /GoTo /D (subsection.7.10) >> +/Rect [164.653 584.764 381.626 596.824] +/A << /S /GoTo /D (subsection.10.3) >> >> -% 722 0 obj +% 835 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 365.864 269.159 377.924] -/A << /S /GoTo /D (subsection.7.11) >> +/Rect [164.653 572.809 427.165 584.869] +/A << /S /GoTo /D (subsection.10.4) >> >> -% 723 0 obj +% 836 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 353.77 266.31 365.829] -/A << /S /GoTo /D (subsection.7.12) >> +/Rect [164.653 560.854 353.343 572.913] +/A << /S /GoTo /D (subsection.10.5) >> >> -% 724 0 obj +% 837 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [164.653 548.899 315.177 560.958] +/A << /S /GoTo /D (subsection.10.6) >> +>> + +endstream +endobj +880 0 obj +<< +/Length 8045 +>> +stream +0 g 0 G +0 g 0 G +BT +/F59 14.3462 Tf 99.895 705.784 Td [(1)-1000(Introduction)]TJ/F62 9.9626 Tf 0 -22.913 Td [(The)-272(PSBLAS)-271(library)111(,)-277(developed)-272(with)-272(t)1(he)-272(aim)-272(to)-271(facilitate)-272(the)-272(parallelization)-271(of)]TJ 0 -11.955 Td [(computationally)-348(intensive)-347(scienti\002c)-348(applications,)-372(is)-347(designed)-348(to)-348(addr)18(ess)-347(par)18(-)]TJ 0 -11.955 Td [(allel)-282(implementation)-283(of)-282(iterative)-282(solvers)-283(for)-282(sparse)-282(linear)-283(systems)-282(thr)18(ough)-282(the)]TJ 0 -11.955 Td [(distributed)-232(memory)-232(paradigm.)-304(It)-232(includes)-233(r)18(outines)-232(for)-232(multiplying)-232(sparse)-232(ma-)]TJ 0 -11.955 Td [(trices)-211(by)-211(dense)-211(matrices,)-219(solving)-211(block)-211(diagonal)-211(systems)-211(with)-211(triangular)-211(diago-)]TJ 0 -11.956 Td [(nal)-229(entries,)-233(pr)18(epr)18(ocessing)-228(sparse)-229(matrices,)-233(and)-228(contains)-229(additional)-229(r)18(outines)-228(for)]TJ 0 -11.955 Td [(dense)-292(matrix)-292(operations.)-436(The)-292(curr)18(ent)-292(implementation)-292(of)-292(PSBLAS)-292(addr)18(esses)-292(a)]TJ 0 -11.955 Td [(distributed)-250(memory)-250(execution)-250(model)-250(operating)-250(with)-250(message)-250(passing.)]TJ 14.944 -12.064 Td [(The)-267(PSBLAS)-267(library)-268(version)-267(3)-267(is)-267(implemented)-267(in)-267(the)-268(Fortran)-267(2003)-267([)]TJ +1 0 0 rg 1 0 0 RG + [(16)]TJ +0 g 0 G + [(])-267(pr)18(o-)]TJ -14.944 -11.955 Td [(gramming)-278(language,)-284(with)-277(r)18(euse)-278(and/or)-278(adaptation)-277(of)-278(existing)-277(Fortran)-278(77)-277(and)]TJ 0 -11.955 Td [(Fortran)-250(95)-250(softwar)18(e,)-250(plus)-250(a)-250(handful)-250(of)-250(C)-250(r)18(outines.)]TJ 14.944 -12.064 Td [(The)-391(use)-392(of)-391(Fortran)-392(2003)-391(of)18(fers)-392(a)-391(number)-391(of)-392(advantages)-391(over)-392(Fortran)-391(95,)]TJ -14.944 -11.955 Td [(mostly)-385(in)-385(the)-385(handling)-385(of)-385(r)18(equir)18(ements)-385(for)-385(evolution)-385(and)-385(adaptation)-385(of)-385(the)]TJ 0 -11.956 Td [(library)-431(to)-432(new)-431(computing)-432(ar)18(chitectur)18(es)-431(and)-432(integration)-431(of)-432(new)-431(algorithms.)]TJ 0 -11.955 Td [(For)-365(a)-365(detail)1(ed)-365(discussion)-365(of)-365(our)-364(design)-365(see)-365([)]TJ +1 0 0 rg 1 0 0 RG + [(10)]TJ +0 g 0 G + [(];)-422(other)-365(works)-364(discussing)-365(ad-)]TJ 0 -11.955 Td [(vanced)-361(pr)18(ogramming)-362(in)-361(Fortran)-362(2003)-361(include)-362([)]TJ +1 0 0 rg 1 0 0 RG + [(20)]TJ +0 g 0 G + [(,)]TJ +1 0 0 rg 1 0 0 RG + [-361(18)]TJ +0 g 0 G + [(];)-417(suf)18(\002cient)-362(support)-361(for)]TJ 0 -11.955 Td [(Fortran)-314(2003)-314(is)-313(now)-314(available)-314(fr)18(om)-314(many)-313(compilers,)-330(including)-314(the)-313(GNU)-314(For)18(-)]TJ 0 -11.955 Td [(tran)-250(compiler)-250(fr)18(om)-250(the)-250(Fr)18(ee)-250(Softwar)18(e)-250(Foundation)-250(\050as)-250(of)-250(version)-250(4.8\051.)]TJ 14.944 -12.064 Td [(Pr)18(evious)-311(appr)18(oaches)-312(have)-311(been)-311(based)-311(on)-312(mixing)-311(Fortran)-311(95,)-327(with)-311(its)-311(sup-)]TJ -14.944 -11.955 Td [(port)-249(for)-249(object-based)-249(design,)-249(with)-249(other)-249(languages;)-249(these)-249(have)-249(been)-249(advocated)]TJ 0 -11.956 Td [(by)-346(a)-346(number)-346(of)-347(authors,)-370(e.g.)-346([)]TJ +1 0 0 rg 1 0 0 RG + [(15)]TJ +0 g 0 G + [(].)-598(Mor)18(eover)74(,)-371(the)-346(Fortran)-346(95)-346(facilities)-346(for)-346(dy-)]TJ 0 -11.955 Td [(namic)-411(memory)-410(management)-411(and)-410(interface)-411(overloading)-410(gr)18(eatly)-411(enhance)-410(the)]TJ 0 -11.955 Td [(usability)-397(of)-398(the)-397(PSBLAS)-398(subr)18(outines.)-752(In)-398(this)-397(way)111(,)-434(the)-398(library)-397(can)-398(take)-397(car)18(e)]TJ 0 -11.955 Td [(of)-267(r)8(untime)-266(memory)-267(r)18(equir)18(ements)-266(that)-267(ar)18(e)-266(quite)-267(dif)18(\002cult)-267(or)-266(even)-267(impossible)-266(to)]TJ 0 -11.955 Td [(pr)18(edict)-250(at)-250(implementation)-250(or)-250(compilation)-250(time.)]TJ 14.944 -12.064 Td [(The)-249(pr)18(esentation)-250(of)-249(the)-250(PSBLAS)-249(library)-249(follows)-250(the)-249(general)-249(str)8(uctur)18(e)-250(of)-249(the)]TJ -14.944 -11.955 Td [(pr)18(oposal)-207(for)-206(serial)-207(Sparse)-207(BLAS)-207([)]TJ +1 0 0 rg 1 0 0 RG + [(7)]TJ +0 g 0 G + [(,)]TJ +1 0 0 rg 1 0 0 RG + [-206(8)]TJ +0 g 0 G + [(],)-216(which)-206(in)-207(its)-207(turn)-206(is)-207(based)-207(on)-207(t)1(he)-207(pr)18(oposal)]TJ 0 -11.956 Td [(for)-250(BLAS)-250(on)-250(dense)-250(matrices)-250([)]TJ +1 0 0 rg 1 0 0 RG + [(14)]TJ +0 g 0 G + [(,)]TJ +1 0 0 rg 1 0 0 RG + [-250(4)]TJ +0 g 0 G + [(,)]TJ +1 0 0 rg 1 0 0 RG + [-250(5)]TJ +0 g 0 G + [(].)]TJ 14.944 -12.063 Td [(The)-297(applicability)-297(of)-298(sparse)-297(iterative)-297(solvers)-297(to)-297(many)-298(dif)18(fer)18(ent)-297(ar)18(eas)-297(causes)]TJ -14.944 -11.956 Td [(some)-190(terminology)-190(pr)18(oblems)-190(because)-190(the)-190(same)-190(concept)-190(may)-190(be)-190(denoted)-190(thr)18(ough)]TJ 0 -11.955 Td [(dif)18(fer)18(ent)-271(names)-271(depending)-272(on)-271(the)-271(application)-271(ar)18(ea.)-374(The)-271(PSBLAS)-271(featur)18(es)-271(pr)18(e-)]TJ 0 -11.955 Td [(sented)-332(in)-333(this)-332(document)-332(will)-333(be)-332(discussed)-332(r)18(eferring)-333(to)-332(a)-333(\002ni)1(te)-333(dif)18(fer)18(ence)-332(dis-)]TJ 0 -11.955 Td [(cr)18(etization)-284(of)-285(a)-284(Partial)-285(Dif)18(fer)18(ential)-284(Equation)-284(\050PDE\051.)-285(However)74(,)-293(the)-284(scope)-285(of)-284(the)]TJ 0 -11.955 Td [(library)-283(is)-283(wider)-283(than)-284(that:)-376(for)-283(example,)-291(it)-283(can)-284(be)-283(applied)-283(to)-283(\002nite)-283(element)-283(dis-)]TJ 0 -11.956 Td [(cr)18(etizations)-267(of)-267(PDEs,)-271(and)-267(even)-266(to)-267(dif)18(fer)18(ent)-267(classes)-267(of)-267(pr)18(oblems)-267(such)-266(as)-267(nonlin-)]TJ 0 -11.955 Td [(ear)-250(optimization,)-250(for)-250(example)-250(in)-250(optimal)-250(contr)18(ol)-250(pr)18(oblems.)]TJ 14.944 -12.064 Td [(The)-383(design)-383(of)-383(a)-383(solver)-383(for)-384(sparse)-383(linear)-383(systems)-383(is)-383(driven)-383(by)-383(many)-383(con-)]TJ -14.944 -11.955 Td [(\003icting)-271(objectives,)-277(such)-272(as)-271(limiting)-271(occupation)-272(of)-271(storage)-271(r)18(esour)18(ces,)-277(exploiting)]TJ 0 -11.955 Td [(r)18(egularities)-274(in)-274(the)-275(input)-274(data,)-280(exploiting)-274(har)18(dwar)18(e)-275(characteristi)1(cs)-275(of)-274(the)-274(paral-)]TJ 0 -11.955 Td [(lel)-350(platform.)-610(T)92(o)-350(achieve)-350(an)-350(optimal)-350(communication)-350(to)-350(computation)-350(ratio)-350(on)]TJ 0 -11.955 Td [(distributed)-379(memory)-378(machines)-379(it)-378(is)-379(essential)-379(to)-378(keep)-379(the)]TJ/F60 9.9626 Tf 256.501 0 Td [(data)-379(locality)]TJ/F62 9.9626 Tf 54.198 0 Td [(as)-379(high)]TJ -310.699 -11.956 Td [(as)-315(possible;)-346(this)-315(can)-314(be)-315(done)-315(thr)18(ough)-314(an)-315(appr)18(opriate)-314(data)-315(allocation)-314(strategy)111(.)]TJ 0 -11.955 Td [(The)-323(choice)-323(of)-324(the)-323(pr)18(econditioner)-323(is)-323(another)-323(very)-324(important)-323(factor)-323(that)-323(af)18(fects)]TJ 0 -11.955 Td [(ef)18(\002ciency)-300(of)-300(the)-300(im)1(plemented)-300(application.)-460(Optimal)-300(data)-299(distribution)-300(r)18(equir)18(e-)]TJ 0 -11.955 Td [(ments)-300(for)-299(a)-300(given)-300(pr)18(econditioner)-299(may)-300(con\003ict)-300(with)-300(distribution)-299(r)18(equir)18(ements)]TJ 0 -11.955 Td [(of)-356(the)-356(r)18(est)-356(of)-357(the)-356(solver)74(.)-628(Finding)-356(the)-357(o)1(ptimal)-357(trade-of)18(f)-356(may)-356(be)-356(very)-356(dif)18(\002cult)]TJ 0 -11.955 Td [(because)-292(it)-291(is)-292(application)-291(dependent.)-435(Possible)-292(solutions)-291(to)-292(these)-292(pr)18(oblems)-291(and)]TJ 0 -11.956 Td [(other)-342(important)-342(inputs)-342(to)-342(the)-342(development)-342(of)-341(the)-342(PSBLAS)-342(softwar)18(e)-342(package)]TJ +0 g 0 G + 169.365 -29.887 Td [(1)]TJ +0 g 0 G +ET + +endstream +endobj +899 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 341.676 335.51 353.735] -/A << /S /GoTo /D (subsection.7.13) >> +/Length 5269 >> -% 725 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F62 9.9626 Tf 150.705 706.129 Td [(have)-292(come)-291(fr)18(om)-292(an)-292(established)-291(experience)-292(in)-292(applying)-291(the)-292(PSBLAS)-291(solvers)-292(to)]TJ 0 -11.955 Td [(computational)-250(\003uid)-250(dynamics)-250(applications.)]TJ/F59 14.3462 Tf 0 -33.474 Td [(2)-1000(General)-250(overview)]TJ/F62 9.9626 Tf 0 -22.702 Td [(The)-190(PSBLAS)-190(library)-190(is)-190(designed)-190(to)-190(handle)-190(the)-190(implementation)-190(of)-190(iterative)-190(solvers)]TJ 0 -11.955 Td [(for)-275(sparse)-275(linear)-275(systems)-275(on)-275(distributed)-275(memory)-274(parallel)-275(computers.)-385(The)-275(sys-)]TJ 0 -11.955 Td [(tem)-307(coef)18(\002cient)-308(matrix)]TJ/F60 9.9626 Tf 100.571 0 Td [(A)]TJ/F62 9.9626 Tf 10.381 0 Td [(must)-307(be)-308(squar)18(e;)-336(it)-308(may)-307(be)-308(r)18(eal)-307(or)-307(complex,)-322(nonsym-)]TJ -110.952 -11.955 Td [(metric,)-301(and)-291(its)-291(sparsity)-291(pattern)-291(needs)-291(not)-291(to)-291(be)-291(symmetric.)-433(The)-291(serial)-291(compu-)]TJ 0 -11.955 Td [(tation)-240(parts)-239(ar)18(e)-240(based)-240(on)-239(the)-240(serial)-240(sparse)-239(BLAS,)-240(so)-240(that)-239(any)-240(extension)-239(made)-240(to)]TJ 0 -11.956 Td [(the)-258(data)-258(str)8(uctur)18(es)-259(of)-258(the)-258(serial)-258(kernels)-258(is)-259(available)-258(to)-258(the)-258(parallel)-258(version.)-335(The)]TJ 0 -11.955 Td [(overall)-294(design)-294(and)-294(parallelization)-294(strategy)-294(have)-294(been)-294(in\003uenced)-294(by)-294(the)-294(str)8(uc-)]TJ 0 -11.955 Td [(tur)18(e)-306(of)-307(the)-306(ScaLAP)92(ACK)-306(parallel)-307(library)111(.)-479(The)-306(layer)18(ed)-306(str)8(uctur)18(e)-306(of)-307(the)-306(PSBLAS)]TJ 0 -11.955 Td [(library)-349(is)-349(shown)-348(in)-349(\002gur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-349(1)]TJ +0 g 0 G + [(;)-398(lower)-349(layers)-349(of)-349(the)-349(library)-349(in)1(dicate)-349(an)-349(encapsu-)]TJ 0 -11.955 Td [(lation)-314(r)18(elationship)-314(with)-313(upper)-314(layers.)-502(The)-314(ongoing)-314(discussion)-313(focuses)-314(on)-314(the)]TJ 0 -11.955 Td [(Fortran)-244(2003)-244(layer)-245(immediately)-244(below)-244(the)-244(application)-244(layer)74(.)-308(The)-245(serial)-244(parts)-244(of)]TJ 0 -11.956 Td [(the)-230(computation)-230(on)-230(each)-230(pr)18(ocess)-230(ar)18(e)-230(executed)-230(thr)18(ough)-230(calls)-230(to)-230(the)-230(serial)-230(sparse)]TJ 0 -11.955 Td [(BLAS)-307(subr)18(outines.)-482(In)-307(a)-307(similar)-308(way)111(,)-321(the)-307(inter)18(-pr)18(ocess)-308(message)-307(exchanges)-307(ar)18(e)]TJ 0 -11.955 Td [(encapsulated)-244(in)-243(an)-244(applicaiton)-244(layer)-243(that)-244(has)-244(been)-243(str)18(ongly)-244(inspir)18(ed)-244(by)-243(the)-244(Ba-)]TJ 0 -11.955 Td [(sic)-314(Linear)-313(Algebra)-314(Communication)-313(Subr)18(outines)-314(\050BLACS\051)-314(library)-313([)]TJ +1 0 0 rg 1 0 0 RG + [(6)]TJ +0 g 0 G + [(].)-501(Usually)]TJ 0 -11.955 Td [(ther)18(e)-315(is)-314(no)-315(need)-315(to)-314(deal)-315(dir)18(ectly)-314(with)-315(MPI;)-315(however)74(,)-330(in)-315(some)-315(cases,)-331(MPI)-314(r)18(ou-)]TJ 0 -11.955 Td [(tines)-219(ar)18(e)-219(used)-218(dir)18(ectly)-219(to)-219(impr)18(ove)-219(ef)18(\002ciency)111(.)-299(For)-219(further)-219(details)-219(on)-218(our)-219(commu-)]TJ 0 -11.956 Td [(nication)-250(layer)-250(see)-250(Sec.)]TJ +0 0 1 rg 0 0 1 RG + [-250(7)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +ET +1 0 0 1 258.536 281.98 cm +q +.65 0 0 .65 0 0 cm +q +1 0 0 1 0 0 cm +/Im2 Do +Q +Q +0 g 0 G +1 0 0 1 -258.536 -281.98 cm +BT +/F62 9.9626 Tf 216.385 250.1 Td [(Figur)18(e)-250(1:)-310(PSBLAS)-250(library)-250(components)-250(hierar)18(chy)111(.)]TJ +0 g 0 G +0 g 0 G + -50.736 -22.178 Td [(The)-370(type)-369(of)-370(linear)-369(system)-370(matrices)-370(that)-369(we)-370(addr)18(ess)-369(typically)-370(arise)-370(in)-369(the)]TJ -14.944 -11.955 Td [(numerical)-260(solution)-261(of)-260(PDEs;)-266(in)-260(such)-260(a)-261(context,)-263(it)-260(is)-261(necessary)-260(to)-260(pay)-261(special)-260(at-)]TJ 0 -11.955 Td [(tention)-297(to)-298(the)-297(str)8(uctur)18(e)-298(of)-297(the)-298(pr)18(oblem)-297(fr)18(om)-298(which)-297(the)-298(application)-297(originates.)]TJ 0 -11.955 Td [(The)-277(nonzer)18(o)-276(pattern)-277(of)-277(a)-276(matrix)-277(arising)-277(fr)18(om)-276(the)-277(discr)18(etization)-276(of)-277(a)-277(PDE)-276(is)-277(in-)]TJ 0 -11.956 Td [(\003uenced)-232(by)-232(various)-231(factors,)-236(such)-232(as)-232(the)-231(shape)-232(of)-232(the)-232(domain,)-235(the)-232(discr)18(etization)]TJ 0 -11.955 Td [(strategy)111(,)-313(and)-301(the)-300(equation/unknown)-301(or)18(dering.)-461(The)-301(matrix)-301(it)1(self)-301(can)-301(be)-300(inter)18(-)]TJ 0 -11.955 Td [(pr)18(eted)-291(as)-291(the)-291(adjacency)-291(matrix)-292(of)-291(the)-291(graph)-291(associated)-291(with)-291(the)-291(discr)18(etization)]TJ 0 -11.955 Td [(mesh.)]TJ 14.944 -11.955 Td [(The)-308(distribution)-308(of)-308(the)-309(coef)18(\002cient)-308(matrix)-308(for)-308(the)-308(linear)-309(system)-308(is)-308(based)-308(on)]TJ -14.944 -11.955 Td [(the)-314(\223owner)-314(computes\224)-314(r)8(ule:)-438(the)-314(variable)-314(associated)-314(to)-314(each)-314(mesh)-314(point)-314(is)-314(as-)]TJ +0 g 0 G + 169.365 -29.888 Td [(2)]TJ +0 g 0 G +ET + +endstream +endobj +895 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 329.581 334.742 341.641] -/A << /S /GoTo /D (subsection.7.14) >> +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (./figures/psblas.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 901 0 R +/BBox [0 0 197 215] +/Resources << +/ProcSet [ /PDF /Text ] +/ExtGState << +/R7 902 0 R +>>/Font << /R8 903 0 R>> >> -% 726 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 317.487 305.523 329.547] -/A << /S /GoTo /D (subsection.7.15) >> +/Length 898 +/Filter /FlateDecode >> -% 727 0 obj +stream +xœµVM7 ½ëWèÖ4€Y‘ú>&@[HMl ‡¢câu·;{þý’#QvvÝKÐÂÏ{#=RüÐð«u€ÖɯÿOóÓÇl÷g³°öã¯ýá´7_ ¶çþ7ìÛ // ¢Ãl7÷¦É E„ŒŽ,E(%’ÝÌ«õîô°íúq{:ï~Üüe0ƒsµØÀíæ³yõöý›µ¼ûyc>Ÿ(Ú¿M, +¯ ¼”ìá3›"dQžÙ7_Û³ËXm2‚0— +¼î(:HhU3vܪÅ“Aª f6è=A΃AŸÄ!9`³Ç +PwæAt_UOÏ¡OæO³æl8f¢ãPHÄY¥æ«‡åÕÌ+k(*«¦XÁI䓉uyQ¥ +Åá ¢ï¹Éø*Þcv ( ñÌÿôÚÇJÊBt«FpÍ©vvË×VþÕ‘” +„rÓ‘û×&@^ +éÿ2!)Õš\‘Œ÷¼ža®8Î7L¤ÂõŸÇ )]aôà¥`;¡vç ãp½ªba=WY³¨Hj.8‰µ2Rt%ùÁ` R=EÝ 4¢{4_póXÕFwt{Š[Íf.óÈv¾†Ê‘yÎÌÆGÞ§Áxq€Èúš‹ú‚c—++SJëgÅ¡Br5XUäÛFZYMv8™\øH”˜M!‚¢%rl9ª§¸Ù›Œbõh¾0ÝgÔ3©EÅ-:º‹¼i¾>9ÎßÑò…/aëUËv±Òu‹ß`ú®›Lk„¡ÀtÕ—ßö¼E÷õ¶Ž¿áæsù—¼0ȽýßI +~·œíÕÏãÎh§ÀP‰ó+‰I‰Ë`ä8Äwsâ~L¥¼–U9‰–2ó¢ß”ÇM®F‘ +ÁóÕÞ²þ-ÃÉu1 ƒéuÄóŠ +}¦öqëXË|Vb´A—Ó6QƒŠY¡8Õ†% +Ûæ“Ž=…åËE²A5) +í}óiV<\îrãDÝ âÖß7x¤U +«Ͻ'ƒÇ›ÇÇùaÚ>=|9Êh±Büuæ¹£$îËLËàq·–ÑÃÊŸ5×k^Þ½;>íN÷Ûi™ZŠ\V+9D£­8îNËLÓG™÷×»~0+¾’”àŠ'¢ˆ±íúmw>o÷;{·=ŸŽûEý—»a¥ÃѲîîÝâ8SË4Â%ÕÇ¥_¾œžNÛ#OαéƒùüÐ +endstream +endobj +905 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 305.393 232.786 317.453] -/A << /S /GoTo /D (subsection.7.16) >> +/Filter /FlateDecode +/Subtype /Type1C +/Length 2887 >> -% 728 0 obj +stream +xœe–iXSWÇo É=Z¤-iʦ µ­m]‘Ö*ÕVYÔZQU (! KBI ,A8,!, Ö„M‚T¥*(¶ÖQ¬ ÖœÇn:Jg9—¹|˜‹vž~˜/÷¹÷žåyß÷ÿ?¿÷Ð0‡%Fs ¤ðÓ׋SbE‹ßë±b ±’IÑü?æw0VbôåБšV82ÑÔ«Èò2*x£Óhr}›¿X’•&HHÌð|÷ppø{k×®ûóÏ&ÏãYÿñ ৠDž«©_(–¤ðEÛ<ý©ÙB¡à„g‚0K’˜îÇ[\+ä'{î‰Xæù®ÿ{žÞ^^›ÖSÍ)Ç¥éžÏ#÷< öñ ô æ'H…±iÿ?‚a˜›¯ÈO,IÛž±76ðÄ~~|P‚ Dèéåý>†­ÂbobAØ!,{ Åck0?l=€mÀvc{°MاØgØØ~Œ†½Š±°×0wªx˜ÆÇîѲiW—ì]ÒA~Êá‡}¿2ü&&ÎŒg^Æ·g YúêÒK-=½ ,Ó'æã¡E oXhóafö±rMMⲌ¸îšÉ$ãrö‘«s²£‹Ýä(ÊŒGgm†ò:‘{-e sp­A¢O…*7˜¦È+rT…/¨Q áȆic8'¡‰p¿DûvÌÐë‰UìÊòšÒZlÕÊ(.YŠÃà\UDP¢DQ¡4@;@§ðÏGš =œkîå’|ª‘‹“âLx”.ÇÏ”…ÿ»ì{T¨Ø·‡ó/(‹Êä†f»9Ío„fy1Û'7;#ÆÒÎ}„pVRÃü6Ë$Çg´†\è ‚pè«Vo= XIrôW|N5õ²w[Fàçp8k@Ø‘b•>«36Õ´6–°­ÙÔ7莼'H_Žù˜ý7xVkWRËÇÓ­'ÝáaY܉Œä¬ãù{!ˆRUwsÑ e“¾ÚÄf²„ŸŒöþ8Œ·žKUEc#¼m´ŽPÝtTNÄ°É×½Ö’ÒãçÕÈ9?ýâ —OÈ׸j1{æÊZr%É8¶gg\|뀌+˾§À“ÑkSj/2¥Aј…R4Ý„ÓUç»3HúÈ…¥­&V²«Kõ°‚SzÍîB"Ãssƒ +kµiqÖ±F|g©º~ Ð&¼7©U3Zör@o£Wv=^ÂK;ÌãÞÇ ËËÕ¡ +7´f'»ó²ýìs;H@Ò£vú ¶wrœÐ#Jã-´{3t=áή+¯.5@ÐVÍã’å8Œ,(Ø—CiW`Â÷”+Lp )Þ}æ¼µ²Z«©åÔ«j ´šl=2‹ V”Åý ×–…IÔAr7'â hF“ƒ¨ºŸRôïs.¬D"e³‘2ÏÙ–¥Üù6JcÚt•VZÁD جlØÚ˜'Ñ›Œ“u_Žß)4ªà.°° ‡[sÕ;´”U{Ìø'ª:x z”†€á¦iα,êfç ×Ï-Ö:ôtƒ]„V0r™ÅÅy%%EЭjÊru€ÕP%•V¤zlŽðé>1ÍHêÍlH‡B7^²8B 46Ë9²¶¼õ8È&½ŒL}MEUV`5¬+n¡Äh(´Y‹=î5që|zÏA;—ľ×+Û Õm Ëvv¼SàÝBÕ¸K3F8v;‘–Ü?ãšEUD›us5Ìš’òBNA^a^†]X³¹…Zwx²´¸¼°n~A^dn錹r®¿u¬‡£®•¥)4R说~ÉE§qÖì3梌æy3 ástêÓØp§:gËIªTç͸OyvüÔ©ù¦Î8SÌäy9>ûâ,9Ašñà µÕùò÷è݇.¬âÀ×l…*G«„ £ n˜‹Âq8¦íP¶Jí1æ ÖûFfX2[Z›,Í%º=·¸º¤ +êµ»yàœUt˜s'7ìWäÇð¥™*L¬ˆÀaÞäð`ÓØ$‡e ×5Ê=NÁ&Co?5Dnüž …ÚÕ7 'a«Øøx‡wºQ¬öû ]£\K¾L™vÂÜÕßT§5rêS šZÌ-v^ûÁýa’£©ÜT^~BÉ6à“üyƒ ýrsüño4QFÌ­…wºœðhh@çaÿÛÊF4 +´ÇÐ0»½¸¡¤NË5gZ¯vÛ'á×pHÕ/é81üm=…Œ&«Ò7¿FýŸsaK¾fGÁ‹ân/W.š%$dV½ÞŠ<®ykMP$‰gªJË2ž3W4ŽÞ· —6”tZg£Ùn¡ìûVH"\Ù§?Á3¡²G»‡þšÜÝ‹ºhLø^]~Í"KÎ""Œ-ÉÈ‹›3l-ͶŒÕô¾PÙ¥‚Xœ§!Õ3Ö +"d~+[Æ,ÊÈÍU@ TT(ª«§.>¾*Ù#òI¢DaV, ‡Û†CCÐmþØñ6~µZ'ƒéàÓÈ£~Þ—S$'ƒÙI&2š™¬@s©¡ÎäQ ŠÌÔNÊþü„?~ðûw¡×I×)Øax\ê¿62$Oèåt%×¥ÖQíâyú´_gèæy:»’Y[j(­‚ »FEõÎTV©µT®¹&<°"§^Ä/xQY¸‚¯Ù­psBå”+TíóÎí´Ö§Èø”ŽôTj¡0VŸx(„Oâ\ +IZ¹t4¤w!}^€gì½—ú€h9@±^è Ò…s2•=3B2H>ÉçynÞÌû7ŠC‰#ˆþhñÚï-whèÇGtBŒV²Õºø`uÞ1èFæ3ÑvðøçÓ÷à´ÛoÝ34\z"ž“,P&+š´®¿Ú»oB03¼å㣶xsÉÝä!FáŽ/†m'~¼êŒn?àßvaÝDëÑ÷ì+°¥¸5ïšôt0Ü ¶æ+ÉÔ·ÆsRz…t¥J–xFzëþƒöÁQîè`û¼/dŠº2›åž™j@7GÛú.º?Üwyc$O‘œÀI‘(S¥ME®ƒßŒô| Áõ^`‚2!-›’"VîIN™"Ì‚B-ÌÎe³/uV9:Î6:.Ç°ÿMɪH +endstream +endobj +911 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 293.299 242.519 305.359] -/A << /S /GoTo /D (subsection.7.17) >> +/Length 8252 >> -% 729 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F62 9.9626 Tf 99.895 706.129 Td [(signed)-263(to)-264(a)-263(pr)18(ocess)-263(that)-263(will)-264(own)-263(the)-263(corr)18(esponding)-263(r)18(ow)-263(in)-264(the)-263(coef)18(\002cient)-263(ma-)]TJ 0 -11.955 Td [(trix)-406(and)-406(will)-406(carry)-405(out)-406(all)-406(r)18(elated)-406(computations.)-778(This)-406(all)1(ocation)-406(strategy)-406(is)]TJ 0 -11.955 Td [(equivalent)-353(to)-353(a)-353(partition)-353(of)-353(the)-353(discr)18(etization)-353(mesh)-353(in)1(to)]TJ/F60 9.9626 Tf 253.543 0 Td [(sub-domains)]TJ/F62 9.9626 Tf 51.107 0 Td [(.)-619(Our)-353(li-)]TJ -304.65 -11.956 Td [(brary)-220(supports)-220(any)-220(distribution)-220(that)-220(keeps)-220(together)-220(the)-220(coef)18(\002cients)-220(of)-220(each)-220(ma-)]TJ 0 -11.955 Td [(trix)-244(r)18(ow;)-245(ther)18(e)-244(ar)18(e)-243(no)-244(other)-243(constraints)-244(on)-243(the)-243(variable)-244(assignment.)-308(This)-243(choice)]TJ 0 -11.955 Td [(is)-324(consistent)-324(with)-324(simple)-324(data)-325(distributions)-324(such)-324(as)]TJ/F67 9.9626 Tf 232.237 0 Td [(CYCLIC\050N\051)]TJ/F62 9.9626 Tf 50.302 0 Td [(and)]TJ/F67 9.9626 Tf 20.095 0 Td [(BLOCK)]TJ/F62 9.9626 Tf 26.152 0 Td [(,)-324(as)]TJ -328.786 -11.955 Td [(well)-310(as)-309(completely)-310(arbitrary)-310(assignments)-309(of)-310(equation)-310(indices)-309(to)-310(pr)18(ocesses.)-489(In)]TJ 0 -11.955 Td [(particular)-250(it)-250(is)-251(consistent)-250(with)-250(the)-250(usage)-250(of)-251(graph)-250(partitioning)-250(tools)-250(commonly)]TJ 0 -11.956 Td [(available)-333(in)-332(the)-333(literatur)18(e,)-353(e.g.)-558(METIS)-332([)]TJ +1 0 0 rg 1 0 0 RG + [(13)]TJ +0 g 0 G + [(].)-558(Dense)-333(ve)1(ctors)-333(conform)-333(to)-332(sparse)]TJ 0 -11.955 Td [(matrices,)-257(that)-255(is,)-257(the)-255(entries)-255(of)-256(a)-255(vector)-255(follow)-256(the)-255(same)-255(distribution)-256(of)-255(the)-255(ma-)]TJ 0 -11.955 Td [(trix)-250(r)18(ows.)]TJ 14.944 -12.648 Td [(W)92(e)-343(assume)-344(that)-343(the)-344(sparse)-343(matrix)-343(is)-344(built)-343(in)-344(parallel,)-366(wher)18(e)-344(each)-343(pr)18(ocess)]TJ -14.944 -11.955 Td [(generates)-254(its)-254(own)-255(portion.)-322(W)92(e)-255(never)-254(r)18(equir)18(e)-254(that)-254(the)-255(entir)18(e)-254(matrix)-254(be)-254(available)]TJ 0 -11.955 Td [(on)-288(a)-288(single)-288(node.)-423(However)74(,)-298(it)-287(is)-288(possible)-288(to)-288(hold)-288(the)-288(entir)18(e)-288(matrix)-287(in)-288(one)-288(pr)18(o-)]TJ 0 -11.955 Td [(cess)-241(and)-242(distribute)-241(it)-241(explicitly)]TJ +0 0 1 rg 0 0 1 RG +/F62 7.5716 Tf 133.807 3.616 Td [(1)]TJ +0 g 0 G +/F62 9.9626 Tf 4.284 -3.616 Td [(,)-243(even)-241(though)-242(the)-241(r)18(esulting)-241(memory)-241(bottleneck)]TJ -138.091 -11.955 Td [(would)-250(make)-250(this)-250(option)-250(unattractive)-250(in)-250(most)-250(cases.)]TJ/F59 11.9552 Tf 0 -33.074 Td [(2.1)-1000(Basic)-250(Nomenclature)]TJ/F62 9.9626 Tf 0 -20.306 Td [(Our)-301(computational)-301(model)-301(implies)-301(that)-301(the)-301(data)-301(al)1(location)-301(on)-301(the)-301(parallel)-301(dis-)]TJ 0 -11.955 Td [(tributed)-370(memory)-369(machine)-370(is)-370(guided)-370(by)-369(the)-370(str)8(uctur)18(e)-370(of)-370(the)-369(physical)-370(model,)]TJ 0 -11.955 Td [(and)-250(speci\002cally)-250(by)-250(the)-250(discr)18(etization)-250(mesh)-250(of)-250(the)-250(PDE.)]TJ 14.944 -12.648 Td [(Each)-400(point)-400(of)-400(the)-399(discr)18(etization)-400(mesh)-400(will)-400(have)-400(\050at)-400(least)1(\051)-400(one)-400(associated)]TJ -14.944 -11.955 Td [(equation/variable,)-416(and)-384(ther)18(efor)18(e)-383(one)-383(index.)-710(W)92(e)-383(say)-383(that)-384(point)]TJ/F60 9.9626 Tf 289.765 0 Td [(i)-403(depends)]TJ/F62 9.9626 Tf 42.709 0 Td [(on)]TJ -332.474 -11.955 Td [(point)]TJ/F60 9.9626 Tf 26.955 0 Td [(j)]TJ/F62 9.9626 Tf 6.004 0 Td [(if)-312(the)-312(equation)-312(for)-312(a)-312(variable)-313(associated)-312(with)]TJ/F60 9.9626 Tf 202.502 0 Td [(i)]TJ/F62 9.9626 Tf 6.074 0 Td [(contains)-312(a)-312(term)-312(in)]TJ/F60 9.9626 Tf 84.153 0 Td [(j)]TJ/F62 9.9626 Tf 2.894 0 Td [(,)-328(or)]TJ -328.582 -11.955 Td [(equivalently)-291(if)]TJ/F60 9.9626 Tf 67.321 0 Td [(a)]TJ/F60 7.5716 Tf 4.59 -1.96 Td [(i)-67(j)]TJ/F91 10.3811 Tf 8.967 1.96 Td [(6)]TJ/F93 10.3811 Tf 0.249 0 Td [(=)]TJ/F62 9.9626 Tf 11.726 0 Td [(0.)-434(After)-292(the)-291(partition)-292(of)-291(the)-292(discr)18(etization)-291(mesh)-292(into)]TJ/F60 9.9626 Tf 233.514 0 Td [(sub-)]TJ -326.367 -11.955 Td [(domains)]TJ/F62 9.9626 Tf 37.559 0 Td [(assigned)-381(to)-381(the)-381(parallel)-381(pr)18(ocesses,)-413(we)-381(classify)-381(the)-381(points)-381(of)-381(a)-381(given)]TJ -37.559 -11.955 Td [(sub-domain)-250(as)-250(following.)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -22.003 Td [(Internal.)]TJ +0 g 0 G +/F62 9.9626 Tf 43.995 0 Td [(An)-359(internal)-359(poi)1(nt)-359(of)-359(a)-359(given)-359(domain)]TJ/F60 9.9626 Tf 168.65 0 Td [(depends)]TJ/F62 9.9626 Tf 35.684 0 Td [(only)-359(on)-359(points)-358(of)-359(the)]TJ -223.422 -11.955 Td [(same)-264(domain.)-351(If)-264(all)-264(points)-264(of)-264(a)-264(domain)-263(ar)18(e)-264(assigned)-264(to)-264(one)-264(pr)18(ocess,)-267(then)]TJ 0 -11.956 Td [(a)-196(computational)-196(step)-195(\050e.g.,)-207(a)-196(matrix-vector)-196(pr)18(oduct\051)-196(of)-195(the)-196(equations)-196(asso-)]TJ 0 -11.955 Td [(ciated)-214(with)-213(the)-214(internal)-214(points)-214(r)18(equir)18(es)-213(no)-214(data)-214(items)-214(fr)18(om)-213(other)-214(domains)]TJ 0 -11.955 Td [(and)-250(no)-250(communications.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.695 Td [(Boundary)92(.)]TJ +0 g 0 G +/F62 9.9626 Tf 51.397 0 Td [(A)-192(point)-191(of)-192(a)-192(given)-191(domain)-192(is)-192(a)-191(boundary)-192(point)-192(if)-191(it)]TJ/F60 9.9626 Tf 217.552 0 Td [(depends)]TJ/F62 9.9626 Tf 34.019 0 Td [(on)-192(points)]TJ -278.061 -11.955 Td [(belonging)-250(to)-250(other)-250(domains.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.696 Td [(Halo.)]TJ +0 g 0 G +/F62 9.9626 Tf 29.609 0 Td [(A)-389(halo)-389(point)-389(for)-389(a)-389(given)-389(domain)-389(i)1(s)-389(a)-389(point)-389(belonging)-389(to)-389(another)-389(do-)]TJ -4.702 -11.955 Td [(main)-267(such)-267(that)-266(ther)18(e)-267(is)-267(a)-267(boundary)-267(point)-266(which)]TJ/F60 9.9626 Tf 212.474 0 Td [(depends)]TJ/F62 9.9626 Tf 34.767 0 Td [(on)-267(it.)-360(Whenever)]TJ -247.241 -11.955 Td [(performing)-360(a)-361(computational)-360(step,)-388(such)-361(as)-360(a)-361(matrix-vector)-360(pr)18(oduct,)-388(the)]TJ 0 -11.955 Td [(values)-274(associated)-273(with)-274(halo)-274(points)-274(ar)18(e)-274(r)18(equested)-273(fr)18(om)-274(other)-274(domains.)-381(A)]TJ 0 -11.955 Td [(boundary)-259(point)-258(of)-259(a)-258(given)-259(domain)-258(is)-259(usually)-258(a)-259(halo)-258(point)-259(for)-259(some)-258(other)]TJ 0 -11.956 Td [(domain)]TJ +0 0 1 rg 0 0 1 RG +/F62 7.5716 Tf 34.002 3.617 Td [(2)]TJ +0 g 0 G +/F62 9.9626 Tf 4.284 -3.617 Td [(;)-238(ther)18(efor)18(e)-232(the)-233(car)18(dinality)-232(of)-232(the)-232(boundary)-233(p)1(oints)-233(set)-232(denotes)-232(the)]TJ -38.286 -11.955 Td [(amount)-250(of)-250(data)-250(sent)-250(to)-250(other)-250(domains.)]TJ +0 g 0 G +ET +q +1 0 0 1 99.895 168.389 cm +[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S +Q +BT +/F62 5.9776 Tf 110.755 161.427 Td [(1)]TJ/F62 7.9701 Tf 3.487 -2.893 Td [(In)-250(our)-250(pr)18(ototype)-250(implementation)-250(we)-250(pr)18(ovide)-250(sample)-250(scatter/gather)-250(r)18(outines.)]TJ/F62 5.9776 Tf -3.487 -6.922 Td [(2)]TJ/F62 7.9701 Tf 3.487 -2.893 Td [(This)-401(is)-402(the)-401(normal)-402(situation)-401(when)-402(the)-401(pattern)-402(of)-401(the)-402(sparse)-401(matrix)-402(is)-401(symmetric,)-440(which)-401(is)]TJ -14.347 -9.464 Td [(equivalent)-358(to)-358(say)-358(that)-358(the)-357(interaction)-358(between)-358(two)-358(variables)-358(is)-358(r)18(ecipr)18(ocal.)-634(If)-357(the)-358(matrix)-358(pattern)]TJ 0 -9.465 Td [(is)-241(non-symmetric)-241(we)-242(may)-241(have)-241(one-way)-241(interactions,)-243(and)-241(these)-241(could)-241(cause)-242(a)-241(situation)-241(in)-241(which)-241(a)]TJ 0 -9.464 Td [(boundary)-250(point)-250(is)-250(not)-250(a)-250(halo)-250(point)-250(for)-250(its)-250(neighbour)74(.)]TJ +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 169.365 -29.888 Td [(3)]TJ +0 g 0 G +ET + +endstream +endobj +924 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 271.309 183.083 283.109] -/A << /S /GoTo /D (section.8) >> +/Length 4830 >> -% 730 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 150.705 706.129 Td [(Overlap.)]TJ +0 g 0 G +/F62 9.9626 Tf 44.553 0 Td [(An)-245(overlap)-244(point)-245(is)-245(a)-245(boundary)-244(point)-245(assigned)-245(to)-244(multiple)-245(domains.)]TJ -19.647 -11.955 Td [(Any)-204(operation)-204(that)-204(involves)-204(an)-204(overlap)-204(point)-204(has)-204(to)-204(be)-204(r)18(eplicated)-204(for)-204(each)]TJ 0 -11.955 Td [(assignment.)]TJ -24.906 -18.943 Td [(Overlap)-358(points)-359(do)-358(not)-358(usually)-359(exist)-358(in)-359(the)-358(basic)-358(data)-359(distributions;)-412(however)]TJ 0 -11.955 Td [(they)-325(ar)18(e)-326(a)-325(featur)18(e)-326(of)-325(Domain)-326(Decomposition)-325(Schwarz)-326(pr)18(econditioners)-325(which)]TJ 0 -11.956 Td [(ar)18(e)-250(the)-250(subject)-250(of)-250(r)18(elated)-250(r)18(esear)18(ch)-250(work)-250([)]TJ +1 0 0 rg 1 0 0 RG + [(3)]TJ +0 g 0 G + [(,)]TJ +1 0 0 rg 1 0 0 RG + [-250(2)]TJ +0 g 0 G + [(].)]TJ 14.944 -11.955 Td [(W)92(e)-225(denote)-225(the)-225(sets)-225(of)-225(internal,)-230(boundary)-225(and)-225(halo)-225(points)-225(for)-225(a)-225(given)-225(subdo-)]TJ -14.944 -11.955 Td [(main)-251(by)]TJ/F91 10.3811 Tf 38.66 0 Td [(I)]TJ/F62 9.9626 Tf 6.53 0 Td [(,)]TJ/F91 10.3811 Tf 5.125 0 Td [(B)]TJ/F62 9.9626 Tf 9.753 0 Td [(and)]TJ/F91 10.3811 Tf 19.497 0 Td [(H)]TJ/F62 9.9626 Tf 8.972 0 Td [(.)-314(Each)-252(subdomain)-251(is)-252(assigned)-251(to)-252(one)-251(pr)18(ocess;)-253(each)-251(pr)18(ocess)]TJ -88.537 -11.955 Td [(usually)-346(owns)-346(one)-346(su)1(bdomain,)-370(although)-346(the)-346(user)-346(may)-346(choose)-345(to)-346(assign)-346(mor)18(e)]TJ 0 -11.955 Td [(than)-302(one)-301(subdomain)-302(to)-301(a)-302(pr)18(ocess.)-465(If)-302(each)-301(pr)18(ocess)]TJ/F60 9.9626 Tf 222.767 0 Td [(i)]TJ/F62 9.9626 Tf 5.968 0 Td [(owns)-302(one)-301(subdomain,)-315(the)]TJ -228.735 -11.956 Td [(number)-221(of)-221(r)18(ows)-221(in)-221(the)-221(local)-221(sparse)-221(matrix)-221(is)]TJ/F91 10.3811 Tf 192.655 0 Td [(j)-24(I)]TJ/F60 7.5716 Tf 8.943 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F93 10.3811 Tf 4.799 0 Td [(+)]TJ/F91 10.3811 Tf 9.989 0 Td [(j)-24(B)]TJ/F60 7.5716 Tf 10.108 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.876 1.96 Td [(j)]TJ/F62 9.9626 Tf 3.003 0 Td [(,)-227(and)-221(the)-221(number)-221(of)-221(local)]TJ -235.248 -11.955 Td [(columns)-207(\050i.e.)-296(those)-207(for)-207(which)-207(ther)18(e)-208(exists)-207(at)-207(least)-207(one)-207(non-zer)18(o)-208(entry)-207(in)-207(the)-207(local)]TJ 0 -11.955 Td [(r)18(ows\051)-250(is)]TJ/F91 10.3811 Tf 37.275 0 Td [(j)-24(I)]TJ/F60 7.5716 Tf 8.943 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F93 10.3811 Tf 5.066 0 Td [(+)]TJ/F91 10.3811 Tf 10.255 0 Td [(j)-24(B)]TJ/F60 7.5716 Tf 10.109 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F93 10.3811 Tf 5.066 0 Td [(+)]TJ/F91 10.3811 Tf 10.256 0 Td [(j)-24(H)]TJ/F60 7.5716 Tf 12.051 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F62 9.9626 Tf 3.004 0 Td [(.)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +ET +1 0 0 1 222.462 541.675 cm +q +0 -1 1 0 0 0 cm +q +.65 0 0 .65 0 0 cm +q +1 0 0 1 0 0 cm +/Im3 Do +Q +Q +Q +0 g 0 G +1 0 0 1 -222.462 -541.675 cm +BT +/F62 9.9626 Tf 260.803 335.398 Td [(Figur)18(e)-250(2:)-310(Point)-250(class\002cation.)]TJ +0 g 0 G +0 g 0 G + -95.154 -23.688 Td [(This)-190(classi\002cation)-190(of)-190(mesh)-190(points)-190(guides)-190(the)-190(naming)-190(scheme)-190(that)-190(we)-190(adopted)]TJ -14.944 -11.956 Td [(in)-190(the)-190(library)-190(internals)-190(and)-190(in)-190(the)-190(data)-190(str)8(uctur)18(es.)-290(W)92(e)-190(explicitly)-190(note)-190(that)-190(\223Halo\224)]TJ 0 -11.955 Td [(points)-250(ar)18(e)-250(also)-250(often)-250(called)-250(\223ghost\224)-250(points)-250(in)-250(the)-250(literatur)18(e.)]TJ/F59 11.9552 Tf 0 -28.902 Td [(2.2)-1000(Library)-250(contents)]TJ/F62 9.9626 Tf 0 -18.964 Td [(The)-250(PSBLAS)-250(library)-250(consists)-250(of)-250(various)-250(classes)-250(of)-250(subr)18(outines:)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -18.943 Td [(Computational)-250(routines)]TJ +0 g 0 G +/F62 9.9626 Tf 113.723 0 Td [(comprising:)]TJ +0 g 0 G + -77.917 -19.434 Td [(\225)]TJ +0 g 0 G + [-500(Sparse)-250(matrix)-250(by)-250(dense)-250(matrix)-250(pr)18(oduct;)]TJ +0 g 0 G + 0 -15.449 Td [(\225)]TJ +0 g 0 G + [-500(Sparse)-250(triangular)-250(systems)-250(solution)-250(for)-250(block)-250(diagonal)-250(matrices;)]TJ +0 g 0 G + 0 -15.449 Td [(\225)]TJ +0 g 0 G + [-500(V)111(ector)-250(and)-250(matrix)-250(norms;)]TJ +0 g 0 G + 0 -15.449 Td [(\225)]TJ +0 g 0 G + [-500(Dense)-250(matrix)-250(sums;)]TJ +0 g 0 G + 0 -15.449 Td [(\225)]TJ +0 g 0 G + [-500(Dot)-250(pr)18(oducts.)]TJ +0 g 0 G +/F59 9.9626 Tf -35.806 -19.434 Td [(Communication)-250(routines)]TJ +0 g 0 G +/F62 9.9626 Tf 118.704 0 Td [(handling)-250(halo)-250(and)-250(overlap)-250(communications;)]TJ +0 g 0 G + 50.661 -29.888 Td [(4)]TJ +0 g 0 G +ET + +endstream +endobj +921 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 259.055 387.474 271.115] -/A << /S /GoTo /D (subsection.8.1) >> +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (./figures/points.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 927 0 R +/BBox [0 0 274 308] +/Resources << +/ProcSet [ /PDF /Text ] +/ExtGState << +/R7 928 0 R +>>/Font << /R8 929 0 R>> >> -% 731 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 246.961 415.897 259.021] -/A << /S /GoTo /D (subsection.8.2) >> +/Length 1397 +/Filter /FlateDecode +>> +stream +xœÝYËŽ5Ý÷Wô’ q±Ën»ï6 +– 󣄠¹3ÊBü=®§«æ±bA‡HŒ}Ï©c·í²»?­é”ׄÿäïÍeùö]_?ü¾¤Ó©d êwßGüðaù´d"®òçæ²¾¾ä}ÍíëÕûe4­ß ,äýÔ×sÿ»º,_ýx÷Ç/w×·¯®~[¾»ZÞ.ø›Œ1¸ð™âuóâ¯ïÿ¼ûùúáoO*žþx/þÃõí½Î22Tø<ᜇd†&Âoî/×ïV˜âÿõèCê1V^õd¨æõãR ¬Û9ŸÎç¶^–ºµÓ¾ÍšÚýÝz¦zõ¯7‹!€S®ûj짔êJÚR¿–ðWZSöN•m˜´ ide«3çûfyÿõROÛú×|J_F¿~]~z2ò–}×òVÐÕämë¦Î€sQ<I<³¦uiüd¸r͵9.Ö¤¢ÆR’ÉÑãY~ОÐCÑÝ¥Ÿ}öçÙ^â<3LA ‰c‹YÒ¶®ôçY¯qž&mCÙØâÌû懣ç—Ñ#|H–_rƧšÇÒ³,wš0s>}yüÇ5ÒNóË p%U¤ –ðW@E’§$§•|¡pxõE`&ÆøåU ™¤ó«›%AÝIUÍ0Gš]ý‘&ûÖM’ î Jšx÷¬…T.ù)~¼C²8˜}~‚­ÛÍWÛ¢íÁvKÑö¶K,8ÛÍ—&†`[C*—ü¨ONÔÇs­ƒ ½m‚ê ò9؆Áu¶!×`{P9¦m‚êKI7oÛB*—ü¨O샹~ñ̳·Ç'­¡Á^ÝIaÏvRy!œzw'ó¤`Íx"0.Ѥb'…iÄù|ùÌs¼žP:-%X/[´^º“#Àa°há…dÞPÓY/)Z‡Ýqˆ&-VŠÖ½ON¬Çtnƒ®G±À¹ÍY–& é›Ë’וB¿Ìœ¤¡¹M…ÁnngäŽ%¤Ò#ØœÃÉÙÇ‚"d;’Àô)ùÃ(˜\X‹³Ž¥²£0}Z¡pø#`Ó†Sò‹%Hvt§Ð̧f£`ú`-Î+”ÐŽQ4ó9ƒ…Ç,x›O/,îf,z»âißn«ªÝìv«$½úæ-ÜŒå`?›“禩™|,ˆ7cïó™;Ìñº@!osõé]Š¦?ݲta0€yýÒ¥¤Zdy›«OïRÜ<%9­äƒ€[}拇ú6m8uõIPžþhǃf>m))…YÞæê“ Ò<%9­äƒ€[}ækçÿÜæ“WO’rõ= A} £ Ñ0'Ë 9‘S,irêÕ÷+\_ã­uâÝ¿›ÑÆE?æóé{¦ƒÙÇá'È‹ÎB#4_²$&†`[–’qq‘‘&/> Mõ5^_'†`[Bý˜OõºÖÁ–%©¡ ª/]07o[šqq ’&/M Íõ5^_'nÞ¶†4.ú1Ÿ6ØsýÜ¥%]Š!ƒCÞgVe@Ù–‹’…$)š5-ƒÃØ5}‡ä²?ÖLg+‡ |>{é>hO‘jøX5~,ê>–0àxÕ},1’š¬ác ”ø±ŠûX€5‹ûXb$3òø³ Ú…t¡í¡=Å>tpº8Õ‡’Ô$iÎ>´-ö¡Ç%ÀšTÔXJR#ÞgL¼í“-J/0®jãȶw.Þâªick£Z,”Ô¤š^”Ñk·ì«éUÝ ‹¯WjÇ‚µÛçƒ.ÁºUE³zÉgýãPˆ,é"›Ñe±ûÌ‹:t˜!*%~ Ö *«QÊÒ@emPMÓ1:¾Þ’àX¼÷(˜®4æ ¤Nƒ¾]þÎJ¦' +endstream +endobj +936 0 obj +<< +/Length 4927 >> -% 732 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 99.895 706.129 Td [(Data)-250(management)-250(and)-250(auxiliary)-250(routines)]TJ +0 g 0 G +/F62 9.9626 Tf 190.375 0 Td [(including:)]TJ +0 g 0 G + -154.569 -24.208 Td [(\225)]TJ +0 g 0 G + [-500(Parallel)-250(envir)18(onment)-250(management)]TJ +0 g 0 G + 0 -18.081 Td [(\225)]TJ +0 g 0 G + [-500(Communication)-250(descriptors)-250(allocation;)]TJ +0 g 0 G + 0 -18.082 Td [(\225)]TJ +0 g 0 G + [-500(Dense)-250(and)-250(sparse)-250(matrix)-250(allocation;)]TJ +0 g 0 G + 0 -18.081 Td [(\225)]TJ +0 g 0 G + [-500(Dense)-250(and)-250(sparse)-250(matrix)-250(build)-250(and)-250(update;)]TJ +0 g 0 G + 0 -18.082 Td [(\225)]TJ +0 g 0 G + [-500(Sparse)-250(matrix)-250(and)-250(data)-250(distribution)-250(pr)18(epr)18(ocessing.)]TJ +0 g 0 G +/F59 9.9626 Tf -35.806 -24.207 Td [(Preconditioner)-250(routines)]TJ +0 g 0 G +0 g 0 G + 0 -24.208 Td [(Iterative)-250(methods)]TJ +0 g 0 G +/F62 9.9626 Tf 84.951 0 Td [(a)-250(subset)-250(of)-250(Krylov)-250(subspace)-250(iterative)-250(methods)]TJ -84.951 -23.137 Td [(The)-262(following)-263(naming)-262(scheme)-262(has)-262(been)-263(adopted)-262(for)-262(all)-263(the)-262(symbols)-262(internally)]TJ 0 -11.955 Td [(de\002ned)-250(in)-250(the)-250(PSBLAS)-250(softwar)18(e)-250(package:)]TJ +0 g 0 G + 13.888 -23.137 Td [(\225)]TJ +0 g 0 G + [-500(all)-250(symbols)-250(\050i.e.)-310(subr)18(outine)-250(names,)-250(data)-250(types...\051)-310(ar)18(e)-250(pr)18(e\002xed)-250(by)]TJ/F67 9.9626 Tf 294.184 0 Td [(psb_)]TJ +0 g 0 G +/F62 9.9626 Tf -294.184 -24.208 Td [(\225)]TJ +0 g 0 G + [-500(all)-250(data)-250(type)-250(names)-250(ar)18(e)-250(suf)18(\002xed)-250(by)]TJ/F67 9.9626 Tf 166.604 0 Td [(_type)]TJ +0 g 0 G +/F62 9.9626 Tf -166.604 -24.208 Td [(\225)]TJ +0 g 0 G + [-500(all)-250(constants)-250(ar)18(e)-250(suf)18(\002xed)-250(by)]TJ/F67 9.9626 Tf 135.59 0 Td [(_)]TJ +0 g 0 G +/F62 9.9626 Tf -135.59 -24.208 Td [(\225)]TJ +0 g 0 G + [-500(all)-279(top-level)-279(subr)18(outine)-279(names)-279(follow)-279(the)-279(r)8(ule)]TJ/F67 9.9626 Tf 216.11 0 Td [(psb_xxname)]TJ/F62 9.9626 Tf 55.083 0 Td [(wher)18(e)]TJ/F67 9.9626 Tf 30.187 0 Td [(xx)]TJ/F62 9.9626 Tf 13.241 0 Td [(can)]TJ -303.602 -11.955 Td [(be)-250(either:)]TJ +0 g 0 G +/F59 9.9626 Tf 11.955 -24.208 Td [(\226)]TJ +0 g 0 G +/F67 9.9626 Tf 9.963 0 Td [(ge)]TJ/F62 9.9626 Tf 10.46 0 Td [(:)-310(the)-250(r)18(outine)-250(is)-250(r)18(elated)-250(to)-250(dense)-250(data,)]TJ +0 g 0 G +/F59 9.9626 Tf -20.423 -18.081 Td [(\226)]TJ +0 g 0 G +/F67 9.9626 Tf 9.963 0 Td [(sp)]TJ/F62 9.9626 Tf 10.46 0 Td [(:)-310(the)-250(r)18(outine)-250(is)-250(r)18(elated)-250(to)-250(sparse)-250(data,)]TJ +0 g 0 G +/F59 9.9626 Tf -20.423 -18.081 Td [(\226)]TJ +0 g 0 G +/F67 9.9626 Tf 9.963 0 Td [(cd)]TJ/F62 9.9626 Tf 10.46 0 Td [(:)-310(the)-250(r)18(outine)-250(is)-250(r)18(elated)-250(to)-250(communication)-250(descriptor)-250(\050see)]TJ +0 0 1 rg 0 0 1 RG + [-250(3)]TJ +0 g 0 G + [(\051.)]TJ -32.378 -24.208 Td [(For)-215(example)-215(the)]TJ/F67 9.9626 Tf 72.515 0 Td [(psb_geins)]TJ/F62 9.9626 Tf 47.073 0 Td [(,)]TJ/F67 9.9626 Tf 4.704 0 Td [(psb_spins)]TJ/F62 9.9626 Tf 49.218 0 Td [(and)]TJ/F67 9.9626 Tf 19.011 0 Td [(psb_cdins)]TJ/F62 9.9626 Tf 49.218 0 Td [(perform)-215(the)-215(same)]TJ -241.739 -11.955 Td [(action)-247(\050see)]TJ +0 0 1 rg 0 0 1 RG + [-246(6)]TJ +0 g 0 G + [(\051)-247(on)-246(dense)-247(matrices,)-247(sparse)-247(matric)1(es)-247(and)-247(communication)-246(de-)]TJ 0 -11.956 Td [(scriptors)-222(r)18(espectively)111(.)-301(Interface)-222(overloading)-223(allows)-222(the)-222(usage)-222(of)-223(the)-222(same)]TJ 0 -11.955 Td [(subr)18(outine)-250(names)-250(for)-250(both)-250(r)18(eal)-250(and)-250(complex)-250(data.)]TJ -24.907 -23.137 Td [(In)-288(the)-288(description)-288(of)-289(the)-288(subr)18(outines,)-297(ar)18(guments)-289(or)-288(ar)18(gument)-288(entries)-288(ar)18(e)-288(clas-)]TJ 0 -11.955 Td [(si\002ed)-250(as:)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -23.137 Td [(global)]TJ +0 g 0 G +/F62 9.9626 Tf 33.763 0 Td [(For)-270(input)-270(ar)18(guments,)-275(the)-270(value)-271(must)-270(be)-270(the)-270(same)-270(on)-270(all)-270(pr)18(ocesses)-270(par)18(-)]TJ -8.856 -11.955 Td [(ticipating)-276(in)-277(the)-276(subr)18(outine)-277(call;)-289(for)-277(output)-276(ar)18(guments)-277(the)-276(value)-277(is)-276(guar)18(-)]TJ 0 -11.955 Td [(anteed)-250(to)-250(be)-250(the)-250(same.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -24.208 Td [(local)]TJ +0 g 0 G +/F62 9.9626 Tf 26.56 0 Td [(Each)-250(pr)18(ocess)-250(has)-250(its)-250(own)-250(value\050s\051)-250(independently)111(.)]TJ -26.56 -23.137 Td [(T)92(o)-250(\002nish)-250(our)-250(general)-250(description,)-250(we)-250(de\002ne)-250(a)-250(version)-250(string)-250(with)-250(the)-250(constant)]TJ/F67 9.9626 Tf 122.168 -24.059 Td [(psb_version_string_)]TJ/F62 9.9626 Tf -122.168 -24.059 Td [(whose)-250(curr)18(ent)-250(value)-250(is)]TJ/F67 9.9626 Tf 101.857 0 Td [(3.8.0)]TJ +0 g 0 G +/F62 9.9626 Tf 67.508 -29.888 Td [(5)]TJ +0 g 0 G +ET + +endstream +endobj +941 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 234.867 399.738 246.926] -/A << /S /GoTo /D (subsection.8.3) >> +/Length 8378 >> -% 733 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 150.705 706.129 Td [(2.3)-1000(Application)-250(structure)]TJ/F62 9.9626 Tf 0 -19.381 Td [(The)-244(main)-244(underlyi)1(ng)-244(principle)-244(of)-244(the)-244(PSBLAS)-243(library)-244(is)-244(that)-244(the)-243(library)-244(objects)]TJ 0 -11.956 Td [(ar)18(e)-236(cr)18(eated)-235(and)-236(exist)-235(with)-236(r)18(efer)18(ence)-235(to)-236(a)-235(discr)18(etized)-236(space)-236(t)1(o)-236(which)-236(ther)18(e)-235(corr)18(e-)]TJ 0 -11.955 Td [(sponds)-258(an)-257(index)-258(space)-257(and)-258(a)-258(matrix)-257(sparsity)-258(pattern.)-332(As)-258(an)-258(example,)-259(consider)]TJ 0 -11.955 Td [(a)-310(cell-center)18(ed)-309(\002nite-volume)-310(discr)18(etization)-310(of)-309(the)-310(Navier)18(-Stokes)-310(e)1(quations)-310(on)]TJ 0 -11.955 Td [(a)-234(simulation)-235(domain;)-239(the)-234(index)-235(space)-234(1)-179(.)-192(.)-192(.)]TJ/F60 9.9626 Tf 185.595 0 Td [(n)]TJ/F62 9.9626 Tf 7.998 0 Td [(is)-234(isomorphic)-235(to)-234(the)-234(set)-235(of)-234(cell)-234(cen-)]TJ -193.593 -11.955 Td [(ters,)-210(wher)18(eas)-200(the)-201(pattern)-200(of)-200(the)-201(associated)-200(linear)-200(system)-200(matrix)-201(is)-200(isomorphic)-200(to)]TJ 0 -11.956 Td [(the)-294(adjacency)-294(graph)-294(imposed)-294(on)-294(the)-294(discr)18(et)1(ization)-294(mesh)-294(by)-294(the)-294(discr)18(etization)]TJ 0 -11.955 Td [(stencil.)]TJ 14.944 -12.17 Td [(Thus)-343(the)-343(\002rst)-343(or)18(der)-344(of)-343(business)-343(is)-343(to)-343(establish)-343(an)-343(index)-343(space,)-367(and)-343(this)-343(is)]TJ -14.944 -11.955 Td [(done)-287(with)-287(a)-287(call)-287(to)]TJ/F67 9.9626 Tf 85.52 0 Td [(psb_cdall)]TJ/F62 9.9626 Tf 49.932 0 Td [(in)-287(which)-287(we)-287(specify)-287(the)-287(size)-287(of)-287(the)-287(index)-287(space)]TJ/F60 9.9626 Tf -135.328 -11.956 Td [(n)]TJ/F62 9.9626 Tf 8.041 0 Td [(and)-238(the)-239(allocation)-238(of)-239(the)-239(ele)1(ments)-239(of)-239(the)-238(index)-239(space)-238(to)-239(the)-238(various)-239(pr)18(ocesses)]TJ -8.165 -11.955 Td [(making)-250(up)-250(the)-250(MPI)-250(\050virtual\051)-250(parallel)-250(machine.)]TJ 14.944 -12.17 Td [(The)-366(index)-367(space)-366(is)-366(partitioned)-367(among)-366(pr)18(ocesses,)-396(and)-366(this)-366(cr)18(eates)-367(a)-366(map-)]TJ -14.944 -11.956 Td [(ping)-301(fr)18(om)-300(the)-301(\223global\224)-301(numbering)-300(1)-180(.)-191(.)-192(.)]TJ/F60 9.9626 Tf 176.584 0 Td [(n)]TJ/F62 9.9626 Tf 8.659 0 Td [(to)-301(a)-300(numbering)-301(\223local\224)-301(to)-301(each)-300(pr)18(o-)]TJ -185.243 -11.955 Td [(cess;)-230(each)-221(pr)18(ocess)]TJ/F60 9.9626 Tf 79.682 0 Td [(i)]TJ/F62 9.9626 Tf 5.162 0 Td [(will)-221(own)-220(a)-221(certain)-220(subset)-221(1)-179(.)-192(.)-192(.)]TJ/F60 9.9626 Tf 130.532 0 Td [(n)]TJ/F62 9.9626 Tf 5.664 -1.494 Td [(r)18(ow)]TJ/F60 5.9776 Tf 17.537 -1.649 Td [(i)]TJ/F62 9.9626 Tf 2.775 3.143 Td [(,)-226(each)-221(element)-221(of)-220(which)]TJ -241.352 -11.955 Td [(corr)18(esponds)-258(to)-259(a)-258(certain)-258(element)-258(of)-259(1)-179(.)-192(.)-191(.)]TJ/F60 9.9626 Tf 177.035 0 Td [(n)]TJ/F62 9.9626 Tf 5.664 0 Td [(.)-335(The)-258(user)-259(doe)1(s)-259(not)-258(set)-258(explicitly)-259(this)]TJ -182.699 -11.955 Td [(mapping;)-225(when)-212(the)-212(application)-212(needs)-213(to)-212(indicate)-212(to)-212(which)-213(el)1(ement)-213(of)-212(the)-212(index)]TJ 0 -11.955 Td [(space)-305(a)-306(certain)-305(item)-306(is)-305(r)18(elated,)-320(such)-305(as)-306(the)-305(r)18(ow)-306(and)-305(column)-306(index)-305(of)-306(a)-305(matrix)]TJ 0 -11.956 Td [(coef)18(\002cient,)-283(it)-276(does)-277(so)-276(in)-277(the)-276(\223global\224)-277(numb)1(ering,)-284(and)-276(the)-276(library)-277(will)-276(translate)]TJ 0 -11.955 Td [(into)-250(the)-250(appr)18(opriate)-250(\223local\224)-250(numbering.)]TJ 14.944 -12.17 Td [(For)-324(a)-325(given)-324(index)-324(space)-325(1)-179(.)-191(.)-192(.)]TJ/F60 9.9626 Tf 129.74 0 Td [(n)]TJ/F62 9.9626 Tf 8.895 0 Td [(ther)18(e)-324(ar)18(e)-325(many)-324(possible)-324(associated)-325(topolo-)]TJ -153.579 -11.956 Td [(gies,)-213(i.e.)-295(many)-204(dif)18(fer)18(ent)-204(discr)18(etization)-204(stencils;)-220(thus)-204(the)-204(description)-204(of)-204(the)-204(index)]TJ 0 -11.955 Td [(space)-277(is)-278(not)-277(completed)-278(until)-277(the)-277(user)-278(has)-277(de\002ned)-278(a)-277(sparsity)-278(p)1(attern,)-285(either)-277(ex-)]TJ 0 -11.955 Td [(plicitly)-263(thr)18(ough)]TJ/F67 9.9626 Tf 71.63 0 Td [(psb_cdins)]TJ/F62 9.9626 Tf 49.698 0 Td [(or)-264(im)1(plicitly)-264(thr)18(ough)]TJ/F67 9.9626 Tf 95.326 0 Td [(psb_spins)]TJ/F62 9.9626 Tf 47.073 0 Td [(.)-351(T)1(he)-264(descriptor)-263(is)]TJ -263.727 -11.955 Td [(\002nalized)-225(with)-225(a)-225(call)-226(to)]TJ/F67 9.9626 Tf 98.787 0 Td [(psb_cdasb)]TJ/F62 9.9626 Tf 49.316 0 Td [(and)-225(a)-225(sparse)-225(matrix)-226(with)-225(a)-225(call)-225(to)]TJ/F67 9.9626 Tf 146.044 0 Td [(psb_spasb)]TJ/F62 9.9626 Tf 47.073 0 Td [(.)]TJ -341.22 -11.955 Td [(After)]TJ/F67 9.9626 Tf 26.16 0 Td [(psb_cdasb)]TJ/F62 9.9626 Tf 50.21 0 Td [(each)-315(pr)18(ocess)]TJ/F60 9.9626 Tf 59.13 0 Td [(i)]TJ/F62 9.9626 Tf 6.101 0 Td [(will)-315(have)-315(de\002ned)-315(a)-314(set)-315(of)-315(\223halo\224)-315(\050or)-315(\223ghost\224\051)]TJ -141.601 -11.955 Td [(indices)]TJ/F60 9.9626 Tf 34.731 0 Td [(n)]TJ/F62 9.9626 Tf 5.663 -1.495 Td [(r)18(ow)]TJ/F60 5.9776 Tf 17.538 -1.648 Td [(i)]TJ/F93 10.3811 Tf 5.211 3.143 Td [(+)]TJ/F62 9.9626 Tf 10.506 0 Td [(1)-179(.)-192(.)-192(.)]TJ/F60 9.9626 Tf 19.967 0 Td [(n)]TJ/F62 9.9626 Tf 5.664 -3.831 Td [(col)]TJ/F60 5.9776 Tf 12.794 -1.648 Td [(i)]TJ/F62 9.9626 Tf 2.775 5.479 Td [(,)-377(denoting)-352(elements)-351(of)-352(the)-352(index)-351(space)-352(that)-351(ar)18(e)]TJ/F60 9.9626 Tf 215.582 0 Td [(not)]TJ/F62 9.9626 Tf -330.431 -13.79 Td [(assigned)-289(to)-290(pr)18(ocess)]TJ/F60 9.9626 Tf 88.744 0 Td [(i)]TJ/F62 9.9626 Tf 2.964 0 Td [(;)-309(however)-290(t)1(he)-290(variables)-289(associated)-290(with)-289(them)-290(ar)18(e)-289(needed)]TJ -91.708 -11.955 Td [(to)-289(complete)-289(computations)-289(associated)-289(with)-290(the)-289(sparse)-289(matrix)]TJ/F60 9.9626 Tf 269.662 0 Td [(A)]TJ/F62 9.9626 Tf 7.318 0 Td [(,)-299(and)-289(thus)-289(they)]TJ -276.98 -11.955 Td [(have)-266(to)-266(be)-266(fetched)-265(fr)18(om)-266(\050neighbouring\051)-266(pr)18(ocesses.)-358(The)-266(descriptor)-265(of)-266(the)-266(index)]TJ 0 -11.956 Td [(space)-294(is)-293(built)-294(exactly)-294(for)-293(the)-294(purpose)-294(of)-293(pr)18(operly)-294(sequencing)-294(the)-293(communica-)]TJ 0 -11.955 Td [(tion)-250(steps)-250(r)18(equir)18(ed)-250(to)-250(achieve)-250(this)-250(objective.)]TJ 14.944 -12.17 Td [(A)-197(simple)-197(application)-197(str)8(uctur)18(e)-197(will)-197(walk)-197(thr)18(ough)-197(the)-197(index)-197(space)-197(allocation,)]TJ -14.944 -11.956 Td [(matrix/vector)-250(cr)18(eation)-250(and)-250(linear)-250(system)-250(solution)-250(as)-250(follows:)]TJ +0 g 0 G + 12.453 -20.571 Td [(1.)]TJ +0 g 0 G + [-500(Initialize)-250(parallel)-250(envir)18(onment)-250(with)]TJ/F67 9.9626 Tf 171.465 0 Td [(psb_init)]TJ +0 g 0 G +/F62 9.9626 Tf -171.465 -20.787 Td [(2.)]TJ +0 g 0 G + [-500(Initialize)-250(index)-250(space)-250(with)]TJ/F67 9.9626 Tf 130.489 0 Td [(psb_cdall)]TJ +0 g 0 G +/F62 9.9626 Tf -130.489 -20.788 Td [(3.)]TJ +0 g 0 G + [-500(Allocate)-221(sparse)-221(matrix)-221(and)-221(dense)-221(vectors)-220(with)]TJ/F67 9.9626 Tf 215.843 0 Td [(psb_spall)]TJ/F62 9.9626 Tf 49.274 0 Td [(and)]TJ/F67 9.9626 Tf 19.068 0 Td [(psb_geall)]TJ +0 g 0 G +/F62 9.9626 Tf -284.185 -20.787 Td [(4.)]TJ +0 g 0 G + [-500(Loop)-320(over)-320(all)-320(local)-320(r)18(ows,)-338(generate)-320(matrix)-320(and)-320(vector)-320(entries,)-337(and)-320(insert)]TJ 12.454 -11.955 Td [(them)-250(with)]TJ/F67 9.9626 Tf 47.849 0 Td [(psb_spins)]TJ/F62 9.9626 Tf 49.564 0 Td [(and)]TJ/F67 9.9626 Tf 19.358 0 Td [(psb_geins)]TJ +0 g 0 G +/F62 9.9626 Tf -129.225 -20.787 Td [(5.)]TJ +0 g 0 G + [-500(Assemble)-250(the)-250(various)-250(entities:)]TJ +0 g 0 G + 17.774 -20.787 Td [(\050a\051)]TJ +0 g 0 G +/F67 9.9626 Tf 16.597 0 Td [(psb_cdasb)]TJ +0 g 0 G +/F62 9.9626 Tf -17.125 -16.371 Td [(\050b\051)]TJ +0 g 0 G +/F67 9.9626 Tf 17.125 0 Td [(psb_spasb)]TJ +0 g 0 G +/F62 9.9626 Tf -16.039 -16.371 Td [(\050c\051)]TJ +0 g 0 G +/F67 9.9626 Tf 16.039 0 Td [(psb_geasb)]TJ +0 g 0 G +/F62 9.9626 Tf 122.541 -29.888 Td [(6)]TJ +0 g 0 G +ET + +endstream +endobj +954 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 222.773 444.603 234.832] -/A << /S /GoTo /D (subsection.8.4) >> +/Length 7484 >> -% 744 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F62 9.9626 Tf 112.349 706.129 Td [(6.)]TJ +0 g 0 G + [-500(Choose)-301(the)-300(pr)18(econditioner)-301(to)-300(be)-301(used)-300(with)]TJ/F67 9.9626 Tf 205.537 0 Td [(prec%init)]TJ/F62 9.9626 Tf 50.068 0 Td [(and)-301(build)-300(it)-301(with)]TJ/F67 9.9626 Tf -243.152 -11.955 Td [(prec%build)]TJ +0 0 1 rg 0 0 1 RG +/F62 7.5716 Tf 52.303 3.616 Td [(3)]TJ +0 g 0 G +/F62 9.9626 Tf 4.284 -3.616 Td [(.)]TJ +0 g 0 G + -69.04 -22.307 Td [(7.)]TJ +0 g 0 G + [-500(Call)-190(the)-190(iterative)-190(driver)]TJ/F67 9.9626 Tf 115.326 0 Td [(psb_krylov)]TJ/F62 9.9626 Tf 54.196 0 Td [(with)-190(the)-190(method)-190(of)-190(choice,)-202(e.g.)]TJ/F67 9.9626 Tf 134.982 0 Td [(bicgstab)]TJ/F62 9.9626 Tf 41.843 0 Td [(.)]TJ -358.801 -21.712 Td [(This)-250(is)-250(the)-250(str)8(uctur)18(e)-250(of)-250(the)-250(sample)-250(pr)18(ograms)-250(in)-250(the)-250(dir)18(ectory)]TJ/F67 9.9626 Tf 266.418 0 Td [(test/pargen/)]TJ/F62 9.9626 Tf 62.764 0 Td [(.)]TJ -314.238 -12.551 Td [(For)-257(a)-258(simulation)-257(in)-257(which)-257(the)-258(same)-257(discr)18(etization)-257(mesh)-257(is)-258(used)-257(over)-257(multi-)]TJ -14.944 -11.955 Td [(ple)-250(time)-250(steps,)-250(the)-250(following)-250(str)8(uctur)18(e)-250(may)-250(be)-250(mor)18(e)-250(appr)18(opriate:)]TJ +0 g 0 G + 12.454 -21.712 Td [(1.)]TJ +0 g 0 G + [-500(Initialize)-250(parallel)-250(envir)18(onment)-250(with)]TJ/F67 9.9626 Tf 171.464 0 Td [(psb_init)]TJ +0 g 0 G +/F62 9.9626 Tf -171.464 -22.307 Td [(2.)]TJ +0 g 0 G + [-500(Initialize)-250(index)-250(space)-250(with)]TJ/F67 9.9626 Tf 130.489 0 Td [(psb_cdall)]TJ +0 g 0 G +/F62 9.9626 Tf -130.489 -22.307 Td [(3.)]TJ +0 g 0 G + [-500(Loop)-248(over)-248(the)-248(topology)-248(of)-248(the)-248(discr)18(eti)1(zation)-248(mesh)-248(and)-248(build)-248(the)-248(descrip-)]TJ 12.453 -11.955 Td [(tor)-250(with)]TJ/F67 9.9626 Tf 37.857 0 Td [(psb_cdins)]TJ +0 g 0 G +/F62 9.9626 Tf -50.31 -22.307 Td [(4.)]TJ +0 g 0 G + [-500(Assemble)-250(the)-250(descriptor)-250(with)]TJ/F67 9.9626 Tf 144.386 0 Td [(psb_cdasb)]TJ +0 g 0 G +/F62 9.9626 Tf -144.386 -22.307 Td [(5.)]TJ +0 g 0 G + [-500(Allocate)-190(the)-190(sparse)-190(matrices)-190(and)-190(dense)-190(vectors)-190(with)]TJ/F67 9.9626 Tf 237.676 0 Td [(psb_spall)]TJ/F62 9.9626 Tf 48.966 0 Td [(and)]TJ/F67 9.9626 Tf 18.759 0 Td [(psb_geall)]TJ +0 g 0 G +/F62 9.9626 Tf -305.401 -22.308 Td [(6.)]TJ +0 g 0 G + [-500(Loop)-250(over)-250(the)-250(time)-250(steps:)]TJ +0 g 0 G + 17.773 -22.307 Td [(\050a\051)]TJ +0 g 0 G + [-500(If)-297(after)-298(\002rst)-297(time)-298(step,)-309(r)18(einitialize)-297(the)-298(sparse)-297(matrix)-298(with)]TJ/F67 9.9626 Tf 269.151 0 Td [(psb_sprn)]TJ/F62 9.9626 Tf 41.843 0 Td [(;)]TJ -294.396 -11.955 Td [(also)-250(zer)18(o)-250(out)-250(the)-250(dense)-250(vectors;)]TJ +0 g 0 G + -17.126 -17.131 Td [(\050b\051)]TJ +0 g 0 G + [-500(Loop)-428(over)-429(the)-428(mesh,)-473(generate)-429(the)-428(coef)18(\002cients)-429(and)-428(insert/update)]TJ 17.126 -11.955 Td [(them)-250(with)]TJ/F67 9.9626 Tf 47.85 0 Td [(psb_spins)]TJ/F62 9.9626 Tf 49.563 0 Td [(and)]TJ/F67 9.9626 Tf 19.358 0 Td [(psb_geins)]TJ +0 g 0 G +/F62 9.9626 Tf -132.811 -17.131 Td [(\050c\051)]TJ +0 g 0 G + [-500(Assemble)-250(with)]TJ/F67 9.9626 Tf 84.223 0 Td [(psb_spasb)]TJ/F62 9.9626 Tf 49.564 0 Td [(and)]TJ/F67 9.9626 Tf 19.357 0 Td [(psb_geasb)]TJ +0 g 0 G +/F62 9.9626 Tf -154.808 -17.132 Td [(\050d\051)]TJ +0 g 0 G + [-500(Choose)-250(and)-250(build)-250(pr)18(econditioner)-250(with)]TJ/F67 9.9626 Tf 188.671 0 Td [(prec%init)]TJ/F62 9.9626 Tf 49.563 0 Td [(and)]TJ/F67 9.9626 Tf 19.358 0 Td [(prec%build)]TJ +0 g 0 G +/F62 9.9626 Tf -256.277 -17.131 Td [(\050e\051)]TJ +0 g 0 G + [-500(Call)-250(the)-250(iterative)-250(method)-250(of)-250(choice,)-250(e.g.)]TJ/F67 9.9626 Tf 190.902 0 Td [(psb_bicgstab)]TJ/F62 9.9626 Tf -221.338 -22.307 Td [(The)-276(insertion)-275(r)18(outines)-276(will)-275(be)-276(called)-275(as)-276(many)-276(times)-275(as)-276(needed;)-288(they)-276(only)-275(need)]TJ 0 -11.955 Td [(to)-214(be)-213(called)-214(on)-213(the)-214(data)-214(t)1(hat)-214(is)-214(actually)-213(allocated)-214(to)-213(the)-214(curr)18(ent)-213(pr)18(ocess,)-221(i.e.)-298(each)]TJ 0 -11.955 Td [(pr)18(ocess)-250(generates)-250(its)-250(own)-250(data.)]TJ 14.944 -12.551 Td [(In)-219(principle)-218(ther)18(e)-219(is)-219(no)-218(speci\002c)-219(or)18(der)-219(in)-218(the)-219(calls)-219(to)]TJ/F67 9.9626 Tf 220.804 0 Td [(psb_spins)]TJ/F62 9.9626 Tf 47.073 0 Td [(,)-225(nor)-219(is)-218(ther)18(e)-219(a)]TJ -282.821 -11.955 Td [(r)18(equir)18(ement)-243(to)-243(build)-243(a)-242(matrix)-243(r)18(ow)-243(in)-243(its)-243(entir)18(ety)-243(befor)18(e)-242(calling)-243(the)-243(r)18(outine;)-245(this)]TJ 0 -11.955 Td [(allows)-364(t)1(he)-364(application)-363(pr)18(ogrammer)-364(to)-363(walk)-364(thr)18(ough)-363(the)-364(discr)18(etization)-363(mesh)]TJ 0 -11.955 Td [(element)-316(by)-317(element,)-333(generating)-316(the)-316(main)-317(part)-316(of)-316(a)-317(given)-316(matrix)-316(r)18(ow)-317(but)-316(also)]TJ 0 -11.956 Td [(contributions)-250(to)-250(the)-250(r)18(ows)-250(corr)18(esponding)-250(to)-250(neighbouring)-250(elements.)]TJ 14.944 -12.55 Td [(Fr)18(om)-328(a)-329(func)1(tional)-329(point)-328(of)-328(view)-328(it)-329(is)-328(even)-328(possible)-328(to)-329(exe)1(cute)-329(one)-328(call)-328(for)]TJ -14.944 -11.955 Td [(each)-204(nonzer)18(o)-204(coef)18(\002cient;)-219(however)-203(this)-204(would)-204(have)-204(a)-204(subst)1(antial)-204(computational)]TJ 0 -11.955 Td [(over)18(head.)-457(It)-299(is)-299(ther)18(efor)18(e)-299(advisable)-299(to)-299(pack)-299(a)-299(certain)-299(amount)-299(of)-299(data)-299(into)-299(each)]TJ 0 -11.956 Td [(call)-303(to)-303(the)-302(insertion)-303(r)18(outine,)-316(say)-303(touching)-303(on)-302(a)-303(few)-303(tens)-303(of)-302(r)18(ows;)-330(the)-302(best)-303(per)18(-)]TJ 0 -11.955 Td [(formng)-342(value)-343(would)-342(depend)-342(on)-342(both)-343(the)-342(ar)18(chitectur)18(e)-342(of)-343(the)-342(computer)-342(being)]TJ 0 -11.955 Td [(used)-223(and)-223(on)-222(the)-223(pr)18(oblem)-223(str)8(uctur)18(e.)-301(At)-222(the)-223(opposite)-223(extr)18(eme,)-228(it)-223(would)-222(be)-223(possi-)]TJ 0 -11.955 Td [(ble)-267(to)-267(generate)-267(the)-267(entir)18(e)-267(part)-267(of)-267(a)-267(coef)18(\002cient)-267(matrix)-267(r)18(esiding)-267(on)-267(a)-267(pr)18(ocess)-267(and)]TJ 0 -11.955 Td [(pass)-275(it)-274(in)-275(a)-275(single)-274(call)-275(to)]TJ/F67 9.9626 Tf 108.421 0 Td [(psb_spins)]TJ/F62 9.9626 Tf 47.073 0 Td [(;)-287(this,)-281(however)74(,)-281(would)-274(entail)-275(a)-275(doubling)-274(of)]TJ -155.494 -11.956 Td [(memory)-250(occupation,)-250(and)-250(thus)-250(would)-250(be)-250(almost)-250(always)-250(far)-250(fr)18(om)-250(optimal.)]TJ +0 g 0 G +ET +q +1 0 0 1 99.895 139.555 cm +[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S +Q +BT +/F62 5.9776 Tf 110.755 132.683 Td [(3)]TJ/F62 7.9701 Tf 3.487 -2.893 Td [(The)-260(subr)18(outine)-260(style)]TJ/F97 7.9701 Tf 74.235 0 Td [(psb)]TJ +ET +q +1 0 0 1 201.687 129.989 cm +[]0 d 0 J 0.398 w 0 0 m 2.541 0 l S +Q +BT +/F97 7.9701 Tf 204.228 129.79 Td [(precinit)]TJ/F62 7.9701 Tf 35.946 0 Td [(and)]TJ/F97 7.9701 Tf 15.567 0 Td [(psb)]TJ +ET +q +1 0 0 1 268.951 129.989 cm +[]0 d 0 J 0.398 w 0 0 m 2.541 0 l S +Q +BT +/F97 7.9701 Tf 271.492 129.79 Td [(precbl)]TJ/F62 7.9701 Tf 27.478 0 Td [(ar)18(e)-260(still)-260(supported)-260(for)-260(backwar)18(d)-260(compat-)]TJ -199.075 -9.464 Td [(ibility)]TJ +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 169.365 -29.888 Td [(7)]TJ +0 g 0 G +ET + +endstream +endobj +867 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 213.468 180.781 222.877] -/A << /S /GoTo /D (subsection.8.4) >> +/Type /ObjStm +/N 100 +/First 906 +/Length 12362 >> -% 734 0 obj +stream +838 0 839 147 840 300 841 447 842 600 843 753 844 906 845 1059 846 1206 847 1354 +848 1502 849 1650 850 1797 851 1946 852 2095 853 2244 854 2393 855 2542 856 2691 857 2840 +858 2989 859 3138 863 3286 860 3343 879 3423 868 3629 869 3779 870 3930 871 4082 872 4236 +873 4386 874 4536 875 4686 876 4834 877 4982 7 5130 878 5184 898 5277 901 5427 902 5668 +903 5710 904 6096 892 6396 893 6542 894 6689 11 6836 900 6892 897 6949 910 7070 896 7220 +907 7368 908 7517 912 7666 15 7722 916 7777 917 7834 909 7891 923 8023 927 8165 928 8279 +929 8321 919 8390 920 8537 925 8685 926 8742 19 8799 922 8855 935 9002 932 9144 933 9291 +937 9438 934 9494 940 9587 942 9701 23 9758 943 9814 944 9871 945 9928 946 9985 947 10042 +948 10099 949 10156 950 10213 939 10270 953 10389 938 10523 955 10672 956 10728 957 10784 958 10840 +959 10896 960 10952 961 11008 962 11064 963 11120 964 11176 965 11232 966 11288 967 11344 968 11400 +% 838 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 191.298 152.896 200.628] -/A << /S /GoTo /D (section.9) >> +/Rect [149.709 529.611 247.72 538.941] +/A << /S /GoTo /D (section.11) >> >> -% 735 0 obj +% 839 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 176.574 444.603 188.633] -/A << /S /GoTo /D (subsection.9.1) >> +/Rect [164.653 515.026 393.631 527.085] +/A << /S /GoTo /D (subsection.11.1) >> >> -% 745 0 obj +% 840 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 164.619 201.494 176.678] -/A << /S /GoTo /D (subsection.9.1) >> +/Rect [149.709 495.738 215.89 504.888] +/A << /S /GoTo /D (section.12) >> >> -% 736 0 obj +% 841 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 152.524 444.603 164.584] -/A << /S /GoTo /D (subsection.9.2) >> +/Rect [164.653 481.153 280.885 493.212] +/A << /S /GoTo /D (subsection.12.1) >> >> -% 746 0 obj +% 842 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 143.219 168.468 152.629] -/A << /S /GoTo /D (subsection.9.2) >> +/Rect [164.653 471.847 310.634 481.063] +/A << /S /GoTo /D (subsection.12.2) >> >> -% 737 0 obj +% 843 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [113.843 128.475 444.603 140.535] -/A << /S /GoTo /D (subsection.9.3) >> +/Rect [164.653 459.892 283.267 469.302] +/A << /S /GoTo /D (subsection.12.3) >> >> -% 747 0 obj +% 844 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [98.899 119.17 202.859 128.58] -/A << /S /GoTo /D (subsection.9.3) >> ->> -% 742 0 obj -<< -/D [740 0 R /XYZ 98.895 753.953 null] ->> -% 739 0 obj -<< -/Font << /F54 586 0 R /F51 584 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 761 0 obj -<< -/Type /Page -/Contents 762 0 R -/Resources 760 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 587 0 R -/Annots [ 738 0 R 764 0 R 749 0 R 765 0 R 750 0 R 766 0 R 751 0 R 752 0 R 753 0 R 754 0 R 755 0 R 756 0 R 757 0 R 758 0 R 759 0 R ] +/Rect [164.653 447.937 292.711 457.347] +/A << /S /GoTo /D (subsection.12.4) >> >> -% 738 0 obj +% 845 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 702.323 495.412 714.383] -/A << /S /GoTo /D (subsection.9.4) >> +/Rect [149.709 425.999 303.341 435.15] +/A << /S /GoTo /D (section.13) >> >> -% 764 0 obj +% 846 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 693.018 253.668 702.428] -/A << /S /GoTo /D (subsection.9.4) >> +/Rect [164.653 411.414 225.882 423.474] +/A << /S /GoTo /D (section*.6) >> >> -% 749 0 obj +% 847 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 678.413 495.412 690.472] -/A << /S /GoTo /D (subsection.9.5) >> +/Rect [164.653 399.459 227.098 411.519] +/A << /S /GoTo /D (section*.7) >> >> -% 765 0 obj +% 848 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 669.108 253.668 678.517] -/A << /S /GoTo /D (subsection.9.5) >> +/Rect [164.653 387.504 262.236 399.563] +/A << /S /GoTo /D (section*.8) >> >> -% 750 0 obj +% 849 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 654.503 495.412 666.562] -/A << /S /GoTo /D (subsection.9.6) >> +/Rect [164.653 375.549 282.36 387.608] +/A << /S /GoTo /D (section*.9) >> >> -% 766 0 obj +% 850 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 645.197 253.668 654.607] -/A << /S /GoTo /D (subsection.9.6) >> +/Rect [164.653 363.593 254.803 375.653] +/A << /S /GoTo /D (section*.10) >> >> -% 751 0 obj +% 851 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 623.26 274.28 632.59] -/A << /S /GoTo /D (section.10) >> +/Rect [164.653 351.638 253.488 363.698] +/A << /S /GoTo /D (section*.11) >> >> -% 752 0 obj +% 852 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 608.674 333.298 620.734] -/A << /S /GoTo /D (subsection.10.1) >> +/Rect [164.653 339.683 280.328 351.743] +/A << /S /GoTo /D (section*.12) >> >> -% 753 0 obj +% 853 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 596.719 331.326 608.779] -/A << /S /GoTo /D (subsection.10.2) >> +/Rect [164.653 327.728 252.871 339.788] +/A << /S /GoTo /D (section*.13) >> >> -% 754 0 obj +% 854 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 584.764 381.626 596.824] -/A << /S /GoTo /D (subsection.10.3) >> +/Rect [164.653 315.773 281.971 327.832] +/A << /S /GoTo /D (section*.14) >> >> -% 755 0 obj +% 855 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 572.809 427.165 584.869] -/A << /S /GoTo /D (subsection.10.4) >> +/Rect [164.653 303.818 296.477 315.877] +/A << /S /GoTo /D (section*.15) >> >> -% 756 0 obj +% 856 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 560.854 353.343 572.913] -/A << /S /GoTo /D (subsection.10.5) >> +/Rect [164.653 291.862 305.742 303.922] +/A << /S /GoTo /D (section*.16) >> >> -% 757 0 obj +% 857 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 548.899 315.177 560.958] -/A << /S /GoTo /D (subsection.10.6) >> +/Rect [164.653 279.907 293.966 291.967] +/A << /S /GoTo /D (section*.17) >> >> -% 758 0 obj +% 858 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [149.709 529.611 247.72 538.941] -/A << /S /GoTo /D (section.11) >> +/Rect [164.653 267.952 292.711 280.012] +/A << /S /GoTo /D (section*.18) >> >> -% 759 0 obj +% 859 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [164.653 515.026 393.631 527.085] -/A << /S /GoTo /D (subsection.11.1) >> +/Rect [164.653 255.997 319.55 268.057] +/A << /S /GoTo /D (section*.19) >> >> -% 763 0 obj +% 863 0 obj << -/D [761 0 R /XYZ 149.705 753.953 null] +/D [861 0 R /XYZ 149.705 753.953 null] >> -% 760 0 obj +% 860 0 obj << -/Font << /F54 586 0 R /F51 584 0 R >> +/Font << /F62 667 0 R /F59 665 0 R >> /ProcSet [ /PDF /Text ] >> -% 778 0 obj +% 879 0 obj << /Type /Page -/Contents 779 0 R -/Resources 777 0 R +/Contents 880 0 R +/Resources 878 0 R /MediaBox [0 0 595.276 841.89] -/Parent 780 0 R -/Annots [ 767 0 R 768 0 R 769 0 R 770 0 R 771 0 R 772 0 R 773 0 R 774 0 R 775 0 R 776 0 R ] +/Parent 881 0 R +/Annots [ 868 0 R 869 0 R 870 0 R 871 0 R 872 0 R 873 0 R 874 0 R 875 0 R 876 0 R 877 0 R ] >> -% 767 0 obj +% 868 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[0 1 0] -/Rect [408.168 586.065 420.123 595.071] +/Rect [408.168 585.966 420.123 595.071] /A << /S /GoTo /D (cite.metcalf) >> >> -% 768 0 obj +% 869 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[0 1 0] -/Rect [300.381 514.225 312.336 523.231] +/Rect [300.381 514.126 312.336 523.231] /A << /S /GoTo /D (cite.Sparse03) >> >> -% 769 0 obj +% 870 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[0 1 0] -/Rect [302.511 502.27 309.484 511.276] +/Rect [311.368 502.171 323.323 511.127] /A << /S /GoTo /D (cite.DesPat:11) >> >> -% 770 0 obj +% 871 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[0 1 0] -/Rect [312.107 502.171 324.063 511.276] +/Rect [327.422 502.171 339.378 511.276] /A << /S /GoTo /D (cite.RouXiaXu:11) >> >> -% 771 0 obj +% 872 0 obj << /Type /Annot /Subtype /Link @@ -4498,15 +5321,15 @@ stream /Rect [234.17 442.286 246.125 451.392] /A << /S /GoTo /D (cite.machiels) >> >> -% 772 0 obj +% 873 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[0 1 0] -/Rect [241.917 370.446 248.891 379.402] +/Rect [241.917 370.546 248.891 379.402] /A << /S /GoTo /D (cite.sblas97) >> >> -% 773 0 obj +% 874 0 obj << /Type /Annot /Subtype /Link @@ -4514,23 +5337,23 @@ stream /Rect [251.448 370.446 258.422 379.402] /A << /S /GoTo /D (cite.sblas02) >> >> -% 774 0 obj +% 875 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[0 1 0] -/Rect [226.689 358.491 238.644 367.597] +/Rect [226.689 358.591 238.644 367.597] /A << /S /GoTo /D (cite.BLAS1) >> >> -% 775 0 obj +% 876 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[0 1 0] -/Rect [241.633 358.491 248.606 367.447] +/Rect [241.633 358.591 248.606 367.597] /A << /S /GoTo /D (cite.BLAS2) >> >> -% 776 0 obj +% 877 0 obj << /Type /Annot /Subtype /Link @@ -4540,23 +5363,23 @@ stream >> % 7 0 obj << -/D [778 0 R /XYZ 99.895 716.092 null] +/D [879 0 R /XYZ 99.895 716.092 null] >> -% 777 0 obj +% 878 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 797 0 obj +% 898 0 obj << /Type /Page -/Contents 798 0 R -/Resources 796 0 R +/Contents 899 0 R +/Resources 897 0 R /MediaBox [0 0 595.276 841.89] -/Parent 780 0 R -/Annots [ 791 0 R 792 0 R 793 0 R ] +/Parent 881 0 R +/Annots [ 892 0 R 893 0 R 894 0 R ] >> -% 800 0 obj +% 901 0 obj << /Producer (GPL Ghostscript 9.04) /CreationDate (D:20111215145523+01'00') @@ -4565,15 +5388,15 @@ stream /Creator (fig2dev Version 3.2 Patchlevel 5d) /Author (sfilippo@donald \(Salvatore Filippone\)) >> -% 801 0 obj +% 902 0 obj << /Type /ExtGState /OPM 1 >> -% 802 0 obj +% 903 0 obj << /BaseFont /JEJNJE+Times-Roman -/FontDescriptor 803 0 R +/FontDescriptor 904 0 R /Type /Font /FirstChar 32 /LastChar 116 @@ -4581,7 +5404,7 @@ stream /Encoding /WinAnsiEncoding /Subtype /Type1 >> -% 803 0 obj +% 904 0 obj << /Type /FontDescriptor /FontName /JEJNJE+Times-Roman @@ -4595,9 +5418,9 @@ stream /MissingWidth 500 /XHeight 460 /CharSet (/A/B/F/I/L/M/P/S/a/c/e/f/g/i/l/n/o/p/r/s/space/t/three/two/zero) -/FontFile3 804 0 R +/FontFile3 905 0 R >> -% 791 0 obj +% 892 0 obj << /Type /Annot /Subtype /Link @@ -4605,15 +5428,15 @@ stream /Rect [268.275 538.551 275.249 550.611] /A << /S /GoTo /D (figure.1) >> >> -% 792 0 obj +% 893 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[0 1 0] -/Rect [443.339 469.57 450.312 478.427] +/Rect [443.339 469.47 450.312 478.427] /A << /S /GoTo /D (cite.BLACS) >> >> -% 793 0 obj +% 894 0 obj << /Type /Annot /Subtype /Link @@ -4623,36 +5446,36 @@ stream >> % 11 0 obj << -/D [797 0 R /XYZ 150.705 675.823 null] +/D [898 0 R /XYZ 150.705 675.823 null] >> -% 799 0 obj +% 900 0 obj << -/D [797 0 R /XYZ 150.705 272.018 null] +/D [898 0 R /XYZ 150.705 272.018 null] >> -% 796 0 obj +% 897 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F52 585 0 R >> -/XObject << /Im2 794 0 R >> +/Font << /F62 667 0 R /F59 665 0 R /F60 666 0 R >> +/XObject << /Im2 895 0 R >> /ProcSet [ /PDF /Text ] >> -% 809 0 obj +% 910 0 obj << /Type /Page -/Contents 810 0 R -/Resources 808 0 R +/Contents 911 0 R +/Resources 909 0 R /MediaBox [0 0 595.276 841.89] -/Parent 780 0 R -/Annots [ 795 0 R 806 0 R 807 0 R ] +/Parent 881 0 R +/Annots [ 896 0 R 907 0 R 908 0 R ] >> -% 795 0 obj +% 896 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[0 1 0] -/Rect [275.119 609.432 287.074 618.438] +/Rect [275.119 609.332 287.074 618.438] /A << /S /GoTo /D (cite.METIS) >> >> -% 806 0 obj +% 907 0 obj << /Type /Annot /Subtype /Link @@ -4660,21616 +5483,25844 @@ stream /Rect [232.706 534.258 238.983 547.962] /A << /S /GoTo /D (Hfootnote.1) >> >> -% 807 0 obj +% 908 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [157.808 185.645 164.084 199.235] +/A << /S /GoTo /D (Hfootnote.2) >> +>> +% 912 0 obj +<< +/D [910 0 R /XYZ 98.895 753.953 null] +>> +% 15 0 obj +<< +/D [910 0 R /XYZ 99.895 504.866 null] +>> +% 916 0 obj +<< +/D [910 0 R /XYZ 114.242 167.999 null] +>> +% 917 0 obj +<< +/D [910 0 R /XYZ 114.242 158.184 null] +>> +% 909 0 obj +<< +/Font << /F62 667 0 R /F60 666 0 R /F67 913 0 R /F59 665 0 R /F91 914 0 R /F93 915 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 923 0 obj +<< +/Type /Page +/Contents 924 0 R +/Resources 922 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 881 0 R +/Annots [ 919 0 R 920 0 R ] +>> +% 927 0 obj +<< +/Producer (ESP Ghostscript 815.03) +/CreationDate (D:20070123225315) +/ModDate (D:20070123225315) +>> +% 928 0 obj +<< +/Type /ExtGState +/OPM 1 +>> +% 929 0 obj +<< +/BaseFont /Times-Roman +/Type /Font +/Subtype /Type1 +>> +% 919 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [327.281 638.21 334.255 647.166] +/A << /S /GoTo /D (cite.2007c) >> +>> +% 920 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [337.243 638.309 344.217 647.166] +/A << /S /GoTo /D (cite.2007d) >> +>> +% 925 0 obj +<< +/D [923 0 R /XYZ 149.705 753.953 null] +>> +% 926 0 obj +<< +/D [923 0 R /XYZ 150.705 353.614 null] +>> +% 19 0 obj +<< +/D [923 0 R /XYZ 150.705 270.035 null] +>> +% 922 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F91 914 0 R /F60 666 0 R /F93 915 0 R >> +/XObject << /Im3 921 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 935 0 obj +<< +/Type /Page +/Contents 936 0 R +/Resources 934 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 881 0 R +/Annots [ 932 0 R 933 0 R ] +>> +% 932 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [404.739 354.196 411.713 366.255] +/A << /S /GoTo /D (section.3) >> +>> +% 933 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [172.593 318.033 179.567 330.093] +/A << /S /GoTo /D (section.6) >> +>> +% 937 0 obj +<< +/D [935 0 R /XYZ 98.895 753.953 null] +>> +% 934 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 940 0 obj +<< +/Type /Page +/Contents 941 0 R +/Resources 939 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 881 0 R +>> +% 942 0 obj +<< +/D [940 0 R /XYZ 149.705 753.953 null] +>> +% 23 0 obj +<< +/D [940 0 R /XYZ 150.705 716.092 null] +>> +% 943 0 obj +<< +/D [940 0 R /XYZ 150.705 282.521 null] +>> +% 944 0 obj +<< +/D [940 0 R /XYZ 150.705 261.733 null] +>> +% 945 0 obj +<< +/D [940 0 R /XYZ 150.705 240.946 null] +>> +% 946 0 obj +<< +/D [940 0 R /XYZ 150.705 220.159 null] +>> +% 947 0 obj +<< +/D [940 0 R /XYZ 150.705 188.012 null] +>> +% 948 0 obj +<< +/D [940 0 R /XYZ 150.705 167.072 null] +>> +% 949 0 obj +<< +/D [940 0 R /XYZ 150.705 148.646 null] +>> +% 950 0 obj +<< +/D [940 0 R /XYZ 150.705 132.275 null] +>> +% 939 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F67 913 0 R /F93 915 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 953 0 obj +<< +/Type /Page +/Contents 954 0 R +/Resources 952 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 971 0 R +/Annots [ 938 0 R ] +>> +% 938 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [176.109 690.964 182.386 703.958] +/A << /S /GoTo /D (Hfootnote.3) >> +>> +% 955 0 obj +<< +/D [953 0 R /XYZ 98.895 753.953 null] +>> +% 956 0 obj +<< +/D [953 0 R /XYZ 99.895 716.092 null] +>> +% 957 0 obj +<< +/D [953 0 R /XYZ 99.895 686.784 null] +>> +% 958 0 obj +<< +/D [953 0 R /XYZ 99.895 618.259 null] +>> +% 959 0 obj +<< +/D [953 0 R /XYZ 99.895 595.952 null] +>> +% 960 0 obj +<< +/D [953 0 R /XYZ 99.895 573.645 null] +>> +% 961 0 obj +<< +/D [953 0 R /XYZ 99.895 539.978 null] +>> +% 962 0 obj +<< +/D [953 0 R /XYZ 99.895 517.075 null] +>> +% 963 0 obj +<< +/D [953 0 R /XYZ 99.895 494.768 null] +>> +% 964 0 obj +<< +/D [953 0 R /XYZ 99.895 469.873 null] +>> +% 965 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [157.808 185.645 164.084 199.235] -/A << /S /GoTo /D (Hfootnote.2) >> +/D [953 0 R /XYZ 99.895 442.062 null] >> -% 811 0 obj +% 966 0 obj << -/D [809 0 R /XYZ 98.895 753.953 null] +/D [953 0 R /XYZ 99.895 412.296 null] >> -% 15 0 obj +% 967 0 obj << -/D [809 0 R /XYZ 99.895 504.866 null] +/D [953 0 R /XYZ 99.895 395.165 null] >> -% 815 0 obj +% 968 0 obj << -/D [809 0 R /XYZ 114.242 167.999 null] +/D [953 0 R /XYZ 99.895 377.438 null] >> -% 816 0 obj + +endstream +endobj +976 0 obj << -/D [809 0 R /XYZ 114.242 158.184 null] +/Length 7173 >> -% 808 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 150.705 706.129 Td [(2.3.1)-1000(User)18(-de\002ned)-250(index)-250(mappings)]TJ/F62 9.9626 Tf 0 -18.964 Td [(PSBLAS)-316(supports)-315(user)18(-de\002ned)-316(global)-316(to)-315(local)-316(index)-316(mappings,)-332(subject)-315(to)-316(the)]TJ 0 -11.955 Td [(constraints)-250(outlined)-250(in)-250(sec.)]TJ +0 0 1 rg 0 0 1 RG + [-250(2.3)]TJ +0 g 0 G + [(:)]TJ +0 g 0 G + 12.453 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(The)-250(set)-250(of)-250(indices)-250(owned)-250(locally)-250(must)-250(be)-250(mapped)-250(to)-250(the)-250(set)-250(1)-179(.)-192(.)-191(.)]TJ/F60 9.9626 Tf 294.494 0 Td [(n)]TJ/F62 9.9626 Tf 5.664 -1.495 Td [(r)18(ow)]TJ/F60 5.9776 Tf 17.537 -1.648 Td [(i)]TJ/F62 9.9626 Tf 2.775 3.143 Td [(;)]TJ +0 g 0 G + -320.47 -19.926 Td [(2.)]TJ +0 g 0 G + [-500(The)-250(set)-250(of)-250(halo)-250(points)-250(must)-250(be)-250(mapped)-250(to)-250(the)-250(set)]TJ/F60 9.9626 Tf 228.286 0 Td [(n)]TJ/F62 9.9626 Tf 5.664 -1.494 Td [(r)18(ow)]TJ/F60 5.9776 Tf 17.537 -1.648 Td [(i)]TJ/F93 10.3811 Tf 4.836 3.142 Td [(+)]TJ/F62 9.9626 Tf 10.132 0 Td [(1)-179(.)-192(.)-191(.)]TJ/F60 9.9626 Tf 19.966 0 Td [(n)]TJ/F62 9.9626 Tf 5.664 -3.83 Td [(col)]TJ/F60 5.9776 Tf 12.795 -1.649 Td [(i)]TJ/F62 9.9626 Tf 2.774 5.479 Td [(;)]TJ -320.107 -21.759 Td [(but)-289(otherwise)-289(the)-288(mapping)-289(is)-289(arbitrary)111(.)-426(The)-289(user)-289(application)-289(is)-288(r)18(esponsible)-289(to)]TJ 0 -11.956 Td [(ensur)18(e)-262(consistency)-261(of)-262(this)-262(mapping;)-267(some)-262(err)18(ors)-262(may)-261(be)-262(caught)-262(by)-261(the)-262(library)111(,)]TJ 0 -11.955 Td [(but)-236(this)-236(is)-236(not)-236(guaranteed.)-305(The)-236(application)-236(str)8(uctur)18(e)-236(to)-236(support)-236(this)-236(usage)-236(is)-236(as)]TJ 0 -11.955 Td [(follows:)]TJ +0 g 0 G + 12.453 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(Initialize)-190(index)-190(space)-190(with)]TJ/F67 9.9626 Tf 128.098 0 Td [(psb_cdall\050ictx,desc,info,vl=vl,lidx=lidx\051)]TJ/F62 9.9626 Tf -115.645 -11.955 Td [(passing)-292(the)-293(vectors)]TJ/F67 9.9626 Tf 87.882 0 Td [(vl\050:\051)]TJ/F62 9.9626 Tf 29.064 0 Td [(containing)-292(the)-293(set)-292(of)-293(global)-292(indices)-292(owned)-293(by)]TJ -116.946 -11.956 Td [(the)-327(curr)18(ent)-328(pr)18(ocess)-327(and)]TJ/F67 9.9626 Tf 108.493 0 Td [(lidx\050:\051)]TJ/F62 9.9626 Tf 39.873 0 Td [(containing)-327(the)-327(corr)18(esponding)-328(local)-327(in-)]TJ -148.366 -11.955 Td [(dices;)]TJ +0 g 0 G + -12.453 -19.925 Td [(2.)]TJ +0 g 0 G + [-500(Add)-412(the)-412(halo)-412(points)]TJ/F67 9.9626 Tf 109.326 0 Td [(ja\050:\051)]TJ/F62 9.9626 Tf 30.256 0 Td [(and)-412(their)-412(associated)-412(local)-412(indices)]TJ/F67 9.9626 Tf 155.064 0 Td [(lidx\050:\051)]TJ/F62 9.9626 Tf -282.193 -11.955 Td [(with)-250(a\050some\051)-250(call\050s\051)-250(to)]TJ/F67 9.9626 Tf 99.815 0 Td [(psb_cdins\050nz,ja,desc,info,lidx=lidx\051)]TJ/F62 9.9626 Tf 188.292 0 Td [(;)]TJ +0 g 0 G + -300.56 -19.926 Td [(3.)]TJ +0 g 0 G + [-500(Assemble)-250(the)-250(descriptor)-250(with)]TJ/F67 9.9626 Tf 144.387 0 Td [(psb_cdasb)]TJ/F62 9.9626 Tf 47.073 0 Td [(;)]TJ +0 g 0 G + -191.46 -19.925 Td [(4.)]TJ +0 g 0 G + [-500(Build)-190(the)-190(sparse)-190(matrices)-190(and)-190(vectors,)-202(optionally)-190(making)-190(use)-190(in)]TJ/F67 9.9626 Tf 288.117 0 Td [(psb_spins)]TJ/F62 9.9626 Tf -275.664 -11.955 Td [(and)]TJ/F67 9.9626 Tf 19.958 0 Td [(psb_geins)]TJ/F62 9.9626 Tf 50.163 0 Td [(of)-310(the)]TJ/F67 9.9626 Tf 28.756 0 Td [(local)]TJ/F62 9.9626 Tf 29.243 0 Td [(ar)18(gument)-310(specifying)-310(that)-310(the)-311(indices)-310(in)]TJ/F67 9.9626 Tf 177.734 0 Td [(ia)]TJ/F62 9.9626 Tf 10.46 0 Td [(,)]TJ/F67 9.9626 Tf -316.314 -11.955 Td [(ja)]TJ/F62 9.9626 Tf 12.952 0 Td [(and)]TJ/F67 9.9626 Tf 19.357 0 Td [(irw)]TJ/F62 9.9626 Tf 15.691 0 Td [(,)-250(r)18(espectively)111(,)-250(ar)18(e)-250(alr)18(eady)-250(local)-250(indices.)]TJ/F59 11.9552 Tf -72.906 -29.133 Td [(2.4)-1000(Programming)-250(model)]TJ/F62 9.9626 Tf 0 -18.964 Td [(The)-316(PSBLAS)-315(librarary)-316(is)-315(based)-316(on)-315(the)-316(Single)-315(Pr)18(ogram)-316(Multiple)-316(Data)-315(\050SPMD\051)]TJ 0 -11.955 Td [(pr)18(ogramming)-277(model:)-364(each)-277(pr)18(ocess)-277(participatin)1(g)-277(in)-277(the)-277(computation)-277(performs)]TJ 0 -11.955 Td [(the)-250(same)-250(actions)-250(on)-250(a)-250(chunk)-250(of)-250(data.)-310(Parallelism)-250(is)-250(thus)-250(data-driven.)]TJ 14.944 -11.956 Td [(Because)-313(of)-313(this)-312(str)8(uctur)18(e,)-329(many)-313(subr)18(outines)-313(coor)18(dinate)-312(their)-313(action)-313(acr)18(oss)]TJ -14.944 -11.955 Td [(the)-336(various)-336(pr)18(ocesses,)-358(thus)-336(pr)18(oviding)-336(an)-336(implicit)-336(synchr)18(onization)-336(point,)-358(and)]TJ 0 -11.955 Td [(ther)18(efor)18(e)]TJ/F60 9.9626 Tf 43.283 0 Td [(must)]TJ/F62 9.9626 Tf 24.136 0 Td [(be)-367(called)-366(simultaneously)-367(by)-366(all)-367(pr)18(ocesses)-367(participating)-366(in)-367(the)]TJ -67.419 -11.955 Td [(computation.)-525(This)-321(is)-322(certainly)-322(tr)8(ue)-321(for)-322(the)-322(data)-321(allocation)-322(and)-322(assembl)1(y)-322(r)18(ou-)]TJ 0 -11.955 Td [(tines,)-250(for)-250(all)-250(the)-250(computational)-250(r)18(outines)-250(and)-250(for)-250(some)-250(of)-250(the)-250(tools)-250(r)18(outines.)]TJ 14.944 -11.955 Td [(However)-333(ther)18(e)-332(ar)18(e)-333(many)-333(cases)-332(wher)18(e)-333(no)-333(synchr)18(onizati)1(on,)-354(and)-332(indeed)-333(no)]TJ -14.944 -11.956 Td [(communication)-344(among)-343(pr)18(ocesses,)-367(is)-344(implied;)-390(for)-344(instance,)-367(all)-344(the)-343(r)18(outines)-344(in)]TJ 0 -11.955 Td [(sec.)]TJ +0 0 1 rg 0 0 1 RG + [-246(3)]TJ +0 g 0 G + [-247(ar)18(e)-246(only)-246(acting)-246(on)-247(the)-246(local)-246(data)-247(str)8(uctur)18(es,)-247(and)-246(thus)-246(may)-247(be)-246(called)-246(inde-)]TJ 0 -11.955 Td [(pendently)111(.)-306(The)-238(most)-238(important)-237(case)-238(is)-238(that)-238(of)-238(the)-238(coef)18(\002cient)-237(insertion)-238(r)18(outines:)]TJ 0 -11.955 Td [(since)-231(the)-231(number)-231(of)-230(coef)18(\002cients)-231(in)-231(the)-231(sparse)-231(and)-231(dense)-230(matrices)-231(varies)-231(among)]TJ 0 -11.955 Td [(the)-248(pr)18(ocessors,)-249(and)-249(since)-248(the)-249(user)-248(is)-249(fr)18(ee)-248(to)-249(choose)-248(an)-249(arbitrary)-248(or)18(der)-249(in)-248(builid-)]TJ 0 -11.955 Td [(ing)-250(the)-250(matrix)-250(entries,)-250(these)-250(r)18(outines)-250(cannot)-250(imply)-250(a)-250(synchr)18(onization.)]TJ 14.944 -11.956 Td [(Thr)18(oughout)-250(this)-250(user)-74('s)-250(guide)-250(each)-250(subr)18(outine)-250(will)-250(be)-250(clearly)-250(indicated)-250(as:)]TJ +0 g 0 G +/F59 9.9626 Tf -14.944 -19.925 Td [(Synchronous:)]TJ +0 g 0 G +/F62 9.9626 Tf 67.247 0 Td [(must)-307(be)-307(called)-308(simultaneously)-307(by)-307(all)-307(the)-308(pr)18(ocesses)-307(in)-307(the)-307(r)18(ele-)]TJ -42.341 -11.955 Td [(vant)-250(communication)-250(context;)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.925 Td [(Asynchronous:)]TJ +0 g 0 G +/F62 9.9626 Tf 73.334 0 Td [(may)-250(be)-250(called)-250(in)-250(a)-250(totally)-250(independent)-250(manner)74(.)]TJ +0 g 0 G + 96.031 -56.634 Td [(8)]TJ +0 g 0 G +ET + +endstream +endobj +987 0 obj << -/Font << /F54 586 0 R /F52 585 0 R /F59 812 0 R /F51 584 0 R /F83 813 0 R /F85 814 0 R >> -/ProcSet [ /PDF /Text ] +/Length 8187 +>> +stream +0 g 0 G +0 g 0 G +BT +/F59 14.3462 Tf 99.895 705.784 Td [(3)-1000(Data)-250(Structures)-250(and)-250(Classes)]TJ/F62 9.9626 Tf 0 -23.091 Td [(In)-289(this)-288(chapter)-289(we)-289(illustrate)-288(the)-289(data)-289(str)8(uctur)18(es)-288(used)-289(for)-289(de\002nition)-289(of)-288(r)18(outines)]TJ 0 -11.956 Td [(interfaces.)-622(They)-354(include)-354(data)-354(str)8(uctur)18(es)-354(for)-354(sparse)-354(matrices,)-380(communication)]TJ 0 -11.955 Td [(descriptors)-250(and)-250(pr)18(econditioners.)]TJ 14.944 -12.156 Td [(All)-248(the)-248(data)-249(types)-248(and)-248(the)-248(basic)-248(subr)18(outine)-249(interfaces)-248(r)18(elated)-248(to)-248(descriptors)]TJ -14.944 -11.955 Td [(and)-345(sparse)-345(matrices)-344(ar)18(e)-345(de\002ned)-345(in)-345(the)-345(module)]TJ/F67 9.9626 Tf 213.323 0 Td [(psb_base_mod)]TJ/F62 9.9626 Tf 62.764 0 Td [(;)-392(this)-345(will)-345(have)]TJ -276.087 -11.955 Td [(to)-381(be)-381(included)-381(by)-381(every)-381(user)-381(subr)18(outine)-381(that)-381(makes)-381(u)1(se)-381(of)-381(the)-381(library)111(.)-703(The)]TJ 0 -11.956 Td [(pr)18(econditioners)-250(ar)18(e)-250(de\002ned)-250(in)-250(the)-250(module)]TJ/F67 9.9626 Tf 187.993 0 Td [(psb_prec_mod)]TJ/F62 9.9626 Tf -173.049 -12.156 Td [(Integer)74(,)-433(r)18(eal)-396(and)-397(complex)-396(data)-396(types)-397(ar)18(e)-396(parametrized)-396(with)-397(a)-396(kind)-396(type)]TJ -14.944 -11.955 Td [(de\002ned)-250(in)-250(the)-250(library)-250(as)-250(follows:)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -20.528 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 566.32 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 120.08 566.121 Td [(spk)]TJ +ET +q +1 0 0 1 137.275 566.32 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 145.245 566.121 Td [(Kind)-407(parameter)-406(for)-407(short)-406(pr)18(ecision)-407(r)18(eal)-406(and)-407(complex)-406(data;)-485(corr)18(e-)]TJ -20.443 -11.955 Td [(sponds)-250(to)-250(a)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F67 9.9626 Tf 52.901 0 Td [(REAL)]TJ +0 g 0 G +/F62 9.9626 Tf 23.412 0 Td [(declaration)-250(and)-250(is)-250(normally)-250(4)-250(bytes;)]TJ +0 g 0 G +/F59 9.9626 Tf -101.22 -20.73 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 533.635 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 120.08 533.436 Td [(dpk)]TJ +ET +q +1 0 0 1 138.939 533.635 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 146.909 533.436 Td [(Kind)-420(parameter)-421(for)-420(long)-420(pr)18(ecision)-421(r)18(eal)-420(and)-420(complex)-421(data;)-505(corr)18(e-)]TJ -22.107 -11.955 Td [(sponds)-250(to)-250(a)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F67 9.9626 Tf 52.901 0 Td [(DOUBLE)-525(PRECISION)]TJ +0 g 0 G +/F62 9.9626 Tf 86.176 0 Td [(declaration)-250(and)-250(is)-250(normally)-250(8)-250(bytes;)]TJ +0 g 0 G +/F59 9.9626 Tf -163.984 -20.73 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 500.951 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 120.08 500.751 Td [(mpk)]TJ +ET +q +1 0 0 1 141.708 500.951 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 149.678 500.751 Td [(Kind)-250(parameter)-250(for)-250(4-bytes)-250(integer)-250(data,)-250(as)-250(is)-250(always)-250(used)-250(by)-250(MPI;)]TJ +0 g 0 G +/F59 9.9626 Tf -49.783 -20.729 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 480.221 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 120.08 480.022 Td [(epk)]TJ +ET +q +1 0 0 1 137.833 480.221 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 145.803 480.022 Td [(Kind)-364(parameter)-363(for)-364(8-bytes)-363(integer)-364(data,)-391(a)-1(s)-363(is)-364(always)-363(used)-364(by)-363(the)]TJ/F67 9.9626 Tf -21.001 -11.955 Td [(sizeof)]TJ/F62 9.9626 Tf 33.873 0 Td [(methods;)]TJ +0 g 0 G +/F59 9.9626 Tf -58.78 -20.73 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 447.537 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 120.08 447.337 Td [(ipk)]TJ +ET +q +1 0 0 1 136.169 447.537 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 144.139 447.337 Td [(Kind)-398(parameter)-399(for)-398(\223local\224)-398(integer)-399(indices)-398(and)-398(data;)-473(with)-398(default)]TJ -19.337 -11.955 Td [(build)-250(options)-250(this)-250(is)-250(a)-250(4)-250(bytes)-250(integer;)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -20.729 Td [(psb)]TJ +ET +q +1 0 0 1 117.091 414.852 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 120.08 414.653 Td [(lpk)]TJ +ET +q +1 0 0 1 136.169 414.852 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 144.139 414.653 Td [(Kind)-328(parameter)-329(for)-328(\223global\224)-328(integer)-329(indices)-328(and)-328(data;)-368(with)-328(default)]TJ -19.337 -11.955 Td [(build)-250(options)-250(this)-250(is)-250(an)-250(8)-250(bytes)-250(integer;)]TJ -24.907 -20.529 Td [(The)-205(integer)-205(kinds)-205(for)-206(local)-205(and)-205(global)-205(indices)-205(can)-205(be)-206(chosen)-205(at)-205(con\002gur)18(e)-205(time)-205(to)]TJ 0 -11.955 Td [(hold)-266(4)-267(or)-266(8)-267(bytes,)-270(with)-266(the)-267(global)-266(indices)-266(at)-267(least)-266(as)-267(lar)18(ge)-266(as)-266(the)-267(local)-266(ones.)-359(T)92(o-)]TJ 0 -11.955 Td [(gether)-219(with)-220(the)-219(classes)-219(attributes)-219(we)-219(also)-220(discuss)-219(their)-219(methods.)-300(Most)-219(methods)]TJ 0 -11.955 Td [(detailed)-272(her)18(e)-272(only)-273(act)-272(on)-272(the)-272(local)-272(variable,)-278(i.e.)-376(their)-273(action)-272(is)-272(pur)18(ely)-272(local)-272(and)]TJ 0 -11.956 Td [(asynchr)18(onous)-359(unless)-360(otherwise)-359(stated.)-638(The)-359(list)-360(of)-359(methods)-359(her)18(e)-360(is)-359(not)-359(com-)]TJ 0 -11.955 Td [(pletely)-336(exhaustive;)-380(many)-336(methods,)-358(especially)-336(those)-336(that)-336(alter)-337(the)-336(contents)-336(of)]TJ 0 -11.955 Td [(the)-299(various)-298(objects,)-311(ar)18(e)-299(usually)-299(not)-299(needed)-298(by)-299(the)-299(end-user)74(,)-311(and)-298(ther)18(efor)18(e)-299(ar)18(e)]TJ 0 -11.955 Td [(described)-250(in)-250(the)-250(developer)-74('s)-250(documentation.)]TJ/F59 11.9552 Tf 0 -30.277 Td [(3.1)-1000(Descriptor)-250(data)-250(structure)]TJ/F62 9.9626 Tf 0 -19.353 Td [(All)-241(the)-241(gener)1(a)-1(l)-240(matrix)-241(informations)-241(and)-240(elements)-241(to)-241(be)-241(exchanged)-240(among)-241(pr)18(o-)]TJ 0 -11.956 Td [(cesses)-402(ar)18(e)-401(stor)18(ed)-402(within)-401(a)-402(data)-401(str)8(uctur)18(e)-402(of)-401(the)-402(type)]TJ/F67 9.9626 Tf 242.575 0 Td [(psb)]TJ +ET +q +1 0 0 1 358.788 237.097 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 361.927 236.897 Td [(desc)]TJ +ET +q +1 0 0 1 383.476 237.097 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 386.614 236.897 Td [(type)]TJ/F62 9.9626 Tf 20.921 0 Td [(.)-765(Every)]TJ -307.64 -11.955 Td [(str)8(uctur)18(e)-371(of)-370(this)-371(type)-370(is)-371(associated)-370(with)-371(a)-371(discr)18(etization)-370(pattern)-371(and)-370(enables)]TJ 0 -11.955 Td [(data)-301(communications)-302(and)-301(other)-301(operations)-302(that)-301(ar)18(e)-302(nece)1(ssa)-1(r)1(y)-302(for)-301(implement-)]TJ 0 -11.955 Td [(ing)-250(the)-250(various)-250(algorithms)-250(of)-250(inter)18(est)-250(to)-250(us.)]TJ 14.944 -12.156 Td [(The)-265(data)-266(str)8(uctur)18(e)-265(itself)]TJ/F67 9.9626 Tf 107.448 0 Td [(psb_desc_type)]TJ/F62 9.9626 Tf 70.638 0 Td [(can)-265(be)-266(tr)18(eated)-265(as)-265(an)-265(opaque)-266(object)]TJ -193.03 -11.955 Td [(handled)-321(via)-321(the)-321(tools)-321(r)18(outines)-321(of)-321(Sec.)]TJ +0 0 1 rg 0 0 1 RG + [-321(6)]TJ +0 g 0 G + [-321(or)-321(the)-321(query)-321(r)18(outines)-321(detailed)-321(below;)]TJ 0 -11.956 Td [(nevertheless)-250(we)-250(include)-250(her)18(e)-250(a)-250(description)-250(for)-250(the)-250(curious)-250(r)18(eader)74(.)]TJ 14.944 -12.156 Td [(First)-229(we)-228(describe)-228(the)]TJ/F67 9.9626 Tf 92.473 0 Td [(psb_indx_map)]TJ/F62 9.9626 Tf 65.04 0 Td [(type.)-303(This)-228(is)-229(a)-228(data)-229(str)8(uctur)18(e)-228(that)-229(keeps)]TJ -172.457 -11.955 Td [(track)-250(of)-250(a)-250(certain)-250(number)-250(of)-250(basic)-250(issues)-250(such)-250(as:)]TJ +0 g 0 G + 13.888 -20.528 Td [(\225)]TJ +0 g 0 G + [-500(The)-250(value)-250(of)-250(the)-250(communication)-250(context;)]TJ +0 g 0 G + 155.477 -29.888 Td [(9)]TJ +0 g 0 G +ET + +endstream +endobj +995 0 obj +<< +/Length 6070 >> -% 822 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F62 9.9626 Tf 164.593 706.129 Td [(\225)]TJ +0 g 0 G + [-500(The)-236(number)-236(of)-235(indices)-236(in)-236(the)-236(index)-236(space,)-238(i.e.)-306(global)-236(number)-235(of)-236(r)18(ows)-236(and)]TJ 11.018 -11.955 Td [(columns)-250(of)-250(a)-250(sparse)-250(matrix;)]TJ +0 g 0 G + -11.018 -20.409 Td [(\225)]TJ +0 g 0 G + [-500(The)-250(local)-250(set)-250(of)-250(indices,)-250(including:)]TJ +0 g 0 G +/F59 9.9626 Tf 22.974 -20.408 Td [(\226)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(The)-250(number)-250(of)-250(local)-250(indices)-250(\050and)-250(local)-250(r)18(ows\051;)]TJ +0 g 0 G +/F59 9.9626 Tf -9.962 -16.182 Td [(\226)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(The)-250(number)-250(of)-250(halo)-250(indices)-250(\050and)-250(ther)18(efor)18(e)-250(local)-250(columns\051;)]TJ +0 g 0 G +/F59 9.9626 Tf -9.962 -16.181 Td [(\226)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(The)-250(global)-250(indices)-250(corr)18(esponding)-250(to)-250(the)-250(local)-250(ones.)]TJ -46.824 -20.409 Td [(Ther)18(e)-301(ar)18(e)-301(many)-301(dif)18(fer)18(ent)-301(schemes)-301(for)-301(storing)-301(these)-301(data;)-326(ther)18(efor)18(e)-301(ther)18(e)-301(ar)18(e)-301(a)]TJ 0 -11.955 Td [(number)-299(of)-299(types)-300(extending)-299(the)-299(base)-299(one,)-312(and)-299(the)-299(descriptor)-300(str)8(uctur)18(e)-299(holds)-299(a)]TJ 0 -11.955 Td [(polymorphic)-212(object)-213(whose)-212(dynamic)-212(type)-213(can)-212(be)-212(any)-213(of)-212(the)-212(extended)-213(types.)-297(The)]TJ 0 -11.955 Td [(methods)-250(associated)-250(with)-250(this)-250(data)-250(type)-250(answer)-250(the)-250(following)-250(queries:)]TJ +0 g 0 G + 13.888 -20.288 Td [(\225)]TJ +0 g 0 G + [-500(For)-411(a)-412(given)-411(set)-412(of)-411(local)-411(indices,)-452(\002nd)-412(the)-411(corr)18(esponding)-411(indices)-412(in)-411(the)]TJ 11.018 -11.955 Td [(global)-250(numbering;)]TJ +0 g 0 G + -11.018 -20.408 Td [(\225)]TJ +0 g 0 G + [-500(For)-357(a)-357(given)-357(set)-357(of)-358(global)-357(indices,)-384(\002nd)-357(the)-357(corr)18(esponding)-357(indices)-357(in)-357(the)]TJ 11.018 -11.955 Td [(local)-250(numbering,)-250(if)-250(any)111(,)-250(or)-250(r)18(eturn)-250(an)-250(invalid)]TJ +0 g 0 G + -11.018 -20.409 Td [(\225)]TJ +0 g 0 G + [-500(Add)-250(a)-250(global)-250(index)-250(to)-250(the)-250(set)-250(of)-250(halo)-250(indices;)]TJ +0 g 0 G + 0 -20.408 Td [(\225)]TJ +0 g 0 G + [-500(Find)-250(the)-250(pr)18(ocess)-250(owner)-250(of)-250(each)-250(member)-250(of)-250(a)-250(set)-250(of)-250(global)-250(indices.)]TJ -13.888 -20.288 Td [(All)-295(methods)-295(but)-294(the)-295(last)-295(ar)18(e)-295(pur)18(ely)-295(local;)-317(the)-295(last)-295(method)-294(potentially)-295(r)18(equir)18(es)]TJ 0 -11.955 Td [(communication)-418(among)-419(pr)18(ocesses,)-460(and)-419(thus)-418(is)-418(a)-419(synchr)18(onous)-418(method.)-815(The)]TJ 0 -11.955 Td [(choice)-244(of)-244(a)-244(speci\002c)-244(dynamic)-244(type)-244(for)-244(the)-244(index)-244(map)-244(is)-244(made)-244(at)-244(the)-244(time)-244(the)-244(de-)]TJ 0 -11.955 Td [(scriptor)-210(is)-211(init)1(ially)-211(allocated,)-218(accor)18(ding)-210(to)-211(t)1(he)-211(mode)-210(of)-210(initialization)-211(\050see)-210(also)]TJ +0 0 1 rg 0 0 1 RG + [-210(6)]TJ +0 g 0 G + [(\051.)]TJ 14.944 -12.076 Td [(The)-250(descriptor)-250(contents)-250(ar)18(e)-250(as)-250(follows:)]TJ +0 g 0 G +/F59 9.9626 Tf -14.944 -20.288 Td [(indxmap)]TJ +0 g 0 G +/F62 9.9626 Tf 45.38 0 Td [(A)-190(polymorphic)-190(variable)-190(of)-190(a)-190(type)-190(that)-190(is)-190(any)-190(extension)-190(of)-190(the)-190(indx)]TJ +ET +q +1 0 0 1 478.491 370.98 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 481.48 370.78 Td [(map)]TJ -305.869 -11.955 Td [(type)-250(described)-250(above.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -32.363 Td [(halo)]TJ +ET +q +1 0 0 1 171.228 326.661 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 174.217 326.462 Td [(index)]TJ +0 g 0 G +/F62 9.9626 Tf 30.435 0 Td [(A)-331(list)-332(of)-331(the)-332(halo)-331(and)-332(boundary)-331(elements)-332(for)-331(the)-332(curr)18(ent)-331(pr)18(ocess)]TJ -29.041 -11.955 Td [(to)-247(be)-247(exchanged)-247(with)-246(other)-247(pr)18(ocesses;)-248(for)-247(each)-247(pr)18(ocesses)-247(with)-247(whic)1(h)-247(it)-247(is)]TJ 0 -11.956 Td [(necessary)-250(to)-250(communicate:)]TJ +0 g 0 G + 9.465 -20.408 Td [(1.)]TJ +0 g 0 G + [-500(Pr)18(ocess)-250(identi\002er;)]TJ +0 g 0 G + 0 -16.182 Td [(2.)]TJ +0 g 0 G + [-500(Number)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ +0 g 0 G + 0 -16.181 Td [(3.)]TJ +0 g 0 G + [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ +0 g 0 G + 0 -16.182 Td [(4.)]TJ +0 g 0 G + [-500(Number)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ +0 g 0 G + 0 -16.182 Td [(5.)]TJ +0 g 0 G + [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ -9.465 -20.408 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ +0 0 1 rg 0 0 1 RG + [-250(3.3)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -20.409 Td [(ext)]TJ +ET +q +1 0 0 1 164.583 176.799 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 167.572 176.599 Td [(index)]TJ +0 g 0 G +/F62 9.9626 Tf 30.435 0 Td [(A)-216(list)-217(of)-216(element)-217(indices)-216(to)-217(be)-216(exchanged)-217(to)-216(implement)-217(the)-216(mapping)]TJ -22.396 -11.955 Td [(between)-250(a)-250(base)-250(descriptor)-250(and)-250(a)-250(descriptor)-250(with)-250(overlap.)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ +0 0 1 rg 0 0 1 RG + [-250(3.3)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -20.408 Td [(ovrlap)]TJ +ET +q +1 0 0 1 180.642 132.48 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 183.631 132.281 Td [(index)]TJ +0 g 0 G +/F62 9.9626 Tf 30.436 0 Td [(A)-259(list)-259(of)-258(the)-259(overlap)-259(elements)-259(for)-259(the)-258(curr)18(ent)-259(pr)18(ocess,)-261(or)18(ganized)]TJ -38.456 -11.955 Td [(in)-250(gr)18(oups)-250(like)-250(the)-250(pr)18(evious)-250(vector:)]TJ +0 g 0 G + 141.968 -29.888 Td [(10)]TJ +0 g 0 G +ET + +endstream +endobj +1006 0 obj << -/Type /Page -/Contents 823 0 R -/Resources 821 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 780 0 R -/Annots [ 818 0 R 819 0 R ] +/Length 7045 >> -% 826 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F62 9.9626 Tf 134.267 706.129 Td [(1.)]TJ +0 g 0 G + [-500(Pr)18(ocess)-250(identi\002er;)]TJ +0 g 0 G + 0 -16.693 Td [(2.)]TJ +0 g 0 G + [-500(Number)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ +0 g 0 G + 0 -16.694 Td [(3.)]TJ +0 g 0 G + [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ +0 g 0 G + 0 -16.693 Td [(4.)]TJ +0 g 0 G + [-500(Number)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ +0 g 0 G + 0 -16.693 Td [(5.)]TJ +0 g 0 G + [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ -9.465 -21.431 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ +0 0 1 rg 0 0 1 RG + [-250(3.3)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -21.431 Td [(ovr)]TJ +ET +q +1 0 0 1 115.447 596.693 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 118.436 596.494 Td [(mst)]TJ +ET +q +1 0 0 1 135.631 596.693 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 138.62 596.494 Td [(idx)]TJ +0 g 0 G +/F62 9.9626 Tf 19.367 0 Td [(A)-331(list)-332(to)-331(r)18(etrieve)-331(the)-332(value)-331(of)-331(each)-332(overlap)-331(element)-331(fr)18(om)-332(the)-331(r)18(e-)]TJ -33.185 -11.956 Td [(spective)-250(master)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ +0 0 1 rg 0 0 1 RG + [-250(3.3)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -21.431 Td [(ovrlap)]TJ +ET +q +1 0 0 1 129.833 551.351 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 132.822 551.152 Td [(elem)]TJ +0 g 0 G +/F62 9.9626 Tf 27.118 0 Td [(For)-250(all)-250(overlap)-250(points)-250(belonging)-250(to)-250(th)-250(ecurr)18(ent)-250(pr)18(ocess:)]TJ +0 g 0 G + -25.673 -21.431 Td [(1.)]TJ +0 g 0 G + [-500(Overlap)-250(point)-250(index;)]TJ +0 g 0 G + 0 -16.693 Td [(2.)]TJ +0 g 0 G + [-500(Number)-250(of)-250(pr)18(ocesses)-250(sharing)-250(that)-250(overlap)-250(points;)]TJ +0 g 0 G + 0 -16.694 Td [(3.)]TJ +0 g 0 G + [-500(Index)-250(of)-250(a)-250(\223master)-74(\224)-250(pr)18(ocess:)]TJ -9.465 -21.431 Td [(Speci\002ed)-250(as:)-310(an)-250(allocatable)-250(integer)-250(array)-250(of)-250(rank)-250(two.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -21.431 Td [(bnd)]TJ +ET +q +1 0 0 1 118.755 453.671 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 121.743 453.472 Td [(elem)]TJ +0 g 0 G +/F62 9.9626 Tf 27.119 0 Td [(A)-235(list)-235(of)-235(all)-235(boundary)-235(points,)-238(i.e.)-305(points)-235(that)-235(have)-235(a)-235(connection)-235(with)]TJ -24.06 -11.955 Td [(other)-250(pr)18(ocesses.)]TJ -24.907 -21.055 Td [(The)-393(Fortran)-394(2003)-393(declaration)-394(for)]TJ/F67 9.9626 Tf 151.232 0 Td [(psb_desc_type)]TJ/F62 9.9626 Tf 71.913 0 Td [(str)8(uctur)18(es)-393(is)-394(as)-393(follows:)-597(A)]TJ +0 g 0 G +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 99.895 294.955 cm +0 0 343.711 104.608 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 102.884 388.902 Td [(type)]TJ +0 g 0 G + [-525(psb_desc_type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 18.829 -10.959 Td [(class)]TJ +0 g 0 G + [(\050psb_indx_map\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(indxmap)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ +0 g 0 G + [(\050psb_i_vect_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(v_halo_index)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.958 Td [(type)]TJ +0 g 0 G + [(\050psb_i_vect_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(v_ext_index)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ +0 g 0 G + [(\050psb_i_vect_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(v_ovrlap_index)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ +0 g 0 G + [(\050psb_i_vect_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(v_ovr_mst_idx)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(integer)]TJ +0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-1050(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(ovrlap_elem\050:,:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(integer)]TJ +0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-1050(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(bnd_elem\050:\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -18.829 -10.959 Td [(end)-525(type)]TJ +0 g 0 G + [-525(psb_desc_type)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf -2.989 -41.43 Td [(Listing)-259(1:)-327(The)-259(PSBLAS)-259(de\002ned)-259(data)-258(type)-259(that)-259(contains)-259(the)-258(communication)-259(de-)]TJ 0 -11.955 Td [(scriptor)74(.)]TJ 0 -25.259 Td [(communication)-319(descriptor)-320(associated)-319(with)-319(a)-320(sparse)-319(matrix)-320(has)-319(a)-319(state,)-337(which)]TJ 0 -11.955 Td [(can)-250(take)-250(the)-250(following)-250(values:)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.054 Td [(Build:)]TJ +0 g 0 G +/F62 9.9626 Tf 32.927 0 Td [(State)-283(enter)18(ed)-283(after)-283(the)-283(\002rst)-284(allocation,)-291(and)-283(befor)18(e)-283(the)-283(\002rst)-283(assembly;)-300(in)]TJ -8.02 -11.956 Td [(this)-220(state)-220(it)-220(is)-220(possible)-220(to)-220(add)-220(communication)-220(r)18(equir)18(ements)-220(among)-220(dif)18(fer)18(-)]TJ 0 -11.955 Td [(ent)-250(pr)18(ocesses.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -21.431 Td [(Assembled:)]TJ +0 g 0 G +/F62 9.9626 Tf 58.381 0 Td [(State)-308(enter)18(ed)-308(after)-308(the)-309(assembly;)-337(computations)-308(using)-308(the)-308(associ-)]TJ -33.474 -11.955 Td [(ated)-310(sparse)-310(matrix,)-325(such)-310(as)-310(matrix-vector)-309(pr)18(oducts,)-325(ar)18(e)-310(only)-310(possible)-310(in)]TJ 0 -11.955 Td [(this)-250(state.)]TJ +0 g 0 G + 141.968 -29.888 Td [(11)]TJ +0 g 0 G +ET + +endstream +endobj +1022 0 obj << -/Producer (ESP Ghostscript 815.03) -/CreationDate (D:20070123225315) -/ModDate (D:20070123225315) +/Length 4957 >> -% 827 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 150.705 706.129 Td [(3.1.1)-1000(Descriptor)-250(Methods)]TJ 0 -19 Td [(3.1.2)-1000(get)]TJ +ET +q +1 0 0 1 195.029 687.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 198.017 687.129 Td [(local)]TJ +ET +q +1 0 0 1 220.194 687.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 223.183 687.129 Td [(rows)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(rows)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -72.478 -19 Td [(nr)-525(=)-525(desc%get_local_rows\050\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.974 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20.001 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20 Td [(desc)]TJ +0 g 0 G +/F62 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.011 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.917 -33.929 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(The)-399(number)-398(of)-399(local)-398(r)18(ows,)-436(i.e.)-756(the)-398(number)-399(of)-399(r)18(ows)-398(owned)]TJ -47.87 -11.956 Td [(by)-350(the)-349(curr)18(ent)-350(pr)18(ocess;)-399(as)-350(explained)-350(in)]TJ +0 0 1 rg 0 0 1 RG + [-349(1)]TJ +0 g 0 G + [(,)-375(it)-350(is)-349(equal)-350(to)]TJ/F91 10.3811 Tf 249.705 0 Td [(j)-24(I)]TJ/F60 7.5716 Tf 8.943 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.876 1.96 Td [(j)]TJ/F93 10.3811 Tf 5.433 0 Td [(+)]TJ/F91 10.3811 Tf 10.624 0 Td [(j)-23(B)]TJ/F60 7.5716 Tf 10.108 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.876 1.96 Td [(j)]TJ/F62 9.9626 Tf 3.003 0 Td [(.)-609(The)]TJ -293.569 -11.955 Td [(r)18(eturned)-250(value)-250(is)-250(speci\002c)-250(to)-250(the)-250(calling)-250(pr)18(ocess.)]TJ/F59 9.9626 Tf -24.906 -27.247 Td [(3.1.3)-1000(get)]TJ +ET +q +1 0 0 1 195.029 489.311 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 198.017 489.112 Td [(local)]TJ +ET +q +1 0 0 1 220.194 489.311 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 223.183 489.112 Td [(cols)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(cols)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -72.478 -19 Td [(nc)-525(=)-525(desc%get_local_cols\050\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.974 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.001 Td [(desc)]TJ +0 g 0 G +/F62 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.011 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.917 -33.929 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(The)-320(number)-320(of)-321(local)-320(cols,)-338(i.e.)-521(the)-320(number)-320(of)-321(indices)-320(used)-320(by)]TJ -47.87 -11.955 Td [(the)-322(curr)18(ent)-322(pr)18(ocess,)-340(including)-322(both)-322(local)-322(and)-322(halo)-322(indices;)-358(as)-322(explained)]TJ 0 -11.956 Td [(in)]TJ +0 0 1 rg 0 0 1 RG + [-284(1)]TJ +0 g 0 G + [(,)-294(i)1(t)-285(is)-284(equal)-285(to)]TJ/F91 10.3811 Tf 79.58 0 Td [(j)-24(I)]TJ/F60 7.5716 Tf 8.943 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F93 10.3811 Tf 5.193 0 Td [(+)]TJ/F91 10.3811 Tf 10.383 0 Td [(j)-24(B)]TJ/F60 7.5716 Tf 10.109 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F93 10.3811 Tf 5.192 0 Td [(+)]TJ/F91 10.3811 Tf 10.383 0 Td [(j)-24(H)]TJ/F60 7.5716 Tf 12.052 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F62 9.9626 Tf 3.004 0 Td [(.)-413(The)-285(r)18(eturned)-284(value)-285(is)-284(speci\002c)-285(to)-284(the)]TJ -153.464 -11.955 Td [(calling)-250(pr)18(ocess.)]TJ/F59 9.9626 Tf -24.907 -27.247 Td [(3.1.4)-1000(get)]TJ +ET +q +1 0 0 1 195.029 279.339 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 198.017 279.14 Td [(global)]TJ +ET +q +1 0 0 1 227.397 279.339 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 230.386 279.14 Td [(rows)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(rows)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -79.681 -19 Td [(nr)-525(=)-525(desc%get_global_rows\050\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.974 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.001 Td [(desc)]TJ +0 g 0 G +/F62 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.011 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.917 -33.929 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(The)-351(number)-351(of)-350(global)-351(r)18(ows,)-376(i.e.)-613(the)-351(size)-351(of)-350(the)-351(global)-351(index)]TJ -47.87 -11.955 Td [(space.)]TJ +0 g 0 G + 141.967 -29.888 Td [(12)]TJ +0 g 0 G +ET + +endstream +endobj +1026 0 obj << -/Type /ExtGState -/OPM 1 +/Length 4367 >> +stream +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 99.895 706.129 Td [(3.1.5)-1000(get)]TJ +ET +q +1 0 0 1 144.219 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 147.208 706.129 Td [(global)]TJ +ET +q +1 0 0 1 176.587 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 179.576 706.129 Td [(cols)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(cols)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -79.681 -18.974 Td [(nr)-525(=)-525(desc%get_global_cols\050\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.935 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.947 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.947 Td [(desc)]TJ +0 g 0 G +/F62 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.918 -33.889 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.947 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(The)-242(number)-242(of)-241(global)-242(cols;)-245(usually)-241(this)-242(is)-242(equal)-242(to)-241(the)-242(number)]TJ -47.87 -11.955 Td [(of)-250(global)-250(r)18(ows.)]TJ/F59 9.9626 Tf -24.907 -27.172 Td [(3.1.6)-1000(get)]TJ +ET +q +1 0 0 1 144.219 520.607 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 147.208 520.408 Td [(global)]TJ +ET +q +1 0 0 1 176.587 520.607 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 179.576 520.408 Td [(indices)-250(\227)-250(Get)-250(vector)-250(of)-250(global)-250(indices)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -79.681 -18.974 Td [(myidx)-525(=)-525(desc%get_global_indices\050[owned]\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.934 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.947 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.947 Td [(desc)]TJ +0 g 0 G +/F62 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -91.287 -31.902 Td [(owned)]TJ +0 g 0 G +/F62 9.9626 Tf 35.975 0 Td [(Choose)-330(if)-329(you)-330(only)-329(want)-330(owned)-330(indices)-329(\050)]TJ/F67 9.9626 Tf 185.766 0 Td [(owned)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(.true.)]TJ/F62 9.9626 Tf 62.764 0 Td [(\051)-330(or)-329(also)-330(halo)]TJ -259.598 -11.955 Td [(indices)-250(\050)]TJ/F67 9.9626 Tf 36.911 0 Td [(owned)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(.false.)]TJ/F62 9.9626 Tf 67.995 0 Td [(\051.)-310(Scope:)]TJ/F59 9.9626 Tf 40.328 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -166.813 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(;)-250(default:)]TJ/F67 9.9626 Tf 41.872 0 Td [(.true.)]TJ/F62 9.9626 Tf 31.382 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -163.436 -33.89 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.947 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(The)-277(global)-277(indices,)-284(r)18(eturned)-277(as)-277(an)-277(allocatable)-277(integer)-277(array)-277(of)]TJ -47.87 -11.955 Td [(kind)]TJ/F67 9.9626 Tf 22.814 0 Td [(psb_lpk_)]TJ/F62 9.9626 Tf 44.334 0 Td [(and)-250(rank)-250(1.)]TJ/F59 9.9626 Tf -92.055 -27.171 Td [(3.1.7)-1000(get)]TJ +ET +q +1 0 0 1 144.219 267.119 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 147.208 266.92 Td [(context)-250(\227)-250(Get)-250(communication)-250(context)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -47.313 -18.975 Td [(ctxt)-525(=)-525(desc%get_context\050\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.934 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.947 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.947 Td [(desc)]TJ +0 g 0 G +/F62 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.918 -33.889 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.947 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(The)-250(communication)-250(context.)]TJ +0 g 0 G + 94.098 -29.888 Td [(13)]TJ +0 g 0 G +ET endstream endobj -836 0 obj +1031 0 obj << -/Length 4927 +/Length 4754 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(Data)-250(management)-250(and)-250(auxiliary)-250(routines)]TJ +/F59 9.9626 Tf 150.705 706.129 Td [(3.1.8)-1000(Clone)-250(\227)-250(clone)-250(current)-250(object)]TJ 0 g 0 G -/F54 9.9626 Tf 190.375 0 Td [(including:)]TJ 0 g 0 G - -154.569 -24.208 Td [(\225)]TJ +/F67 9.9626 Tf 0 -19.289 Td [(call)-1050(desc%clone\050descout,info\051)]TJ 0 g 0 G - [-500(Parallel)-250(envir)18(onment)-250(management)]TJ +/F59 9.9626 Tf 0 -22.422 Td [(T)90(ype:)]TJ 0 g 0 G - 0 -18.081 Td [(\225)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - [-500(Communication)-250(descriptors)-250(allocation;)]TJ +/F59 9.9626 Tf -29.828 -20.597 Td [(On)-250(Entry)]TJ 0 g 0 G - 0 -18.082 Td [(\225)]TJ 0 g 0 G - [-500(Dense)-250(and)-250(sparse)-250(matrix)-250(allocation;)]TJ + 0 -20.598 Td [(desc)]TJ 0 g 0 G - 0 -18.081 Td [(\225)]TJ +/F62 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ 0 g 0 G - [-500(Dense)-250(and)-250(sparse)-250(matrix)-250(build)-250(and)-250(update;)]TJ +/F59 9.9626 Tf -77.917 -34.377 Td [(On)-250(Return)]TJ 0 g 0 G - 0 -18.082 Td [(\225)]TJ 0 g 0 G - [-500(Sparse)-250(matrix)-250(and)-250(data)-250(distribution)-250(pr)18(epr)18(ocessing.)]TJ + 0 -20.597 Td [(descout)]TJ 0 g 0 G -/F51 9.9626 Tf -35.806 -24.207 Td [(Preconditioner)-250(routines)]TJ +/F62 9.9626 Tf 39.84 0 Td [(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ 0 g 0 G +/F59 9.9626 Tf -39.84 -20.597 Td [(info)]TJ 0 g 0 G - 0 -24.208 Td [(Iterative)-250(methods)]TJ +/F62 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F59 9.9626 Tf -23.8 -28.097 Td [(3.1.9)-1000(CNV)-250(\227)-250(convert)-250(internal)-250(storage)-250(format)]TJ 0 g 0 G -/F54 9.9626 Tf 84.951 0 Td [(a)-250(subset)-250(of)-250(Krylov)-250(subspace)-250(iterative)-250(methods)]TJ -84.951 -23.137 Td [(The)-262(following)-263(naming)-262(scheme)-262(has)-262(been)-263(adopted)-262(for)-262(all)-263(the)-262(symbols)-262(internally)]TJ 0 -11.955 Td [(de\002ned)-250(in)-250(the)-250(PSBLAS)-250(softwar)18(e)-250(package:)]TJ 0 g 0 G - 13.888 -23.137 Td [(\225)]TJ +/F67 9.9626 Tf 0 -19.289 Td [(call)-1050(desc%cnv\050mold\051)]TJ 0 g 0 G - [-500(all)-250(symbols)-250(\050i.e.)-310(subr)18(outine)-250(names,)-250(data)-250(types...\051)-310(ar)18(e)-250(pr)18(e\002xed)-250(by)]TJ/F59 9.9626 Tf 294.184 0 Td [(psb_)]TJ +/F59 9.9626 Tf 0 -22.422 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf -294.184 -24.208 Td [(\225)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20.597 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.597 Td [(desc)]TJ 0 g 0 G - [-500(all)-250(data)-250(type)-250(names)-250(ar)18(e)-250(suf)18(\002xed)-250(by)]TJ/F59 9.9626 Tf 166.604 0 Td [(_type)]TJ +/F62 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf -166.604 -24.208 Td [(\225)]TJ +/F59 9.9626 Tf -77.917 -32.553 Td [(mold)]TJ 0 g 0 G - [-500(all)-250(constants)-250(ar)18(e)-250(suf)18(\002xed)-250(by)]TJ/F59 9.9626 Tf 135.59 0 Td [(_)]TJ +/F62 9.9626 Tf 28.782 0 Td [(the)-250(desir)18(ed)-250(integer)-250(storage)-250(format.)]TJ -3.876 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(object)-190(of)-190(type)-190(derived)-190(fr)18(om)-190(\050integer\051)]TJ/F67 9.9626 Tf 221.926 0 Td [(psb)]TJ +ET +q +1 0 0 1 413.855 356.476 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 416.994 356.277 Td [(T)]TJ +ET +q +1 0 0 1 422.851 356.476 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 425.99 356.277 Td [(base)]TJ +ET +q +1 0 0 1 447.539 356.476 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 450.677 356.277 Td [(vect)]TJ +ET +q +1 0 0 1 472.226 356.476 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 475.364 356.277 Td [(type)]TJ/F62 9.9626 Tf 20.921 0 Td [(.)]TJ -345.58 -22.59 Td [(The)]TJ/F67 9.9626 Tf 19.583 0 Td [(mold)]TJ/F62 9.9626 Tf 23.828 0 Td [(ar)18(guments)-292(may)-291(be)-292(employed)-292(to)-292(interface)-291(with)-292(special)-292(devices,)-302(such)]TJ -43.411 -11.955 Td [(as)-250(GPUs)-250(and)-250(other)-250(accelerators.)]TJ/F59 9.9626 Tf 0 -28.096 Td [(3.1.10)-1000(psb)]TJ +ET +q +1 0 0 1 202.769 293.835 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 205.758 293.636 Td [(cd)]TJ +ET +q +1 0 0 1 216.867 293.835 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 219.855 293.636 Td [(get)]TJ +ET +q +1 0 0 1 234.291 293.835 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 237.28 293.636 Td [(large)]TJ +ET +q +1 0 0 1 260.572 293.835 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 263.561 293.636 Td [(threshold)-190(\227)-190(Get)-190(threshold)-190(for)-190(index)-190(mapping)-190(switch)]TJ 0 g 0 G -/F54 9.9626 Tf -135.59 -24.208 Td [(\225)]TJ 0 g 0 G - [-500(all)-279(top-level)-279(subr)18(outine)-279(names)-279(follow)-279(the)-279(r)8(ule)]TJ/F59 9.9626 Tf 216.11 0 Td [(psb_xxname)]TJ/F54 9.9626 Tf 55.083 0 Td [(wher)18(e)]TJ/F59 9.9626 Tf 30.187 0 Td [(xx)]TJ/F54 9.9626 Tf 13.241 0 Td [(can)]TJ -303.602 -11.955 Td [(be)-250(either:)]TJ +/F67 9.9626 Tf -112.856 -19.29 Td [(ith)-525(=)-525(psb_cd_get_large_threshold\050\051)]TJ 0 g 0 G -/F51 9.9626 Tf 11.955 -24.208 Td [(\226)]TJ +/F59 9.9626 Tf 0 -22.421 Td [(T)90(ype:)]TJ 0 g 0 G -/F59 9.9626 Tf 9.963 0 Td [(ge)]TJ/F54 9.9626 Tf 10.46 0 Td [(:)-310(the)-250(r)18(outine)-250(is)-250(r)18(elated)-250(to)-250(dense)-250(data,)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -20.423 -18.081 Td [(\226)]TJ +/F59 9.9626 Tf -29.828 -20.597 Td [(On)-250(Return)]TJ 0 g 0 G -/F59 9.9626 Tf 9.963 0 Td [(sp)]TJ/F54 9.9626 Tf 10.46 0 Td [(:)-310(the)-250(r)18(outine)-250(is)-250(r)18(elated)-250(to)-250(sparse)-250(data,)]TJ 0 g 0 G -/F51 9.9626 Tf -20.423 -18.081 Td [(\226)]TJ + 0 -20.598 Td [(Function)-250(value)]TJ 0 g 0 G -/F59 9.9626 Tf 9.963 0 Td [(cd)]TJ/F54 9.9626 Tf 10.46 0 Td [(:)-310(the)-250(r)18(outine)-250(is)-250(r)18(elated)-250(to)-250(communication)-250(descriptor)-250(\050see)]TJ -0 0 1 rg 0 0 1 RG - [-250(3)]TJ +/F62 9.9626 Tf 72.777 0 Td [(The)-250(curr)18(ent)-250(value)-250(for)-250(the)-250(size)-250(thr)18(eshold.)]TJ/F59 9.9626 Tf -72.777 -28.096 Td [(3.1.11)-1000(psb)]TJ +ET +q +1 0 0 1 202.769 182.833 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 205.758 182.634 Td [(cd)]TJ +ET +q +1 0 0 1 216.867 182.833 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 219.855 182.634 Td [(set)]TJ +ET +q +1 0 0 1 233.175 182.833 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 236.164 182.634 Td [(large)]TJ +ET +q +1 0 0 1 259.457 182.833 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 262.445 182.634 Td [(threshold)-190(\227)-190(Set)-190(threshold)-190(for)-190(index)-190(mapping)-190(switch)]TJ 0 g 0 G - [(\051.)]TJ -32.378 -24.208 Td [(For)-215(example)-215(the)]TJ/F59 9.9626 Tf 72.515 0 Td [(psb_geins)]TJ/F54 9.9626 Tf 47.073 0 Td [(,)]TJ/F59 9.9626 Tf 4.704 0 Td [(psb_spins)]TJ/F54 9.9626 Tf 49.218 0 Td [(and)]TJ/F59 9.9626 Tf 19.011 0 Td [(psb_cdins)]TJ/F54 9.9626 Tf 49.218 0 Td [(perform)-215(the)-215(same)]TJ -241.739 -11.955 Td [(action)-247(\050see)]TJ -0 0 1 rg 0 0 1 RG - [-246(6)]TJ 0 g 0 G - [(\051)-247(on)-246(dense)-247(matrices,)-247(sparse)-247(matric)1(es)-247(and)-247(communication)-246(de-)]TJ 0 -11.956 Td [(scriptors)-222(r)18(espectively)111(.)-301(Interface)-222(overloading)-223(allows)-222(the)-222(usage)-222(of)-223(the)-222(same)]TJ 0 -11.955 Td [(subr)18(outine)-250(names)-250(for)-250(both)-250(r)18(eal)-250(and)-250(complex)-250(data.)]TJ -24.907 -23.137 Td [(In)-288(the)-288(description)-288(of)-289(the)-288(subr)18(outines,)-297(ar)18(guments)-289(or)-288(ar)18(gument)-288(entries)-288(ar)18(e)-288(clas-)]TJ 0 -11.955 Td [(si\002ed)-250(as:)]TJ +/F67 9.9626 Tf -111.74 -19.289 Td [(call)-525(psb_cd_set_large_threshold\050ith\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -23.137 Td [(global)]TJ +/F59 9.9626 Tf 0 -22.422 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 33.763 0 Td [(For)-270(input)-270(ar)18(guments,)-275(the)-270(value)-271(must)-270(be)-270(the)-270(same)-270(on)-270(all)-270(pr)18(ocesses)-270(par)18(-)]TJ -8.856 -11.955 Td [(ticipating)-276(in)-277(the)-276(subr)18(outine)-277(call;)-289(for)-277(output)-276(ar)18(guments)-277(the)-276(value)-277(is)-276(guar)18(-)]TJ 0 -11.955 Td [(anteed)-250(to)-250(be)-250(the)-250(same.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -24.208 Td [(local)]TJ +/F59 9.9626 Tf -29.828 -20.597 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 26.56 0 Td [(Each)-250(pr)18(ocess)-250(has)-250(its)-250(own)-250(value\050s\051)-250(independently)111(.)]TJ -26.56 -23.137 Td [(T)92(o)-250(\002nish)-250(our)-250(general)-250(description,)-250(we)-250(de\002ne)-250(a)-250(version)-250(string)-250(with)-250(the)-250(constant)]TJ/F59 9.9626 Tf 122.168 -24.059 Td [(psb_version_string_)]TJ/F54 9.9626 Tf -122.168 -24.059 Td [(whose)-250(curr)18(ent)-250(value)-250(is)]TJ/F59 9.9626 Tf 101.857 0 Td [(3.8.0)]TJ 0 g 0 G -/F54 9.9626 Tf 67.508 -29.888 Td [(5)]TJ +/F62 9.9626 Tf 166.874 -29.888 Td [(14)]TJ 0 g 0 G ET endstream endobj -841 0 obj +1036 0 obj << -/Length 8378 +/Length 5640 >> stream 0 g 0 G 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(2.3)-1000(Application)-250(structure)]TJ/F54 9.9626 Tf 0 -19.381 Td [(The)-244(main)-244(underlyi)1(ng)-244(principle)-244(of)-244(the)-244(PSBLAS)-243(library)-244(is)-244(that)-244(the)-243(library)-244(objects)]TJ 0 -11.956 Td [(ar)18(e)-236(cr)18(eated)-235(and)-236(exist)-235(with)-236(r)18(efer)18(ence)-235(to)-236(a)-235(discr)18(etized)-236(space)-236(t)1(o)-236(which)-236(ther)18(e)-235(corr)18(e-)]TJ 0 -11.955 Td [(sponds)-258(an)-257(index)-258(space)-257(and)-258(a)-258(matrix)-257(sparsity)-258(pattern.)-332(As)-258(an)-258(example,)-259(consider)]TJ 0 -11.955 Td [(a)-310(cell-center)18(ed)-309(\002nite-volume)-310(discr)18(etization)-310(of)-309(the)-310(Navier)18(-Stokes)-310(e)1(quations)-310(on)]TJ 0 -11.955 Td [(a)-234(simulation)-235(domain;)-239(the)-234(index)-235(space)-234(1)-179(.)-192(.)-192(.)]TJ/F52 9.9626 Tf 185.595 0 Td [(n)]TJ/F54 9.9626 Tf 7.998 0 Td [(is)-234(isomorphic)-235(to)-234(the)-234(set)-235(of)-234(cell)-234(cen-)]TJ -193.593 -11.955 Td [(ters,)-210(wher)18(eas)-200(the)-201(pattern)-200(of)-200(the)-201(associated)-200(linear)-200(system)-200(matrix)-201(is)-200(isomorphic)-200(to)]TJ 0 -11.956 Td [(the)-294(adjacency)-294(graph)-294(imposed)-294(on)-294(the)-294(discr)18(et)1(ization)-294(mesh)-294(by)-294(the)-294(discr)18(etization)]TJ 0 -11.955 Td [(stencil.)]TJ 14.944 -12.17 Td [(Thus)-343(the)-343(\002rst)-343(or)18(der)-344(of)-343(business)-343(is)-343(to)-343(establish)-343(an)-343(index)-343(space,)-367(and)-343(this)-343(is)]TJ -14.944 -11.955 Td [(done)-287(with)-287(a)-287(call)-287(to)]TJ/F59 9.9626 Tf 85.52 0 Td [(psb_cdall)]TJ/F54 9.9626 Tf 49.932 0 Td [(in)-287(which)-287(we)-287(specify)-287(the)-287(size)-287(of)-287(the)-287(index)-287(space)]TJ/F52 9.9626 Tf -135.328 -11.956 Td [(n)]TJ/F54 9.9626 Tf 8.041 0 Td [(and)-238(the)-239(allocation)-238(of)-239(the)-239(ele)1(ments)-239(of)-239(the)-238(index)-239(space)-238(to)-239(the)-238(various)-239(pr)18(ocesses)]TJ -8.165 -11.955 Td [(making)-250(up)-250(the)-250(MPI)-250(\050virtual\051)-250(parallel)-250(machine.)]TJ 14.944 -12.17 Td [(The)-366(index)-367(space)-366(is)-366(partitioned)-367(among)-366(pr)18(ocesses,)-396(and)-366(this)-366(cr)18(eates)-367(a)-366(map-)]TJ -14.944 -11.956 Td [(ping)-301(fr)18(om)-300(the)-301(\223global\224)-301(numbering)-300(1)-180(.)-191(.)-192(.)]TJ/F52 9.9626 Tf 176.584 0 Td [(n)]TJ/F54 9.9626 Tf 8.659 0 Td [(to)-301(a)-300(numbering)-301(\223local\224)-301(to)-301(each)-300(pr)18(o-)]TJ -185.243 -11.955 Td [(cess;)-230(each)-221(pr)18(ocess)]TJ/F52 9.9626 Tf 79.682 0 Td [(i)]TJ/F54 9.9626 Tf 5.162 0 Td [(will)-221(own)-220(a)-221(certain)-220(subset)-221(1)-179(.)-192(.)-192(.)]TJ/F52 9.9626 Tf 130.532 0 Td [(n)]TJ/F54 9.9626 Tf 5.664 -1.494 Td [(r)18(ow)]TJ/F52 5.9776 Tf 17.537 -1.649 Td [(i)]TJ/F54 9.9626 Tf 2.775 3.143 Td [(,)-226(each)-221(element)-221(of)-220(which)]TJ -241.352 -11.955 Td [(corr)18(esponds)-258(to)-259(a)-258(certain)-258(element)-258(of)-259(1)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 177.035 0 Td [(n)]TJ/F54 9.9626 Tf 5.664 0 Td [(.)-335(The)-258(user)-259(doe)1(s)-259(not)-258(set)-258(explicitly)-259(this)]TJ -182.699 -11.955 Td [(mapping;)-225(when)-212(the)-212(application)-212(needs)-213(to)-212(indicate)-212(to)-212(which)-213(el)1(ement)-213(of)-212(the)-212(index)]TJ 0 -11.955 Td [(space)-305(a)-306(certain)-305(item)-306(is)-305(r)18(elated,)-320(such)-305(as)-306(the)-305(r)18(ow)-306(and)-305(column)-306(index)-305(of)-306(a)-305(matrix)]TJ 0 -11.956 Td [(coef)18(\002cient,)-283(it)-276(does)-277(so)-276(in)-277(the)-276(\223global\224)-277(numb)1(ering,)-284(and)-276(the)-276(library)-277(will)-276(translate)]TJ 0 -11.955 Td [(into)-250(the)-250(appr)18(opriate)-250(\223local\224)-250(numbering.)]TJ 14.944 -12.17 Td [(For)-324(a)-325(given)-324(index)-324(space)-325(1)-179(.)-191(.)-192(.)]TJ/F52 9.9626 Tf 129.74 0 Td [(n)]TJ/F54 9.9626 Tf 8.895 0 Td [(ther)18(e)-324(ar)18(e)-325(many)-324(possible)-324(associated)-325(topolo-)]TJ -153.579 -11.956 Td [(gies,)-213(i.e.)-295(many)-204(dif)18(fer)18(ent)-204(discr)18(etization)-204(stencils;)-220(thus)-204(the)-204(description)-204(of)-204(the)-204(index)]TJ 0 -11.955 Td [(space)-277(is)-278(not)-277(completed)-278(until)-277(the)-277(user)-278(has)-277(de\002ned)-278(a)-277(sparsity)-278(p)1(attern,)-285(either)-277(ex-)]TJ 0 -11.955 Td [(plicitly)-263(thr)18(ough)]TJ/F59 9.9626 Tf 71.63 0 Td [(psb_cdins)]TJ/F54 9.9626 Tf 49.698 0 Td [(or)-264(im)1(plicitly)-264(thr)18(ough)]TJ/F59 9.9626 Tf 95.326 0 Td [(psb_spins)]TJ/F54 9.9626 Tf 47.073 0 Td [(.)-351(T)1(he)-264(descriptor)-263(is)]TJ -263.727 -11.955 Td [(\002nalized)-225(with)-225(a)-225(call)-226(to)]TJ/F59 9.9626 Tf 98.787 0 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 49.316 0 Td [(and)-225(a)-225(sparse)-225(matrix)-226(with)-225(a)-225(call)-225(to)]TJ/F59 9.9626 Tf 146.044 0 Td [(psb_spasb)]TJ/F54 9.9626 Tf 47.073 0 Td [(.)]TJ -341.22 -11.955 Td [(After)]TJ/F59 9.9626 Tf 26.16 0 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 50.21 0 Td [(each)-315(pr)18(ocess)]TJ/F52 9.9626 Tf 59.13 0 Td [(i)]TJ/F54 9.9626 Tf 6.101 0 Td [(will)-315(have)-315(de\002ned)-315(a)-314(set)-315(of)-315(\223halo\224)-315(\050or)-315(\223ghost\224\051)]TJ -141.601 -11.955 Td [(indices)]TJ/F52 9.9626 Tf 34.731 0 Td [(n)]TJ/F54 9.9626 Tf 5.663 -1.495 Td [(r)18(ow)]TJ/F52 5.9776 Tf 17.538 -1.648 Td [(i)]TJ/F85 10.3811 Tf 5.211 3.143 Td [(+)]TJ/F54 9.9626 Tf 10.506 0 Td [(1)-179(.)-192(.)-192(.)]TJ/F52 9.9626 Tf 19.967 0 Td [(n)]TJ/F54 9.9626 Tf 5.664 -3.831 Td [(col)]TJ/F52 5.9776 Tf 12.794 -1.648 Td [(i)]TJ/F54 9.9626 Tf 2.775 5.479 Td [(,)-377(denoting)-352(elements)-351(of)-352(the)-352(index)-351(space)-352(that)-351(ar)18(e)]TJ/F52 9.9626 Tf 215.582 0 Td [(not)]TJ/F54 9.9626 Tf -330.431 -13.79 Td [(assigned)-289(to)-290(pr)18(ocess)]TJ/F52 9.9626 Tf 88.744 0 Td [(i)]TJ/F54 9.9626 Tf 2.964 0 Td [(;)-309(however)-290(t)1(he)-290(variables)-289(associated)-290(with)-289(them)-290(ar)18(e)-289(needed)]TJ -91.708 -11.955 Td [(to)-289(complete)-289(computations)-289(associated)-289(with)-290(the)-289(sparse)-289(matrix)]TJ/F52 9.9626 Tf 269.662 0 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(,)-299(and)-289(thus)-289(they)]TJ -276.98 -11.955 Td [(have)-266(to)-266(be)-266(fetched)-265(fr)18(om)-266(\050neighbouring\051)-266(pr)18(ocesses.)-358(The)-266(descriptor)-265(of)-266(the)-266(index)]TJ 0 -11.956 Td [(space)-294(is)-293(built)-294(exactly)-294(for)-293(the)-294(purpose)-294(of)-293(pr)18(operly)-294(sequencing)-294(the)-293(communica-)]TJ 0 -11.955 Td [(tion)-250(steps)-250(r)18(equir)18(ed)-250(to)-250(achieve)-250(this)-250(objective.)]TJ 14.944 -12.17 Td [(A)-197(simple)-197(application)-197(str)8(uctur)18(e)-197(will)-197(walk)-197(thr)18(ough)-197(the)-197(index)-197(space)-197(allocation,)]TJ -14.944 -11.956 Td [(matrix/vector)-250(cr)18(eation)-250(and)-250(linear)-250(system)-250(solution)-250(as)-250(follows:)]TJ -0 g 0 G - 12.453 -20.571 Td [(1.)]TJ -0 g 0 G - [-500(Initialize)-250(parallel)-250(envir)18(onment)-250(with)]TJ/F59 9.9626 Tf 171.465 0 Td [(psb_init)]TJ -0 g 0 G -/F54 9.9626 Tf -171.465 -20.787 Td [(2.)]TJ -0 g 0 G - [-500(Initialize)-250(index)-250(space)-250(with)]TJ/F59 9.9626 Tf 130.489 0 Td [(psb_cdall)]TJ -0 g 0 G -/F54 9.9626 Tf -130.489 -20.788 Td [(3.)]TJ -0 g 0 G - [-500(Allocate)-221(sparse)-221(matrix)-221(and)-221(dense)-221(vectors)-220(with)]TJ/F59 9.9626 Tf 215.843 0 Td [(psb_spall)]TJ/F54 9.9626 Tf 49.274 0 Td [(and)]TJ/F59 9.9626 Tf 19.068 0 Td [(psb_geall)]TJ 0 g 0 G -/F54 9.9626 Tf -284.185 -20.787 Td [(4.)]TJ -0 g 0 G - [-500(Loop)-320(over)-320(all)-320(local)-320(r)18(ows,)-338(generate)-320(matrix)-320(and)-320(vector)-320(entries,)-337(and)-320(insert)]TJ 12.454 -11.955 Td [(them)-250(with)]TJ/F59 9.9626 Tf 47.849 0 Td [(psb_spins)]TJ/F54 9.9626 Tf 49.564 0 Td [(and)]TJ/F59 9.9626 Tf 19.358 0 Td [(psb_geins)]TJ -0 g 0 G -/F54 9.9626 Tf -129.225 -20.787 Td [(5.)]TJ +BT +/F59 9.9626 Tf 99.895 706.129 Td [(ith)]TJ 0 g 0 G - [-500(Assemble)-250(the)-250(various)-250(entities:)]TJ +/F62 9.9626 Tf 17.704 0 Td [(the)-250(new)-250(thr)18(eshold)-250(for)-250(communication)-250(descriptors.)]TJ 7.203 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(gr)18(eater)-250(than)-250(zer)18(o.)]TJ -24.907 -20.813 Td [(Note:)-665(the)-427(thr)18(eshold)-428(value)-427(is)-428(only)-427(queried)-428(by)-427(the)-428(library)-427(at)-428(the)-427(time)-428(a)-427(call)]TJ 0 -11.955 Td [(to)]TJ/F67 9.9626 Tf 12.451 0 Td [(psb_cdall)]TJ/F62 9.9626 Tf 50.837 0 Td [(is)-378(executed,)-409(ther)18(efor)18(e)-378(changing)-378(the)-378(thr)18(eshold)-377(has)-378(no)-378(ef)18(fect)-378(on)]TJ -63.288 -11.955 Td [(communication)-339(descriptors)-340(that)-339(have)-339(alr)18(eady)-340(been)-339(initialized.)-578(Mor)18(eover)-339(the)]TJ 0 -11.955 Td [(thr)18(eshold)-250(must)-250(have)-250(the)-250(same)-250(value)-250(on)-250(all)-250(pr)18(ocesses.)]TJ/F59 9.9626 Tf 0 -26.933 Td [(3.1.12)-1000(get)]TJ +ET +q +1 0 0 1 149.2 574.896 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 152.189 574.697 Td [(p)]TJ +ET +q +1 0 0 1 158.874 574.896 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 161.863 574.697 Td [(adjcncy)-250(\227)-250(Get)-250(process)-250(adjacency)-250(list)]TJ 0 g 0 G - 17.774 -20.787 Td [(\050a\051)]TJ 0 g 0 G -/F59 9.9626 Tf 16.597 0 Td [(psb_cdasb)]TJ +/F67 9.9626 Tf -61.968 -18.964 Td [(list)-525(=)-1050(desc%get_p_adjcncy\050\051)]TJ 0 g 0 G -/F54 9.9626 Tf -17.125 -16.371 Td [(\050b\051)]TJ +/F59 9.9626 Tf 0 -20.813 Td [(T)90(ype:)]TJ 0 g 0 G -/F59 9.9626 Tf 17.125 0 Td [(psb_spasb)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf -16.039 -16.371 Td [(\050c\051)]TJ +/F59 9.9626 Tf -29.828 -19.483 Td [(On)-250(Return)]TJ 0 g 0 G -/F59 9.9626 Tf 16.039 0 Td [(psb_geasb)]TJ 0 g 0 G -/F54 9.9626 Tf 122.541 -29.888 Td [(6)]TJ + 0 -19.484 Td [(Function)-250(value)]TJ 0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(The)-190(curr)18(ent)-190(list)-190(of)-190(adjacent)-190(pr)18(ocesses,)-202(i.e.)-290(pr)18(ocesses)-190(with)-190(which)]TJ -47.87 -11.955 Td [(the)-250(curr)18(ent)-250(one)-250(has)-250(to)-250(exchange)-250(halo)-250(data.)]TJ/F59 9.9626 Tf -24.907 -26.933 Td [(3.1.13)-1000(set)]TJ ET - -endstream -endobj -854 0 obj -<< -/Length 7484 ->> -stream -0 g 0 G -0 g 0 G -0 g 0 G +q +1 0 0 1 148.085 457.264 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q BT -/F54 9.9626 Tf 112.349 706.129 Td [(6.)]TJ -0 g 0 G - [-500(Choose)-301(the)-300(pr)18(econditioner)-301(to)-300(be)-301(used)-300(with)]TJ/F59 9.9626 Tf 205.537 0 Td [(prec%init)]TJ/F54 9.9626 Tf 50.068 0 Td [(and)-301(build)-300(it)-301(with)]TJ/F59 9.9626 Tf -243.152 -11.955 Td [(prec%build)]TJ -0 0 1 rg 0 0 1 RG -/F54 7.5716 Tf 52.303 3.616 Td [(3)]TJ -0 g 0 G -/F54 9.9626 Tf 4.284 -3.616 Td [(.)]TJ +/F59 9.9626 Tf 151.073 457.065 Td [(p)]TJ +ET +q +1 0 0 1 157.758 457.264 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 160.747 457.065 Td [(adjcncy)-250(\227)-250(Set)-250(process)-250(adjacency)-250(list)]TJ 0 g 0 G - -69.04 -22.307 Td [(7.)]TJ 0 g 0 G - [-500(Call)-190(the)-190(iterative)-190(driver)]TJ/F59 9.9626 Tf 115.326 0 Td [(psb_krylov)]TJ/F54 9.9626 Tf 54.196 0 Td [(with)-190(the)-190(method)-190(of)-190(choice,)-202(e.g.)]TJ/F59 9.9626 Tf 134.982 0 Td [(bicgstab)]TJ/F54 9.9626 Tf 41.843 0 Td [(.)]TJ -358.801 -21.712 Td [(This)-250(is)-250(the)-250(str)8(uctur)18(e)-250(of)-250(the)-250(sample)-250(pr)18(ograms)-250(in)-250(the)-250(dir)18(ectory)]TJ/F59 9.9626 Tf 266.418 0 Td [(test/pargen/)]TJ/F54 9.9626 Tf 62.764 0 Td [(.)]TJ -314.238 -12.551 Td [(For)-257(a)-258(simulation)-257(in)-257(which)-257(the)-258(same)-257(discr)18(etization)-257(mesh)-257(is)-258(used)-257(over)-257(multi-)]TJ -14.944 -11.955 Td [(ple)-250(time)-250(steps,)-250(the)-250(following)-250(str)8(uctur)18(e)-250(may)-250(be)-250(mor)18(e)-250(appr)18(opriate:)]TJ +/F67 9.9626 Tf -60.852 -18.964 Td [(call)-525(desc%set_p_adjcncy\050list\051)]TJ 0 g 0 G - 12.454 -21.712 Td [(1.)]TJ +/F59 9.9626 Tf 0 -20.813 Td [(T)90(ype:)]TJ 0 g 0 G - [-500(Initialize)-250(parallel)-250(envir)18(onment)-250(with)]TJ/F59 9.9626 Tf 171.464 0 Td [(psb_init)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf -171.464 -22.307 Td [(2.)]TJ +/F59 9.9626 Tf -29.828 -19.483 Td [(On)-250(Entry)]TJ 0 g 0 G - [-500(Initialize)-250(index)-250(space)-250(with)]TJ/F59 9.9626 Tf 130.489 0 Td [(psb_cdall)]TJ 0 g 0 G -/F54 9.9626 Tf -130.489 -22.307 Td [(3.)]TJ + 0 -19.483 Td [(list)]TJ 0 g 0 G - [-500(Loop)-248(over)-248(the)-248(topology)-248(of)-248(the)-248(discr)18(eti)1(zation)-248(mesh)-248(and)-248(build)-248(the)-248(descrip-)]TJ 12.453 -11.955 Td [(tor)-250(with)]TJ/F59 9.9626 Tf 37.857 0 Td [(psb_cdins)]TJ +/F62 9.9626 Tf 19.358 0 Td [(the)-250(list)-250(of)-250(adjacent)-250(pr)18(ocesses.)]TJ 5.549 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(one-dimensional)-250(array)-250(of)-250(integers)-250(of)-250(kind)]TJ/F67 9.9626 Tf 250.209 0 Td [(psb_ipk_)]TJ/F62 9.9626 Tf 41.843 0 Td [(.)]TJ -316.959 -20.813 Td [(Note:)-596(this)-392(method)-393(can)-393(be)-393(called)-393(after)-392(a)-393(call)-393(to)]TJ/F67 9.9626 Tf 216.367 0 Td [(psb_cdall)]TJ/F62 9.9626 Tf 50.987 0 Td [(and)-393(befor)18(e)-393(a)-392(call)]TJ -267.354 -11.955 Td [(to)]TJ/F67 9.9626 Tf 11.711 0 Td [(psb_cdasb)]TJ/F62 9.9626 Tf 47.073 0 Td [(.)-470(The)-304(user)-303(is)-304(specifying)-303(her)18(e)-304(some)-303(knowledge)-304(about)-303(which)-304(pr)18(o-)]TJ -58.784 -11.955 Td [(cesses)-208(ar)18(e)-208(topol)1(ogical)-208(neighbours)-208(of)-208(the)-207(curr)18(ent)-208(pr)18(ocess.)-296(The)-208(availability)-207(of)-208(this)]TJ 0 -11.955 Td [(information)-250(may)-250(speed)-250(up)-250(the)-250(execution)-250(of)-250(the)-250(assembly)-250(call)]TJ/F67 9.9626 Tf 269.656 0 Td [(psb_cdasb)]TJ/F62 9.9626 Tf 47.073 0 Td [(.)]TJ/F59 9.9626 Tf -316.729 -26.934 Td [(3.1.14)-1000(fnd)]TJ +ET +q +1 0 0 1 151.412 247.089 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 154.401 246.89 Td [(owner)-250(\227)-250(Find)-250(the)-250(owner)-250(process)-250(of)-250(a)-250(set)-250(of)-250(indices)]TJ 0 g 0 G -/F54 9.9626 Tf -50.31 -22.307 Td [(4.)]TJ 0 g 0 G - [-500(Assemble)-250(the)-250(descriptor)-250(with)]TJ/F59 9.9626 Tf 144.386 0 Td [(psb_cdasb)]TJ +/F67 9.9626 Tf -54.506 -18.964 Td [(call)-525(desc%fnd_owner\050idx,iprc,info\051)]TJ 0 g 0 G -/F54 9.9626 Tf -144.386 -22.307 Td [(5.)]TJ +/F59 9.9626 Tf 0 -20.813 Td [(T)90(ype:)]TJ 0 g 0 G - [-500(Allocate)-190(the)-190(sparse)-190(matrices)-190(and)-190(dense)-190(vectors)-190(with)]TJ/F59 9.9626 Tf 237.676 0 Td [(psb_spall)]TJ/F54 9.9626 Tf 48.966 0 Td [(and)]TJ/F59 9.9626 Tf 18.759 0 Td [(psb_geall)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf -305.401 -22.308 Td [(6.)]TJ +/F59 9.9626 Tf -29.828 -19.483 Td [(On)-250(Entry)]TJ 0 g 0 G - [-500(Loop)-250(over)-250(the)-250(time)-250(steps:)]TJ 0 g 0 G - 17.773 -22.307 Td [(\050a\051)]TJ + 0 -19.484 Td [(idx)]TJ 0 g 0 G - [-500(If)-297(after)-298(\002rst)-297(time)-298(step,)-309(r)18(einitialize)-297(the)-298(sparse)-297(matrix)-298(with)]TJ/F59 9.9626 Tf 269.151 0 Td [(psb_sprn)]TJ/F54 9.9626 Tf 41.843 0 Td [(;)]TJ -294.396 -11.955 Td [(also)-250(zer)18(o)-250(out)-250(the)-250(dense)-250(vectors;)]TJ +/F62 9.9626 Tf 19.368 0 Td [(the)-250(list)-250(of)-250(global)-250(indices)-250(for)-250(which)-250(we)-250(need)-250(the)-250(owning)-250(pr)18(ocesses.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(one-dimensional)-250(array)-250(of)-250(integers)-250(of)-250(kind)]TJ/F67 9.9626 Tf 250.209 0 Td [(psb_lpk_)]TJ/F62 9.9626 Tf 41.843 0 Td [(.)]TJ 0 g 0 G - -17.126 -17.131 Td [(\050b\051)]TJ + -150.084 -29.888 Td [(15)]TJ 0 g 0 G - [-500(Loop)-428(over)-429(the)-428(mesh,)-473(generate)-429(the)-428(coef)18(\002cients)-429(and)-428(insert/update)]TJ 17.126 -11.955 Td [(them)-250(with)]TJ/F59 9.9626 Tf 47.85 0 Td [(psb_spins)]TJ/F54 9.9626 Tf 49.563 0 Td [(and)]TJ/F59 9.9626 Tf 19.358 0 Td [(psb_geins)]TJ +ET + +endstream +endobj +1043 0 obj +<< +/Length 7676 +>> +stream 0 g 0 G -/F54 9.9626 Tf -132.811 -17.131 Td [(\050c\051)]TJ 0 g 0 G - [-500(Assemble)-250(with)]TJ/F59 9.9626 Tf 84.223 0 Td [(psb_spasb)]TJ/F54 9.9626 Tf 49.564 0 Td [(and)]TJ/F59 9.9626 Tf 19.357 0 Td [(psb_geasb)]TJ 0 g 0 G -/F54 9.9626 Tf -154.808 -17.132 Td [(\050d\051)]TJ +BT +/F59 9.9626 Tf 150.705 706.129 Td [(On)-250(Return)]TJ 0 g 0 G - [-500(Choose)-250(and)-250(build)-250(pr)18(econditioner)-250(with)]TJ/F59 9.9626 Tf 188.671 0 Td [(prec%init)]TJ/F54 9.9626 Tf 49.563 0 Td [(and)]TJ/F59 9.9626 Tf 19.358 0 Td [(prec%build)]TJ 0 g 0 G -/F54 9.9626 Tf -256.277 -17.131 Td [(\050e\051)]TJ + 0 -21.934 Td [(iprc)]TJ 0 g 0 G - [-500(Call)-250(the)-250(iterative)-250(method)-250(of)-250(choice,)-250(e.g.)]TJ/F59 9.9626 Tf 190.902 0 Td [(psb_bicgstab)]TJ/F54 9.9626 Tf -221.338 -22.307 Td [(The)-276(insertion)-275(r)18(outines)-276(will)-275(be)-276(called)-275(as)-276(many)-276(times)-275(as)-276(needed;)-288(they)-276(only)-275(need)]TJ 0 -11.955 Td [(to)-214(be)-213(called)-214(on)-213(the)-214(data)-214(t)1(hat)-214(is)-214(actually)-213(allocated)-214(to)-213(the)-214(curr)18(ent)-213(pr)18(ocess,)-221(i.e.)-298(each)]TJ 0 -11.955 Td [(pr)18(ocess)-250(generates)-250(its)-250(own)-250(data.)]TJ 14.944 -12.551 Td [(In)-219(principle)-218(ther)18(e)-219(is)-219(no)-218(speci\002c)-219(or)18(der)-219(in)-218(the)-219(calls)-219(to)]TJ/F59 9.9626 Tf 220.804 0 Td [(psb_spins)]TJ/F54 9.9626 Tf 47.073 0 Td [(,)-225(nor)-219(is)-218(ther)18(e)-219(a)]TJ -282.821 -11.955 Td [(r)18(equir)18(ement)-243(to)-243(build)-243(a)-242(matrix)-243(r)18(ow)-243(in)-243(its)-243(entir)18(ety)-243(befor)18(e)-242(calling)-243(the)-243(r)18(outine;)-245(this)]TJ 0 -11.955 Td [(allows)-364(t)1(he)-364(application)-363(pr)18(ogrammer)-364(to)-363(walk)-364(thr)18(ough)-363(the)-364(discr)18(etization)-363(mesh)]TJ 0 -11.955 Td [(element)-316(by)-317(element,)-333(generating)-316(the)-316(main)-317(part)-316(of)-316(a)-317(given)-316(matrix)-316(r)18(ow)-317(but)-316(also)]TJ 0 -11.956 Td [(contributions)-250(to)-250(the)-250(r)18(ows)-250(corr)18(esponding)-250(to)-250(neighbouring)-250(elements.)]TJ 14.944 -12.55 Td [(Fr)18(om)-328(a)-329(func)1(tional)-329(point)-328(of)-328(view)-328(it)-329(is)-328(even)-328(possible)-328(to)-329(exe)1(cute)-329(one)-328(call)-328(for)]TJ -14.944 -11.955 Td [(each)-204(nonzer)18(o)-204(coef)18(\002cient;)-219(however)-203(this)-204(would)-204(have)-204(a)-204(subst)1(antial)-204(computational)]TJ 0 -11.955 Td [(over)18(head.)-457(It)-299(is)-299(ther)18(efor)18(e)-299(advisable)-299(to)-299(pack)-299(a)-299(certain)-299(amount)-299(of)-299(data)-299(into)-299(each)]TJ 0 -11.956 Td [(call)-303(to)-303(the)-302(insertion)-303(r)18(outine,)-316(say)-303(touching)-303(on)-302(a)-303(few)-303(tens)-303(of)-302(r)18(ows;)-330(the)-302(best)-303(per)18(-)]TJ 0 -11.955 Td [(formng)-342(value)-343(would)-342(depend)-342(on)-342(both)-343(the)-342(ar)18(chitectur)18(e)-342(of)-343(the)-342(computer)-342(being)]TJ 0 -11.955 Td [(used)-223(and)-223(on)-222(the)-223(pr)18(oblem)-223(str)8(uctur)18(e.)-301(At)-222(the)-223(opposite)-223(extr)18(eme,)-228(it)-223(would)-222(be)-223(possi-)]TJ 0 -11.955 Td [(ble)-267(to)-267(generate)-267(the)-267(entir)18(e)-267(part)-267(of)-267(a)-267(coef)18(\002cient)-267(matrix)-267(r)18(esiding)-267(on)-267(a)-267(pr)18(ocess)-267(and)]TJ 0 -11.955 Td [(pass)-275(it)-274(in)-275(a)-275(single)-274(call)-275(to)]TJ/F59 9.9626 Tf 108.421 0 Td [(psb_spins)]TJ/F54 9.9626 Tf 47.073 0 Td [(;)-287(this,)-281(however)74(,)-281(would)-274(entail)-275(a)-275(doubling)-274(of)]TJ -155.494 -11.956 Td [(memory)-250(occupation,)-250(and)-250(thus)-250(would)-250(be)-250(almost)-250(always)-250(far)-250(fr)18(om)-250(optimal.)]TJ +/F62 9.9626 Tf 22.685 0 Td [(the)-250(list)-250(of)-250(pr)18(ocesses)-250(owning)-250(the)-250(indices)-250(in)]TJ/F67 9.9626 Tf 184.993 0 Td [(idx)]TJ/F62 9.9626 Tf 15.691 0 Td [(.)]TJ -198.463 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.381 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-408(as:)-627(an)-408(allocatable)-408(one-dimensional)-408(array)-409(of)-408(integers)-408(of)-408(kind)]TJ/F67 9.9626 Tf 0 -11.955 Td [(psb_ipk_)]TJ/F62 9.9626 Tf 41.843 0 Td [(.)]TJ -66.749 -23.926 Td [(Note:)-349(this)-269(method)-269(may)-270(or)-269(may)-269(not)-269(actually)-270(r)18(equir)18(e)-269(communications,)-274(depend-)]TJ 0 -11.956 Td [(ing)-283(on)-283(the)-283(ex)1(a)-1(c)1(t)-283(internal)-283(data)-283(storage;)-299(given)-283(that)-283(the)-283(choice)-283(of)-282(storage)-283(may)-283(be)]TJ 0 -11.955 Td [(alter)18(ed)-376(by)-375(r)8(untime)-376(parameters,)-407(it)-376(is)-375(necessary)-376(for)-376(safety)-375(that)-376(this)-375(method)-376(is)]TJ 0 -11.955 Td [(called)-250(by)-250(all)-250(pr)18(ocesses.)]TJ/F59 9.9626 Tf 0 -29.998 Td [(3.1.15)-1000(Named)-250(Constants)]TJ 0 g 0 G + 0 -19.937 Td [(psb)]TJ ET q -1 0 0 1 99.895 139.555 cm -[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S +1 0 0 1 167.9 514.891 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 5.9776 Tf 110.755 132.683 Td [(3)]TJ/F54 7.9701 Tf 3.487 -2.893 Td [(The)-260(subr)18(outine)-260(style)]TJ/F89 7.9701 Tf 74.235 0 Td [(psb)]TJ +/F59 9.9626 Tf 170.889 514.692 Td [(none)]TJ ET q -1 0 0 1 201.687 129.989 cm -[]0 d 0 J 0.398 w 0 0 m 2.541 0 l S +1 0 0 1 194.182 514.891 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q +0 g 0 G BT -/F89 7.9701 Tf 204.228 129.79 Td [(precinit)]TJ/F54 7.9701 Tf 35.946 0 Td [(and)]TJ/F89 7.9701 Tf 15.567 0 Td [(psb)]TJ +/F62 9.9626 Tf 202.152 514.692 Td [(Generic)-250(no-op;)]TJ +0 g 0 G +/F59 9.9626 Tf -51.447 -21.934 Td [(psb)]TJ ET q -1 0 0 1 268.951 129.989 cm -[]0 d 0 J 0.398 w 0 0 m 2.541 0 l S +1 0 0 1 167.9 492.957 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F89 7.9701 Tf 271.492 129.79 Td [(precbl)]TJ/F54 7.9701 Tf 27.478 0 Td [(ar)18(e)-260(still)-260(supported)-260(for)-260(backwar)18(d)-260(compat-)]TJ -199.075 -9.464 Td [(ibility)]TJ -0 g 0 G -0 g 0 G -/F54 9.9626 Tf 169.365 -29.888 Td [(7)]TJ -0 g 0 G +/F59 9.9626 Tf 170.889 492.758 Td [(root)]TJ ET - -endstream -endobj -875 0 obj -<< -/Length 7173 ->> -stream -0 g 0 G +q +1 0 0 1 189.758 492.957 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q 0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(2.3.1)-1000(User)18(-de\002ned)-250(index)-250(mappings)]TJ/F54 9.9626 Tf 0 -18.964 Td [(PSBLAS)-316(supports)-315(user)18(-de\002ned)-316(global)-316(to)-315(local)-316(index)-316(mappings,)-332(subject)-315(to)-316(the)]TJ 0 -11.955 Td [(constraints)-250(outlined)-250(in)-250(sec.)]TJ -0 0 1 rg 0 0 1 RG - [-250(2.3)]TJ -0 g 0 G - [(:)]TJ -0 g 0 G - 12.453 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(The)-250(set)-250(of)-250(indices)-250(owned)-250(locally)-250(must)-250(be)-250(mapped)-250(to)-250(the)-250(set)-250(1)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 294.494 0 Td [(n)]TJ/F54 9.9626 Tf 5.664 -1.495 Td [(r)18(ow)]TJ/F52 5.9776 Tf 17.537 -1.648 Td [(i)]TJ/F54 9.9626 Tf 2.775 3.143 Td [(;)]TJ -0 g 0 G - -320.47 -19.926 Td [(2.)]TJ -0 g 0 G - [-500(The)-250(set)-250(of)-250(halo)-250(points)-250(must)-250(be)-250(mapped)-250(to)-250(the)-250(set)]TJ/F52 9.9626 Tf 228.286 0 Td [(n)]TJ/F54 9.9626 Tf 5.664 -1.494 Td [(r)18(ow)]TJ/F52 5.9776 Tf 17.537 -1.648 Td [(i)]TJ/F85 10.3811 Tf 4.836 3.142 Td [(+)]TJ/F54 9.9626 Tf 10.132 0 Td [(1)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 19.966 0 Td [(n)]TJ/F54 9.9626 Tf 5.664 -3.83 Td [(col)]TJ/F52 5.9776 Tf 12.795 -1.649 Td [(i)]TJ/F54 9.9626 Tf 2.774 5.479 Td [(;)]TJ -320.107 -21.759 Td [(but)-289(otherwise)-289(the)-288(mapping)-289(is)-289(arbitrary)111(.)-426(The)-289(user)-289(application)-289(is)-288(r)18(esponsible)-289(to)]TJ 0 -11.956 Td [(ensur)18(e)-262(consistency)-261(of)-262(this)-262(mapping;)-267(some)-262(err)18(ors)-262(may)-261(be)-262(caught)-262(by)-261(the)-262(library)111(,)]TJ 0 -11.955 Td [(but)-236(this)-236(is)-236(not)-236(guaranteed.)-305(The)-236(application)-236(str)8(uctur)18(e)-236(to)-236(support)-236(this)-236(usage)-236(is)-236(as)]TJ 0 -11.955 Td [(follows:)]TJ -0 g 0 G - 12.453 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(Initialize)-190(index)-190(space)-190(with)]TJ/F59 9.9626 Tf 128.098 0 Td [(psb_cdall\050ictx,desc,info,vl=vl,lidx=lidx\051)]TJ/F54 9.9626 Tf -115.645 -11.955 Td [(passing)-292(the)-293(vectors)]TJ/F59 9.9626 Tf 87.882 0 Td [(vl\050:\051)]TJ/F54 9.9626 Tf 29.064 0 Td [(containing)-292(the)-293(set)-292(of)-293(global)-292(indices)-292(owned)-293(by)]TJ -116.946 -11.956 Td [(the)-327(curr)18(ent)-328(pr)18(ocess)-327(and)]TJ/F59 9.9626 Tf 108.493 0 Td [(lidx\050:\051)]TJ/F54 9.9626 Tf 39.873 0 Td [(containing)-327(the)-327(corr)18(esponding)-328(local)-327(in-)]TJ -148.366 -11.955 Td [(dices;)]TJ -0 g 0 G - -12.453 -19.925 Td [(2.)]TJ -0 g 0 G - [-500(Add)-412(the)-412(halo)-412(points)]TJ/F59 9.9626 Tf 109.326 0 Td [(ja\050:\051)]TJ/F54 9.9626 Tf 30.256 0 Td [(and)-412(their)-412(associated)-412(local)-412(indices)]TJ/F59 9.9626 Tf 155.064 0 Td [(lidx\050:\051)]TJ/F54 9.9626 Tf -282.193 -11.955 Td [(with)-250(a\050some\051)-250(call\050s\051)-250(to)]TJ/F59 9.9626 Tf 99.815 0 Td [(psb_cdins\050nz,ja,desc,info,lidx=lidx\051)]TJ/F54 9.9626 Tf 188.292 0 Td [(;)]TJ -0 g 0 G - -300.56 -19.926 Td [(3.)]TJ -0 g 0 G - [-500(Assemble)-250(the)-250(descriptor)-250(with)]TJ/F59 9.9626 Tf 144.387 0 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 47.073 0 Td [(;)]TJ -0 g 0 G - -191.46 -19.925 Td [(4.)]TJ -0 g 0 G - [-500(Build)-190(the)-190(sparse)-190(matrices)-190(and)-190(vectors,)-202(optionally)-190(making)-190(use)-190(in)]TJ/F59 9.9626 Tf 288.117 0 Td [(psb_spins)]TJ/F54 9.9626 Tf -275.664 -11.955 Td [(and)]TJ/F59 9.9626 Tf 19.958 0 Td [(psb_geins)]TJ/F54 9.9626 Tf 50.163 0 Td [(of)-310(the)]TJ/F59 9.9626 Tf 28.756 0 Td [(local)]TJ/F54 9.9626 Tf 29.243 0 Td [(ar)18(gument)-310(specifying)-310(that)-310(the)-311(indices)-310(in)]TJ/F59 9.9626 Tf 177.734 0 Td [(ia)]TJ/F54 9.9626 Tf 10.46 0 Td [(,)]TJ/F59 9.9626 Tf -316.314 -11.955 Td [(ja)]TJ/F54 9.9626 Tf 12.952 0 Td [(and)]TJ/F59 9.9626 Tf 19.357 0 Td [(irw)]TJ/F54 9.9626 Tf 15.691 0 Td [(,)-250(r)18(espectively)111(,)-250(ar)18(e)-250(alr)18(eady)-250(local)-250(indices.)]TJ/F51 11.9552 Tf -72.906 -29.133 Td [(2.4)-1000(Programming)-250(model)]TJ/F54 9.9626 Tf 0 -18.964 Td [(The)-316(PSBLAS)-315(librarary)-316(is)-315(based)-316(on)-315(the)-316(Single)-315(Pr)18(ogram)-316(Multiple)-316(Data)-315(\050SPMD\051)]TJ 0 -11.955 Td [(pr)18(ogramming)-277(model:)-364(each)-277(pr)18(ocess)-277(participatin)1(g)-277(in)-277(the)-277(computation)-277(performs)]TJ 0 -11.955 Td [(the)-250(same)-250(actions)-250(on)-250(a)-250(chunk)-250(of)-250(data.)-310(Parallelism)-250(is)-250(thus)-250(data-driven.)]TJ 14.944 -11.956 Td [(Because)-313(of)-313(this)-312(str)8(uctur)18(e,)-329(many)-313(subr)18(outines)-313(coor)18(dinate)-312(their)-313(action)-313(acr)18(oss)]TJ -14.944 -11.955 Td [(the)-336(various)-336(pr)18(ocesses,)-358(thus)-336(pr)18(oviding)-336(an)-336(implicit)-336(synchr)18(onization)-336(point,)-358(and)]TJ 0 -11.955 Td [(ther)18(efor)18(e)]TJ/F52 9.9626 Tf 43.283 0 Td [(must)]TJ/F54 9.9626 Tf 24.136 0 Td [(be)-367(called)-366(simultaneously)-367(by)-366(all)-367(pr)18(ocesses)-367(participating)-366(in)-367(the)]TJ -67.419 -11.955 Td [(computation.)-525(This)-321(is)-322(certainly)-322(tr)8(ue)-321(for)-322(the)-322(data)-321(allocation)-322(and)-322(assembl)1(y)-322(r)18(ou-)]TJ 0 -11.955 Td [(tines,)-250(for)-250(all)-250(the)-250(computational)-250(r)18(outines)-250(and)-250(for)-250(some)-250(of)-250(the)-250(tools)-250(r)18(outines.)]TJ 14.944 -11.955 Td [(However)-333(ther)18(e)-332(ar)18(e)-333(many)-333(cases)-332(wher)18(e)-333(no)-333(synchr)18(onizati)1(on,)-354(and)-332(indeed)-333(no)]TJ -14.944 -11.956 Td [(communication)-344(among)-343(pr)18(ocesses,)-367(is)-344(implied;)-390(for)-344(instance,)-367(all)-344(the)-343(r)18(outines)-344(in)]TJ 0 -11.955 Td [(sec.)]TJ -0 0 1 rg 0 0 1 RG - [-246(3)]TJ -0 g 0 G - [-247(ar)18(e)-246(only)-246(acting)-246(on)-247(the)-246(local)-246(data)-247(str)8(uctur)18(es,)-247(and)-246(thus)-246(may)-247(be)-246(called)-246(inde-)]TJ 0 -11.955 Td [(pendently)111(.)-306(The)-238(most)-238(important)-237(case)-238(is)-238(that)-238(of)-238(the)-238(coef)18(\002cient)-237(insertion)-238(r)18(outines:)]TJ 0 -11.955 Td [(since)-231(the)-231(number)-231(of)-230(coef)18(\002cients)-231(in)-231(the)-231(sparse)-231(and)-231(dense)-230(matrices)-231(varies)-231(among)]TJ 0 -11.955 Td [(the)-248(pr)18(ocessors,)-249(and)-249(since)-248(the)-249(user)-248(is)-249(fr)18(ee)-248(to)-249(choose)-248(an)-249(arbitrary)-248(or)18(der)-249(in)-248(builid-)]TJ 0 -11.955 Td [(ing)-250(the)-250(matrix)-250(entries,)-250(these)-250(r)18(outines)-250(cannot)-250(imply)-250(a)-250(synchr)18(onization.)]TJ 14.944 -11.956 Td [(Thr)18(oughout)-250(this)-250(user)-74('s)-250(guide)-250(each)-250(subr)18(outine)-250(will)-250(be)-250(clearly)-250(indicated)-250(as:)]TJ -0 g 0 G -/F51 9.9626 Tf -14.944 -19.925 Td [(Synchronous:)]TJ -0 g 0 G -/F54 9.9626 Tf 67.247 0 Td [(must)-307(be)-307(called)-308(simultaneously)-307(by)-307(all)-307(the)-308(pr)18(ocesses)-307(in)-307(the)-307(r)18(ele-)]TJ -42.341 -11.955 Td [(vant)-250(communication)-250(context;)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(Asynchronous:)]TJ +/F62 9.9626 Tf 197.728 492.758 Td [(Default)-250(r)18(oot)-250(pr)18(ocess)-250(for)-250(br)18(oadcast)-250(and)-250(scatter)-250(operations;)]TJ 0 g 0 G -/F54 9.9626 Tf 73.334 0 Td [(may)-250(be)-250(called)-250(in)-250(a)-250(totally)-250(independent)-250(manner)74(.)]TJ +/F59 9.9626 Tf -47.023 -21.934 Td [(psb)]TJ +ET +q +1 0 0 1 167.9 471.023 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 170.889 470.824 Td [(nohalo)]TJ +ET +q +1 0 0 1 203.038 471.023 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q 0 g 0 G - 96.031 -56.634 Td [(8)]TJ +BT +/F62 9.9626 Tf 211.008 470.824 Td [(Do)-250(not)-250(fetch)-250(halo)-250(elements;)]TJ 0 g 0 G +/F59 9.9626 Tf -60.303 -21.934 Td [(psb)]TJ ET - -endstream -endobj -886 0 obj -<< -/Length 8187 ->> -stream -0 g 0 G +q +1 0 0 1 167.9 449.089 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 170.889 448.89 Td [(halo)]TJ +ET +q +1 0 0 1 191.412 449.089 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q 0 g 0 G BT -/F51 14.3462 Tf 99.895 705.784 Td [(3)-1000(Data)-250(Structures)-250(and)-250(Classes)]TJ/F54 9.9626 Tf 0 -23.091 Td [(In)-289(this)-288(chapter)-289(we)-289(illustrate)-288(the)-289(data)-289(str)8(uctur)18(es)-288(used)-289(for)-289(de\002nition)-289(of)-288(r)18(outines)]TJ 0 -11.956 Td [(interfaces.)-622(They)-354(include)-354(data)-354(str)8(uctur)18(es)-354(for)-354(sparse)-354(matrices,)-380(communication)]TJ 0 -11.955 Td [(descriptors)-250(and)-250(pr)18(econditioners.)]TJ 14.944 -12.156 Td [(All)-248(the)-248(data)-249(types)-248(and)-248(the)-248(basic)-248(subr)18(outine)-249(interfaces)-248(r)18(elated)-248(to)-248(descriptors)]TJ -14.944 -11.955 Td [(and)-345(sparse)-345(matrices)-344(ar)18(e)-345(de\002ned)-345(in)-345(the)-345(module)]TJ/F59 9.9626 Tf 213.323 0 Td [(psb_base_mod)]TJ/F54 9.9626 Tf 62.764 0 Td [(;)-392(this)-345(will)-345(have)]TJ -276.087 -11.955 Td [(to)-381(be)-381(included)-381(by)-381(every)-381(user)-381(subr)18(outine)-381(that)-381(makes)-381(u)1(se)-381(of)-381(the)-381(library)111(.)-703(The)]TJ 0 -11.956 Td [(pr)18(econditioners)-250(ar)18(e)-250(de\002ned)-250(in)-250(the)-250(module)]TJ/F59 9.9626 Tf 187.993 0 Td [(psb_prec_mod)]TJ/F54 9.9626 Tf -173.049 -12.156 Td [(Integer)74(,)-433(r)18(eal)-396(and)-397(complex)-396(data)-396(types)-397(ar)18(e)-396(parametrized)-396(with)-397(a)-396(kind)-396(type)]TJ -14.944 -11.955 Td [(de\002ned)-250(in)-250(the)-250(library)-250(as)-250(follows:)]TJ +/F62 9.9626 Tf 199.382 448.89 Td [(Fetch)-250(halo)-250(elements)-250(fr)18(om)-250(neighbouring)-250(pr)18(ocesses;)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -20.528 Td [(psb)]TJ +/F59 9.9626 Tf -48.677 -21.934 Td [(psb)]TJ ET q -1 0 0 1 117.091 566.32 cm +1 0 0 1 167.9 427.155 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 120.08 566.121 Td [(spk)]TJ +/F59 9.9626 Tf 170.889 426.956 Td [(sum)]TJ ET q -1 0 0 1 137.275 566.32 cm +1 0 0 1 190.854 427.155 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 145.245 566.121 Td [(Kind)-407(parameter)-406(for)-407(short)-406(pr)18(ecision)-407(r)18(eal)-406(and)-407(complex)-406(data;)-485(corr)18(e-)]TJ -20.443 -11.955 Td [(sponds)-250(to)-250(a)]TJ -0.56 0.13 0.00 rg 0.56 0.13 0.00 RG -/F59 9.9626 Tf 52.901 0 Td [(REAL)]TJ +/F62 9.9626 Tf 198.824 426.956 Td [(Sum)-250(overlapped)-250(elements)]TJ 0 g 0 G -/F54 9.9626 Tf 23.412 0 Td [(declaration)-250(and)-250(is)-250(normally)-250(4)-250(bytes;)]TJ -0 g 0 G -/F51 9.9626 Tf -101.22 -20.73 Td [(psb)]TJ +/F59 9.9626 Tf -48.119 -21.934 Td [(psb)]TJ ET q -1 0 0 1 117.091 533.635 cm +1 0 0 1 167.9 405.221 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 120.08 533.436 Td [(dpk)]TJ +/F59 9.9626 Tf 170.889 405.022 Td [(avg)]TJ ET q -1 0 0 1 138.939 533.635 cm +1 0 0 1 187.546 405.221 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 146.909 533.436 Td [(Kind)-420(parameter)-421(for)-420(long)-420(pr)18(ecision)-421(r)18(eal)-420(and)-420(complex)-421(data;)-505(corr)18(e-)]TJ -22.107 -11.955 Td [(sponds)-250(to)-250(a)]TJ -0.56 0.13 0.00 rg 0.56 0.13 0.00 RG -/F59 9.9626 Tf 52.901 0 Td [(DOUBLE)-525(PRECISION)]TJ +/F62 9.9626 Tf 195.517 405.022 Td [(A)92(verage)-250(overlapped)-250(elements)]TJ 0 g 0 G -/F54 9.9626 Tf 86.176 0 Td [(declaration)-250(and)-250(is)-250(normally)-250(8)-250(bytes;)]TJ -0 g 0 G -/F51 9.9626 Tf -163.984 -20.73 Td [(psb)]TJ +/F59 9.9626 Tf -44.812 -21.934 Td [(psb)]TJ ET q -1 0 0 1 117.091 500.951 cm +1 0 0 1 167.9 383.288 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 120.08 500.751 Td [(mpk)]TJ +/F59 9.9626 Tf 170.889 383.088 Td [(comm)]TJ ET q -1 0 0 1 141.708 500.951 cm +1 0 0 1 199.163 383.288 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 202.152 383.088 Td [(halo)]TJ +ET +q +1 0 0 1 222.674 383.288 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 149.678 500.751 Td [(Kind)-250(parameter)-250(for)-250(4-bytes)-250(integer)-250(data,)-250(as)-250(is)-250(always)-250(used)-250(by)-250(MPI;)]TJ +/F62 9.9626 Tf 230.645 383.088 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F67 9.9626 Tf 124.92 0 Td [(halo_index)]TJ/F62 9.9626 Tf 54.794 0 Td [(list;)]TJ 0 g 0 G -/F51 9.9626 Tf -49.783 -20.729 Td [(psb)]TJ +/F59 9.9626 Tf -259.654 -21.934 Td [(psb)]TJ ET q -1 0 0 1 117.091 480.221 cm +1 0 0 1 167.9 361.354 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 120.08 480.022 Td [(epk)]TJ +/F59 9.9626 Tf 170.889 361.154 Td [(comm)]TJ ET q -1 0 0 1 137.833 480.221 cm +1 0 0 1 199.163 361.354 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 202.152 361.154 Td [(ext)]TJ +ET +q +1 0 0 1 216.029 361.354 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 145.803 480.022 Td [(Kind)-364(parameter)-363(for)-364(8-bytes)-363(integer)-364(data,)-391(a)-1(s)-363(is)-364(always)-363(used)-364(by)-363(the)]TJ/F59 9.9626 Tf -21.001 -11.955 Td [(sizeof)]TJ/F54 9.9626 Tf 33.873 0 Td [(methods;)]TJ +/F62 9.9626 Tf 224 361.154 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F67 9.9626 Tf 124.92 0 Td [(ext_index)]TJ/F62 9.9626 Tf 49.564 0 Td [(list;)]TJ 0 g 0 G -/F51 9.9626 Tf -58.78 -20.73 Td [(psb)]TJ +/F59 9.9626 Tf -247.779 -21.934 Td [(psb)]TJ ET q -1 0 0 1 117.091 447.537 cm +1 0 0 1 167.9 339.42 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 120.08 447.337 Td [(ipk)]TJ +/F59 9.9626 Tf 170.889 339.22 Td [(comm)]TJ ET q -1 0 0 1 136.169 447.537 cm +1 0 0 1 199.163 339.42 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 202.152 339.22 Td [(ovr)]TJ +ET +q +1 0 0 1 217.703 339.42 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 144.139 447.337 Td [(Kind)-398(parameter)-399(for)-398(\223local\224)-398(integer)-399(indices)-398(and)-398(data;)-473(with)-398(default)]TJ -19.337 -11.955 Td [(build)-250(options)-250(this)-250(is)-250(a)-250(4)-250(bytes)-250(integer;)]TJ +/F62 9.9626 Tf 225.673 339.22 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F67 9.9626 Tf 124.92 0 Td [(ovrlap_index)]TJ/F62 9.9626 Tf 65.255 0 Td [(list;)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.729 Td [(psb)]TJ +/F59 9.9626 Tf -265.143 -21.934 Td [(psb)]TJ ET q -1 0 0 1 117.091 414.852 cm +1 0 0 1 167.9 317.486 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 120.08 414.653 Td [(lpk)]TJ +/F59 9.9626 Tf 170.889 317.286 Td [(comm)]TJ ET q -1 0 0 1 136.169 414.852 cm +1 0 0 1 199.163 317.486 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 202.152 317.286 Td [(mov)]TJ +ET +q +1 0 0 1 222.684 317.486 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 144.139 414.653 Td [(Kind)-328(parameter)-329(for)-328(\223global\224)-328(integer)-329(indices)-328(and)-328(data;)-368(with)-328(default)]TJ -19.337 -11.955 Td [(build)-250(options)-250(this)-250(is)-250(an)-250(8)-250(bytes)-250(integer;)]TJ -24.907 -20.529 Td [(The)-205(integer)-205(kinds)-205(for)-206(local)-205(and)-205(global)-205(indices)-205(can)-205(be)-206(chosen)-205(at)-205(con\002gur)18(e)-205(time)-205(to)]TJ 0 -11.955 Td [(hold)-266(4)-267(or)-266(8)-267(bytes,)-270(with)-266(the)-267(global)-266(indices)-266(at)-267(least)-266(as)-267(lar)18(ge)-266(as)-266(the)-267(local)-266(ones.)-359(T)92(o-)]TJ 0 -11.955 Td [(gether)-219(with)-220(the)-219(classes)-219(attributes)-219(we)-219(also)-220(discuss)-219(their)-219(methods.)-300(Most)-219(methods)]TJ 0 -11.955 Td [(detailed)-272(her)18(e)-272(only)-273(act)-272(on)-272(the)-272(local)-272(variable,)-278(i.e.)-376(their)-273(action)-272(is)-272(pur)18(ely)-272(local)-272(and)]TJ 0 -11.956 Td [(asynchr)18(onous)-359(unless)-360(otherwise)-359(stated.)-638(The)-359(list)-360(of)-359(methods)-359(her)18(e)-360(is)-359(not)-359(com-)]TJ 0 -11.955 Td [(pletely)-336(exhaustive;)-380(many)-336(methods,)-358(especially)-336(those)-336(that)-336(alter)-337(the)-336(contents)-336(of)]TJ 0 -11.955 Td [(the)-299(various)-298(objects,)-311(ar)18(e)-299(usually)-299(not)-299(needed)-298(by)-299(the)-299(end-user)74(,)-311(and)-298(ther)18(efor)18(e)-299(ar)18(e)]TJ 0 -11.955 Td [(described)-250(in)-250(the)-250(developer)-74('s)-250(documentation.)]TJ/F51 11.9552 Tf 0 -30.277 Td [(3.1)-1000(Descriptor)-250(data)-250(structure)]TJ/F54 9.9626 Tf 0 -19.353 Td [(All)-241(the)-241(gener)1(a)-1(l)-240(matrix)-241(informations)-241(and)-240(elements)-241(to)-241(be)-241(exchanged)-240(among)-241(pr)18(o-)]TJ 0 -11.956 Td [(cesses)-402(ar)18(e)-401(stor)18(ed)-402(within)-401(a)-402(data)-401(str)8(uctur)18(e)-402(of)-401(the)-402(type)]TJ/F59 9.9626 Tf 242.575 0 Td [(psb)]TJ +/F62 9.9626 Tf 230.654 317.286 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F67 9.9626 Tf 124.921 0 Td [(ovr_mst_idx)]TJ/F62 9.9626 Tf 60.024 0 Td [(list;)]TJ/F59 11.9552 Tf -264.894 -31.99 Td [(3.2)-1000(Sparse)-250(Matrix)-250(class)]TJ/F62 9.9626 Tf 0 -19.937 Td [(The)]TJ/F67 9.9626 Tf 19.623 0 Td [(psb)]TJ ET q -1 0 0 1 358.788 237.097 cm +1 0 0 1 186.647 265.558 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 361.927 236.897 Td [(desc)]TJ +/F67 9.9626 Tf 189.785 265.359 Td [(Tspmat)]TJ ET q -1 0 0 1 383.476 237.097 cm +1 0 0 1 221.795 265.558 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 386.614 236.897 Td [(type)]TJ/F54 9.9626 Tf 20.921 0 Td [(.)-765(Every)]TJ -307.64 -11.955 Td [(str)8(uctur)18(e)-371(of)-370(this)-371(type)-370(is)-371(associated)-370(with)-371(a)-371(discr)18(etization)-370(pattern)-371(and)-370(enables)]TJ 0 -11.955 Td [(data)-301(communications)-302(and)-301(other)-301(operations)-302(that)-301(ar)18(e)-302(nece)1(ssa)-1(r)1(y)-302(for)-301(implement-)]TJ 0 -11.955 Td [(ing)-250(the)-250(various)-250(algorithms)-250(of)-250(inter)18(est)-250(to)-250(us.)]TJ 14.944 -12.156 Td [(The)-265(data)-266(str)8(uctur)18(e)-265(itself)]TJ/F59 9.9626 Tf 107.448 0 Td [(psb_desc_type)]TJ/F54 9.9626 Tf 70.638 0 Td [(can)-265(be)-266(tr)18(eated)-265(as)-265(an)-265(opaque)-266(object)]TJ -193.03 -11.955 Td [(handled)-321(via)-321(the)-321(tools)-321(r)18(outines)-321(of)-321(Sec.)]TJ +/F67 9.9626 Tf 224.933 265.359 Td [(type)]TJ/F62 9.9626 Tf 23.868 0 Td [(class)-296(contains)-295(all)-296(information)-296(about)-296(the)-295(local)-296(portion)-296(of)]TJ -98.096 -11.955 Td [(the)-200(sparse)-199(matrix)-200(and)-199(its)-200(storage)-200(mode.)-293(Its)-199(design)-200(is)-200(based)-199(on)-200(the)-200(ST)74(A)74(TE)-199(design)]TJ 0 -11.955 Td [(pattern)-256([)]TJ +1 0 0 rg 1 0 0 RG + [(12)]TJ +0 g 0 G + [(])-255(as)-256(detailed)-256(in)-256([)]TJ +1 0 0 rg 1 0 0 RG + [(10)]TJ +0 g 0 G + [(];)-258(the)-256(type)-256(declaration)-255(is)-256(shown)-256(in)-256(\002gur)18(e)]TJ 0 0 1 rg 0 0 1 RG - [-321(6)]TJ + [-255(2)]TJ 0 g 0 G - [-321(or)-321(the)-321(query)-321(r)18(outines)-321(detailed)-321(below;)]TJ 0 -11.956 Td [(nevertheless)-250(we)-250(include)-250(her)18(e)-250(a)-250(description)-250(for)-250(the)-250(curious)-250(r)18(eader)74(.)]TJ 14.944 -12.156 Td [(First)-229(we)-228(describe)-228(the)]TJ/F59 9.9626 Tf 92.473 0 Td [(psb_indx_map)]TJ/F54 9.9626 Tf 65.04 0 Td [(type.)-303(This)-228(is)-229(a)-228(data)-229(str)8(uctur)18(e)-228(that)-229(keeps)]TJ -172.457 -11.955 Td [(track)-250(of)-250(a)-250(certain)-250(number)-250(of)-250(basic)-250(issues)-250(such)-250(as:)]TJ + [-256(wher)18(e)]TJ/F67 9.9626 Tf 0 -11.956 Td [(T)]TJ/F62 9.9626 Tf 7.721 0 Td [(is)-250(a)-250(placeholder)-250(for)-250(the)-250(data)-250(type)-250(and)-250(pr)18(ecision)-250(variants)]TJ 0 g 0 G - 13.888 -20.528 Td [(\225)]TJ +/F59 9.9626 Tf -7.721 -21.431 Td [(S)]TJ 0 g 0 G - [-500(The)-250(value)-250(of)-250(the)-250(communication)-250(context;)]TJ +/F62 9.9626 Tf 11.068 0 Td [(Single)-250(pr)18(ecision)-250(r)18(eal;)]TJ 0 g 0 G - 155.477 -29.888 Td [(9)]TJ +/F59 9.9626 Tf -11.068 -21.934 Td [(D)]TJ +0 g 0 G +/F62 9.9626 Tf 13.28 0 Td [(Double)-250(pr)18(ecision)-250(r)18(eal;)]TJ +0 g 0 G +/F59 9.9626 Tf -13.28 -21.934 Td [(C)]TJ +0 g 0 G +/F62 9.9626 Tf 12.174 0 Td [(Single)-250(pr)18(ecision)-250(complex;)]TJ +0 g 0 G +/F59 9.9626 Tf -12.174 -21.934 Td [(Z)]TJ +0 g 0 G +/F62 9.9626 Tf 11.626 0 Td [(Double)-250(pr)18(ecision)-250(complex;)]TJ +0 g 0 G +/F59 9.9626 Tf -11.626 -21.934 Td [(LS,LD,LC,LZ)]TJ +0 g 0 G +/F62 9.9626 Tf 65.026 0 Td [(Same)-214(numeric)-214(type)-215(as)-214(above,)-221(but)-214(with)]TJ/F67 9.9626 Tf 168.016 0 Td [(psb_lpk_)]TJ/F62 9.9626 Tf 43.978 0 Td [(integer)-214(indices.)]TJ +0 g 0 G + -110.146 -29.888 Td [(16)]TJ 0 g 0 G ET endstream endobj -894 0 obj +1050 0 obj << -/Length 6070 +/Length 7385 >> stream 0 g 0 G 0 g 0 G 0 g 0 G -BT -/F54 9.9626 Tf 164.593 706.129 Td [(\225)]TJ -0 g 0 G - [-500(The)-236(number)-236(of)-235(indices)-236(in)-236(the)-236(index)-236(space,)-238(i.e.)-306(global)-236(number)-235(of)-236(r)18(ows)-236(and)]TJ 11.018 -11.955 Td [(columns)-250(of)-250(a)-250(sparse)-250(matrix;)]TJ -0 g 0 G - -11.018 -20.409 Td [(\225)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +q +1 0 0 1 99.895 671.26 cm +0 0 343.711 38.854 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - [-500(The)-250(local)-250(set)-250(of)-250(indices,)-250(including:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 22.974 -20.408 Td [(\226)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 112.299 699.454 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(number)-250(of)-250(local)-250(indices)-250(\050and)-250(local)-250(r)18(ows\051;)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -9.962 -16.182 Td [(\226)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(number)-250(of)-250(halo)-250(indices)-250(\050and)-250(ther)18(efor)18(e)-250(local)-250(columns\051;)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -9.962 -16.181 Td [(\226)]TJ + [-525(psb_Tspmat_type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(global)-250(indices)-250(corr)18(esponding)-250(to)-250(the)-250(local)-250(ones.)]TJ -46.824 -20.409 Td [(Ther)18(e)-301(ar)18(e)-301(many)-301(dif)18(fer)18(ent)-301(schemes)-301(for)-301(storing)-301(these)-301(data;)-326(ther)18(efor)18(e)-301(ther)18(e)-301(ar)18(e)-301(a)]TJ 0 -11.955 Td [(number)-299(of)-299(types)-300(extending)-299(the)-299(base)-299(one,)-312(and)-299(the)-299(descriptor)-300(str)8(uctur)18(e)-299(holds)-299(a)]TJ 0 -11.955 Td [(polymorphic)-212(object)-213(whose)-212(dynamic)-212(type)-213(can)-212(be)-212(any)-213(of)-212(the)-212(extended)-213(types.)-297(The)]TJ 0 -11.955 Td [(methods)-250(associated)-250(with)-250(this)-250(data)-250(type)-250(answer)-250(the)-250(following)-250(queries:)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 9.414 -10.959 Td [(class)]TJ 0 g 0 G - 13.888 -20.288 Td [(\225)]TJ + [(\050psb_T_base_sparse_mat\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(For)-411(a)-412(given)-411(set)-412(of)-411(local)-411(indices,)-452(\002nd)-412(the)-411(corr)18(esponding)-411(indices)-412(in)-411(the)]TJ 11.018 -11.955 Td [(global)-250(numbering;)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ 0 g 0 G - -11.018 -20.408 Td [(\225)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(For)-357(a)-357(given)-357(set)-357(of)-358(global)-357(indices,)-384(\002nd)-357(the)-357(corr)18(esponding)-357(indices)-357(in)-357(the)]TJ 11.018 -11.955 Td [(local)-250(numbering,)-250(if)-250(any)111(,)-250(or)-250(r)18(eturn)-250(an)-250(invalid)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-1050(::)]TJ 0 g 0 G - -11.018 -20.409 Td [(\225)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(Add)-250(a)-250(global)-250(index)-250(to)-250(the)-250(set)-250(of)-250(halo)-250(indices;)]TJ + [-525(a)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -20.408 Td [(\225)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.414 -10.959 Td [(end)-525(type)]TJ 0 g 0 G - [-500(Find)-250(the)-250(pr)18(ocess)-250(owner)-250(of)-250(each)-250(member)-250(of)-250(a)-250(set)-250(of)-250(global)-250(indices.)]TJ -13.888 -20.288 Td [(All)-295(methods)-295(but)-294(the)-295(last)-295(ar)18(e)-295(pur)18(ely)-295(local;)-317(the)-295(last)-295(method)-294(potentially)-295(r)18(equir)18(es)]TJ 0 -11.955 Td [(communication)-418(among)-419(pr)18(ocesses,)-460(and)-419(thus)-418(is)-418(a)-419(synchr)18(onous)-418(method.)-815(The)]TJ 0 -11.955 Td [(choice)-244(of)-244(a)-244(speci\002c)-244(dynamic)-244(type)-244(for)-244(the)-244(index)-244(map)-244(is)-244(made)-244(at)-244(the)-244(time)-244(the)-244(de-)]TJ 0 -11.955 Td [(scriptor)-210(is)-211(init)1(ially)-211(allocated,)-218(accor)18(ding)-210(to)-211(t)1(he)-211(mode)-210(of)-210(initialization)-211(\050see)-210(also)]TJ -0 0 1 rg 0 0 1 RG - [-210(6)]TJ + [-1050(psb_Tspmat_type)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - [(\051.)]TJ 14.944 -12.076 Td [(The)-250(descriptor)-250(contents)-250(ar)18(e)-250(as)-250(follows:)]TJ 0 g 0 G -/F51 9.9626 Tf -14.944 -20.288 Td [(indxmap)]TJ +/F62 9.9626 Tf 4.295 -41.429 Td [(Listing)-250(2:)-310(The)-250(PSBLAS)-250(de\002ned)-250(data)-250(type)-250(that)-250(contains)-250(a)-250(sparse)-250(matrix.)]TJ -16.699 -32.661 Td [(The)-190(actual)-190(data)-190(is)-190(contained)-190(in)-190(the)-190(polymorphic)-190(component)]TJ/F67 9.9626 Tf 259.484 0 Td [(a%a)]TJ/F62 9.9626 Tf 17.584 0 Td [(of)-190(type)]TJ/F67 9.9626 Tf 32.089 0 Td [(psb)]TJ +ET +q +1 0 0 1 425.371 603.645 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 428.509 603.446 Td [(T)]TJ +ET +q +1 0 0 1 434.367 603.645 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 437.505 603.446 Td [(base)]TJ +ET +q +1 0 0 1 459.054 603.645 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 462.193 603.446 Td [(sparse)]TJ +ET +q +1 0 0 1 494.202 603.645 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 497.341 603.446 Td [(mat)]TJ/F62 9.9626 Tf 15.691 0 Td [(;)]TJ -413.137 -11.955 Td [(its)-306(speci)1(\002c)-306(layout)-305(can)-306(be)-305(chosen)-306(dynamically)-305(among)-306(the)-305(pr)18(ede\002ned)-306(types,)-319(or)]TJ 0 -11.955 Td [(an)-305(entir)18(ely)-305(new)-305(storage)-305(layout)-305(can)-305(be)-305(implemented)-304(and)-305(passed)-305(to)-305(the)-305(library)]TJ 0 -11.955 Td [(at)-231(r)8(untime)-231(via)-231(the)]TJ/F67 9.9626 Tf 80.145 0 Td [(psb_spasb)]TJ/F62 9.9626 Tf 49.377 0 Td [(r)18(outine.)-304(The)-231(following)-231(very)-231(common)-231(formats)-231(ar)18(e)]TJ -129.522 -11.955 Td [(pr)18(ecompiled)-250(in)-250(PSBLAS)-250(and)-250(thus)-250(ar)18(e)-250(always)-250(available:)]TJ 0 g 0 G -/F54 9.9626 Tf 45.38 0 Td [(A)-190(polymorphic)-190(variable)-190(of)-190(a)-190(type)-190(that)-190(is)-190(any)-190(extension)-190(of)-190(the)-190(indx)]TJ +/F59 9.9626 Tf 0 -19.889 Td [(psb)]TJ ET q -1 0 0 1 478.491 370.98 cm +1 0 0 1 117.091 535.936 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 481.48 370.78 Td [(map)]TJ -305.869 -11.955 Td [(type)-250(described)-250(above.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -32.363 Td [(halo)]TJ +/F59 9.9626 Tf 120.08 535.737 Td [(T)]TJ ET q -1 0 0 1 171.228 326.661 cm +1 0 0 1 127.322 535.936 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.217 326.462 Td [(index)]TJ -0 g 0 G -/F54 9.9626 Tf 30.435 0 Td [(A)-331(list)-332(of)-331(the)-332(halo)-331(and)-332(boundary)-331(elements)-332(for)-331(the)-332(curr)18(ent)-331(pr)18(ocess)]TJ -29.041 -11.955 Td [(to)-247(be)-247(exchanged)-247(with)-246(other)-247(pr)18(ocesses;)-248(for)-247(each)-247(pr)18(ocesses)-247(with)-247(whic)1(h)-247(it)-247(is)]TJ 0 -11.956 Td [(necessary)-250(to)-250(communicate:)]TJ -0 g 0 G - 9.465 -20.408 Td [(1.)]TJ -0 g 0 G - [-500(Pr)18(ocess)-250(identi\002er;)]TJ -0 g 0 G - 0 -16.182 Td [(2.)]TJ -0 g 0 G - [-500(Number)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ -0 g 0 G - 0 -16.181 Td [(3.)]TJ -0 g 0 G - [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ -0 g 0 G - 0 -16.182 Td [(4.)]TJ -0 g 0 G - [-500(Number)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ -0 g 0 G - 0 -16.182 Td [(5.)]TJ -0 g 0 G - [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ -9.465 -20.408 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ -0 0 1 rg 0 0 1 RG - [-250(3.3)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -20.409 Td [(ext)]TJ +/F59 9.9626 Tf 130.311 535.737 Td [(coo)]TJ ET q -1 0 0 1 164.583 176.799 cm +1 0 0 1 146.411 535.936 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 167.572 176.599 Td [(index)]TJ -0 g 0 G -/F54 9.9626 Tf 30.435 0 Td [(A)-216(list)-217(of)-216(element)-217(indices)-216(to)-217(be)-216(exchanged)-217(to)-216(implement)-217(the)-216(mapping)]TJ -22.396 -11.955 Td [(between)-250(a)-250(base)-250(descriptor)-250(and)-250(a)-250(descriptor)-250(with)-250(overlap.)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ -0 0 1 rg 0 0 1 RG - [-250(3.3)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -20.408 Td [(ovrlap)]TJ +/F59 9.9626 Tf 149.399 535.737 Td [(sparse)]TJ ET q -1 0 0 1 180.642 132.48 cm +1 0 0 1 178.769 535.936 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 183.631 132.281 Td [(index)]TJ +/F59 9.9626 Tf 181.758 535.737 Td [(mat)]TJ 0 g 0 G -/F54 9.9626 Tf 30.436 0 Td [(A)-259(list)-259(of)-258(the)-259(overlap)-259(elements)-259(for)-259(the)-258(curr)18(ent)-259(pr)18(ocess,)-261(or)18(ganized)]TJ -38.456 -11.955 Td [(in)-250(gr)18(oups)-250(like)-250(the)-250(pr)18(evious)-250(vector:)]TJ -0 g 0 G - 141.968 -29.888 Td [(10)]TJ +/F62 9.9626 Tf 22.137 0 Td [(Coor)18(dinate)-250(storage;)]TJ 0 g 0 G +/F59 9.9626 Tf -104 -19.907 Td [(psb)]TJ ET - -endstream -endobj -905 0 obj -<< -/Length 5988 ->> -stream -0 g 0 G -0 g 0 G -0 g 0 G +q +1 0 0 1 117.091 516.03 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q BT -/F54 9.9626 Tf 134.267 706.129 Td [(1.)]TJ -0 g 0 G - [-500(Pr)18(ocess)-250(identi\002er;)]TJ -0 g 0 G - 0 -16.693 Td [(2.)]TJ -0 g 0 G - [-500(Number)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ -0 g 0 G - 0 -16.694 Td [(3.)]TJ -0 g 0 G - [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(r)18(eceived;)]TJ -0 g 0 G - 0 -16.693 Td [(4.)]TJ -0 g 0 G - [-500(Number)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ -0 g 0 G - 0 -16.693 Td [(5.)]TJ -0 g 0 G - [-500(Indices)-250(of)-250(points)-250(to)-250(be)-250(sent;)]TJ -9.465 -21.431 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ -0 0 1 rg 0 0 1 RG - [-250(3.3)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -21.431 Td [(ovr)]TJ +/F59 9.9626 Tf 120.08 515.83 Td [(T)]TJ ET q -1 0 0 1 115.447 596.693 cm +1 0 0 1 127.322 516.03 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 118.436 596.494 Td [(mst)]TJ +/F59 9.9626 Tf 130.311 515.83 Td [(csr)]TJ ET q -1 0 0 1 135.631 596.693 cm +1 0 0 1 143.631 516.03 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 138.62 596.494 Td [(idx)]TJ -0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(A)-331(list)-332(to)-331(r)18(etrieve)-331(the)-332(value)-331(of)-331(each)-332(overlap)-331(element)-331(fr)18(om)-332(the)-331(r)18(e-)]TJ -33.185 -11.956 Td [(spective)-250(master)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(vector)-250(of)-250(integer)-250(type,)-250(see)]TJ -0 0 1 rg 0 0 1 RG - [-250(3.3)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -21.431 Td [(ovrlap)]TJ +/F59 9.9626 Tf 146.62 515.83 Td [(sparse)]TJ ET q -1 0 0 1 129.833 551.351 cm +1 0 0 1 175.989 516.03 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 132.822 551.152 Td [(elem)]TJ -0 g 0 G -/F54 9.9626 Tf 27.118 0 Td [(For)-250(all)-250(overlap)-250(points)-250(belonging)-250(to)-250(th)-250(ecurr)18(ent)-250(pr)18(ocess:)]TJ -0 g 0 G - -25.673 -21.431 Td [(1.)]TJ -0 g 0 G - [-500(Overlap)-250(point)-250(index;)]TJ -0 g 0 G - 0 -16.693 Td [(2.)]TJ -0 g 0 G - [-500(Number)-250(of)-250(pr)18(ocesses)-250(sharing)-250(that)-250(overlap)-250(points;)]TJ -0 g 0 G - 0 -16.694 Td [(3.)]TJ +/F59 9.9626 Tf 178.978 515.83 Td [(mat)]TJ 0 g 0 G - [-500(Index)-250(of)-250(a)-250(\223master)-74(\224)-250(pr)18(ocess:)]TJ -9.465 -21.431 Td [(Speci\002ed)-250(as:)-310(an)-250(allocatable)-250(integer)-250(array)-250(of)-250(rank)-250(two.)]TJ +/F62 9.9626 Tf 22.137 0 Td [(Compr)18(essed)-250(storage)-250(by)-250(r)18(ows;)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.431 Td [(bnd)]TJ +/F59 9.9626 Tf -101.22 -19.906 Td [(psb)]TJ ET q -1 0 0 1 118.755 453.671 cm +1 0 0 1 117.091 496.123 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 121.743 453.472 Td [(elem)]TJ -0 g 0 G -/F54 9.9626 Tf 27.119 0 Td [(A)-235(list)-235(of)-235(all)-235(boundary)-235(points,)-238(i.e.)-305(points)-235(that)-235(have)-235(a)-235(connection)-235(with)]TJ -24.06 -11.955 Td [(other)-250(pr)18(ocesses.)]TJ -24.907 -21.055 Td [(The)-393(Fortran)-394(2003)-393(declaration)-394(for)]TJ/F59 9.9626 Tf 151.232 0 Td [(psb_desc_type)]TJ/F54 9.9626 Tf 71.913 0 Td [(str)8(uctur)18(es)-393(is)-394(as)-393(follows:)-597(A)]TJ -0 g 0 G -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F59 9.9626 Tf 120.08 495.924 Td [(T)]TJ ET q -1 0 0 1 99.895 294.955 cm -0 0 343.711 104.608 re f +1 0 0 1 127.322 496.123 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 102.884 388.902 Td [(type)]TJ -0 g 0 G - [-525(psb_desc_type)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 18.829 -10.959 Td [(class)]TJ -0 g 0 G - [(\050psb_indx_map\051,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(allocatable)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ -0 g 0 G - [-525(indxmap)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(type)]TJ -0 g 0 G - [(\050psb_i_vect_type\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ -0 g 0 G - [-525(v_halo_index)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.958 Td [(type)]TJ +/F59 9.9626 Tf 130.311 495.924 Td [(csc)]TJ +ET +q +1 0 0 1 144.179 496.123 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 147.168 495.924 Td [(sparse)]TJ +ET +q +1 0 0 1 176.537 496.123 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 179.526 495.924 Td [(mat)]TJ 0 g 0 G - [(\050psb_i_vect_type\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ +/F62 9.9626 Tf 22.137 0 Td [(Compr)18(essed)-250(storage)-250(by)-250(columns;)]TJ -101.768 -19.889 Td [(The)-295(inner)-295(sparse)-294(matrix)-295(has)-295(an)-295(associated)-294(state,)-306(which)-295(can)-295(take)-294(the)-295(following)]TJ 0 -11.955 Td [(values:)]TJ 0 g 0 G - [-525(v_ext_index)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(type)]TJ +/F59 9.9626 Tf 0 -19.888 Td [(Build:)]TJ 0 g 0 G - [(\050psb_i_vect_type\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ +/F62 9.9626 Tf 32.927 0 Td [(State)-283(enter)18(ed)-283(after)-283(the)-283(\002rst)-284(allocation,)-291(and)-283(befor)18(e)-283(the)-283(\002rst)-283(assembly;)-300(in)]TJ -8.02 -11.955 Td [(this)-250(state)-250(it)-250(is)-250(possible)-250(to)-250(add)-250(nonzer)18(o)-250(entries.)]TJ 0 g 0 G - [-525(v_ovrlap_index)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(type)]TJ +/F59 9.9626 Tf -24.907 -19.907 Td [(Assembled:)]TJ 0 g 0 G - [(\050psb_i_vect_type\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ +/F62 9.9626 Tf 58.381 0 Td [(State)-324(enter)18(ed)-325(after)-324(the)-325(assembly;)-362(computations)-324(using)-325(the)-324(sparse)]TJ -33.474 -11.955 Td [(matrix,)-250(such)-250(as)-250(matrix-vector)-250(pr)18(oducts,)-250(ar)18(e)-250(only)-250(possible)-250(in)-250(this)-250(state;)]TJ 0 g 0 G - [-525(v_ovr_mst_idx)]TJ -0.56 0.13 0.00 rg 0.56 0.13 0.00 RG - 0 -10.959 Td [(integer)]TJ +/F59 9.9626 Tf -24.907 -19.907 Td [(Update:)]TJ 0 g 0 G - [(,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(allocatable)]TJ +/F62 9.9626 Tf 40.678 0 Td [(State)-219(enter)18(ed)-220(after)-219(a)-219(r)18(einitalization;)-230(this)-219(is)-219(used)-220(to)-219(handle)-219(applications)]TJ -15.771 -11.955 Td [(in)-288(which)-288(the)-288(same)-288(sparsity)-289(pattern)-288(is)-288(used)-288(multiple)-288(times)-288(with)-288(dif)18(fer)18(ent)]TJ 0 -11.955 Td [(coef)18(\002cients.)-298(In)-213(this)-214(state)-213(it)-214(is)-213(only)-214(possible)-213(to)-214(enter)-213(coef)18(\002cients)-214(for)-213(alr)18(eady)]TJ 0 -11.956 Td [(existing)-250(nonzer)18(o)-250(entries.)]TJ -24.907 -19.888 Td [(The)-293(only)-292(storage)-293(variant)-292(supporting)-293(the)-293(build)-292(state)-293(is)-292(COO;)-293(all)-293(other)-292(variants)]TJ 0 -11.955 Td [(ar)18(e)-250(obtained)-250(by)-250(conversion)-250(to/fr)18(om)-250(it.)]TJ/F59 9.9626 Tf 0 -27.132 Td [(3.2.1)-1000(Sparse)-250(Matrix)-250(Methods)]TJ 0 -18.964 Td [(3.2.2)-1000(get)]TJ +ET +q +1 0 0 1 144.219 266.863 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 147.208 266.663 Td [(nrows)-250(\227)-250(Get)-250(number)-250(of)-250(rows)-250(in)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-1050(::)]TJ 0 g 0 G - [-525(ovrlap_elem\050:,:\051)]TJ -0.56 0.13 0.00 rg 0.56 0.13 0.00 RG - 0 -10.959 Td [(integer)]TJ +/F67 9.9626 Tf -47.313 -18.963 Td [(nr)-525(=)-525(a%get_nrows\050\051)]TJ 0 g 0 G - [(,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(allocatable)]TJ +/F59 9.9626 Tf 0 -21.872 Td [(T)90(ype:)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-1050(::)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - [-525(bnd_elem\050:\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -18.829 -10.959 Td [(end)-525(type)]TJ +/F59 9.9626 Tf -29.828 -19.907 Td [(On)-250(Entry)]TJ 0 g 0 G - [-525(psb_desc_type)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G + 0 -19.907 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf -2.989 -41.43 Td [(Listing)-259(1:)-327(The)-259(PSBLAS)-259(de\002ned)-259(data)-258(type)-259(that)-259(contains)-259(the)-258(communication)-259(de-)]TJ 0 -11.955 Td [(scriptor)74(.)]TJ 0 -25.259 Td [(communication)-319(descriptor)-320(associated)-319(with)-319(a)-320(sparse)-319(matrix)-320(has)-319(a)-319(state,)-337(which)]TJ 0 -11.955 Td [(can)-250(take)-250(the)-250(following)-250(values:)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.054 Td [(Build:)]TJ + -56.339 -33.827 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 32.927 0 Td [(State)-283(enter)18(ed)-283(after)-283(the)-283(\002rst)-284(allocation,)-291(and)-283(befor)18(e)-283(the)-283(\002rst)-283(assembly;)-300(in)]TJ -8.02 -11.956 Td [(this)-220(state)-220(it)-220(is)-220(possible)-220(to)-220(add)-220(communication)-220(r)18(equir)18(ements)-220(among)-220(dif)18(fer)18(-)]TJ 0 -11.955 Td [(ent)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.431 Td [(Assembled:)]TJ + 0 -19.906 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 58.381 0 Td [(State)-308(enter)18(ed)-308(after)-308(the)-309(assembly;)-337(computations)-308(using)-308(the)-308(associ-)]TJ -33.474 -11.955 Td [(ated)-310(sparse)-310(matrix,)-325(such)-310(as)-310(matrix-vector)-309(pr)18(oducts,)-325(ar)18(e)-310(only)-310(possible)-310(in)]TJ 0 -11.955 Td [(this)-250(state.)]TJ +/F62 9.9626 Tf 72.777 0 Td [(The)-250(number)-250(of)-250(r)18(ows)-250(of)-250(sparse)-250(matrix)]TJ/F67 9.9626 Tf 165.298 0 Td [(a)]TJ/F62 9.9626 Tf 5.231 0 Td [(.)]TJ 0 g 0 G - 141.968 -29.888 Td [(11)]TJ + -76.431 -29.888 Td [(17)]TJ 0 g 0 G ET endstream endobj -921 0 obj +1055 0 obj << -/Length 4957 +/Length 3860 >> stream 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(3.1.1)-1000(Descriptor)-250(Methods)]TJ 0 -19 Td [(3.1.2)-1000(get)]TJ -ET -q -1 0 0 1 195.029 687.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 198.017 687.129 Td [(local)]TJ +/F59 9.9626 Tf 150.705 706.129 Td [(3.2.3)-1000(get)]TJ ET q -1 0 0 1 220.194 687.328 cm +1 0 0 1 195.029 706.328 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 223.183 687.129 Td [(rows)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(rows)]TJ -0 g 0 G +/F59 9.9626 Tf 198.017 706.129 Td [(ncols)-250(\227)-250(Get)-250(number)-250(of)-250(columns)-250(in)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G -/F59 9.9626 Tf -72.478 -19 Td [(nr)-525(=)-525(desc%get_local_rows\050\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.974 Td [(T)90(ype:)]TJ +/F67 9.9626 Tf -47.312 -19.023 Td [(nc)-525(=)-525(a%get_ncols\050\051)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F59 9.9626 Tf 0 -22.01 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.001 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G +/F59 9.9626 Tf -29.828 -20.049 Td [(On)-250(Entry)]TJ 0 g 0 G - 0 -20 Td [(desc)]TJ 0 g 0 G -/F54 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.011 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ + 0 -20.048 Td [(a)]TJ 0 g 0 G -/F51 9.9626 Tf -77.917 -33.929 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ 0 g 0 G + -56.338 -33.965 Td [(On)-250(Return)]TJ 0 g 0 G - 0 -20 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-399(number)-398(of)-399(local)-398(r)18(ows,)-436(i.e.)-756(the)-398(number)-399(of)-399(r)18(ows)-398(owned)]TJ -47.87 -11.956 Td [(by)-350(the)-349(curr)18(ent)-350(pr)18(ocess;)-399(as)-350(explained)-350(in)]TJ -0 0 1 rg 0 0 1 RG - [-349(1)]TJ + 0 -20.048 Td [(Function)-250(value)]TJ 0 g 0 G - [(,)-375(it)-350(is)-349(equal)-350(to)]TJ/F83 10.3811 Tf 249.705 0 Td [(j)-24(I)]TJ/F52 7.5716 Tf 8.943 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.876 1.96 Td [(j)]TJ/F85 10.3811 Tf 5.433 0 Td [(+)]TJ/F83 10.3811 Tf 10.624 0 Td [(j)-23(B)]TJ/F52 7.5716 Tf 10.108 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.876 1.96 Td [(j)]TJ/F54 9.9626 Tf 3.003 0 Td [(.)-609(The)]TJ -293.569 -11.955 Td [(r)18(eturned)-250(value)-250(is)-250(speci\002c)-250(to)-250(the)-250(calling)-250(pr)18(ocess.)]TJ/F51 9.9626 Tf -24.906 -27.247 Td [(3.1.3)-1000(get)]TJ -ET -q -1 0 0 1 195.029 489.311 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 198.017 489.112 Td [(local)]TJ +/F62 9.9626 Tf 72.777 0 Td [(The)-250(number)-250(of)-250(columns)-250(of)-250(sparse)-250(matrix)]TJ/F67 9.9626 Tf 181.158 0 Td [(a)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ/F59 9.9626 Tf -259.165 -27.315 Td [(3.2.4)-1000(get)]TJ ET q -1 0 0 1 220.194 489.311 cm +1 0 0 1 195.029 531.915 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 223.183 489.112 Td [(cols)-250(\227)-250(Get)-250(number)-250(of)-250(local)-250(cols)]TJ +/F59 9.9626 Tf 198.017 531.716 Td [(nnzeros)-250(\227)-250(Get)-250(number)-250(of)-250(nonzero)-250(elements)-250(in)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -72.478 -19 Td [(nc)-525(=)-525(desc%get_local_cols\050\051)]TJ +/F67 9.9626 Tf -47.312 -19.024 Td [(nz)-525(=)-525(a%get_nnzeros\050\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.974 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -22.01 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -20.048 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -20.001 Td [(desc)]TJ + 0 -20.048 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.011 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ 0 g 0 G -/F51 9.9626 Tf -77.917 -33.929 Td [(On)-250(Return)]TJ + -56.338 -33.965 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G - 0 -20 Td [(Function)-250(value)]TJ + 0 -20.048 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-320(number)-320(of)-321(local)-320(cols,)-338(i.e.)-521(the)-320(number)-320(of)-321(indices)-320(used)-320(by)]TJ -47.87 -11.955 Td [(the)-322(curr)18(ent)-322(pr)18(ocess,)-340(including)-322(both)-322(local)-322(and)-322(halo)-322(indices;)-358(as)-322(explained)]TJ 0 -11.956 Td [(in)]TJ -0 0 1 rg 0 0 1 RG - [-284(1)]TJ +/F62 9.9626 Tf 72.777 0 Td [(The)-250(number)-250(of)-250(nonzer)18(o)-250(elements)-250(stor)18(ed)-250(in)-250(sparse)-250(matrix)]TJ/F67 9.9626 Tf 251.284 0 Td [(a)]TJ/F62 9.9626 Tf 5.231 0 Td [(.)]TJ/F59 9.9626 Tf -329.292 -22.041 Td [(Notes)]TJ 0 g 0 G - [(,)-294(i)1(t)-285(is)-284(equal)-285(to)]TJ/F83 10.3811 Tf 79.58 0 Td [(j)-24(I)]TJ/F52 7.5716 Tf 8.943 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F85 10.3811 Tf 5.193 0 Td [(+)]TJ/F83 10.3811 Tf 10.383 0 Td [(j)-24(B)]TJ/F52 7.5716 Tf 10.109 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F85 10.3811 Tf 5.192 0 Td [(+)]TJ/F83 10.3811 Tf 10.383 0 Td [(j)-24(H)]TJ/F52 7.5716 Tf 12.052 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F54 9.9626 Tf 3.004 0 Td [(.)-413(The)-285(r)18(eturned)-284(value)-285(is)-284(speci\002c)-285(to)-284(the)]TJ -153.464 -11.955 Td [(calling)-250(pr)18(ocess.)]TJ/F51 9.9626 Tf -24.907 -27.247 Td [(3.1.4)-1000(get)]TJ -ET -q -1 0 0 1 195.029 279.339 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 198.017 279.14 Td [(global)]TJ +/F62 9.9626 Tf 12.453 -20.017 Td [(1.)]TJ +0 g 0 G + [-500(The)-395(function)-395(value)-395(is)-395(speci\002c)-395(to)-395(the)-395(storage)-395(format)-395(of)-396(matri)1(x)]TJ/F67 9.9626 Tf 295.646 0 Td [(a)]TJ/F62 9.9626 Tf 5.23 0 Td [(;)-468(some)]TJ -288.422 -11.956 Td [(storage)-343(formats)-342(employ)-343(padding,)-366(thus)-343(the)-342(r)18(eturned)-343(value)-343(for)-342(the)-343(same)]TJ 0 -11.955 Td [(matrix)-250(may)-250(be)-250(dif)18(fer)18(ent)-250(for)-250(dif)18(fer)18(ent)-250(storage)-250(choices.)]TJ/F59 9.9626 Tf -24.907 -27.315 Td [(3.2.5)-1000(get)]TJ ET q -1 0 0 1 227.397 279.339 cm +1 0 0 1 195.029 291.533 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 230.386 279.14 Td [(rows)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(rows)]TJ +/F59 9.9626 Tf 198.017 291.334 Td [(size)-398(\227)-397(Get)-398(maximum)-397(number)-398(of)-398(nonzero)-397(elements)-398(in)-398(a)-397(sparse)]TJ -17.424 -11.955 Td [(matrix)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -79.681 -19 Td [(nr)-525(=)-525(desc%get_global_rows\050\051)]TJ +/F67 9.9626 Tf -29.888 -19.024 Td [(maxnz)-525(=)-525(a%get_size\050\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.974 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -22.01 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -20.048 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -20.001 Td [(desc)]TJ + 0 -20.048 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.011 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ 0 g 0 G -/F51 9.9626 Tf -77.917 -33.929 Td [(On)-250(Return)]TJ + -56.338 -33.965 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G - 0 -20 Td [(Function)-250(value)]TJ + 0 -20.048 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-351(number)-351(of)-350(global)-351(r)18(ows,)-376(i.e.)-613(the)-351(size)-351(of)-350(the)-351(global)-351(index)]TJ -47.87 -11.955 Td [(space.)]TJ +/F62 9.9626 Tf 72.777 0 Td [(The)-220(maximum)-220(number)-219(of)-220(nonzer)18(o)-220(elements)-220(that)-220(can)-219(be)-220(stor)18(ed)]TJ -47.87 -11.955 Td [(in)-250(sparse)-250(matrix)]TJ/F67 9.9626 Tf 73.294 0 Td [(a)]TJ/F62 9.9626 Tf 7.721 0 Td [(using)-250(its)-250(curr)18(ent)-250(memory)-250(allocation.)]TJ 0 g 0 G - 141.967 -29.888 Td [(12)]TJ + 60.952 -29.888 Td [(18)]TJ 0 g 0 G ET endstream endobj -925 0 obj +1060 0 obj << -/Length 4367 +/Length 4447 >> stream 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(3.1.5)-1000(get)]TJ -ET -q -1 0 0 1 144.219 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 147.208 706.129 Td [(global)]TJ +/F59 9.9626 Tf 99.895 706.129 Td [(3.2.6)-1000(sizeof)-250(\227)-250(Get)-250(memory)-250(occupation)-250(in)-250(bytes)-250(of)-250(a)-250(sparse)-250(matrix)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf 0 -20.135 Td [(memory_size)-525(=)-525(a%sizeof\050\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -23.732 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -22.343 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -22.343 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ +0 g 0 G + -56.339 -35.687 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -22.343 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(The)-250(memory)-250(occupation)-250(in)-250(bytes.)]TJ/F59 9.9626 Tf -72.777 -30.58 Td [(3.2.7)-1000(get)]TJ ET q -1 0 0 1 176.587 706.328 cm +1 0 0 1 144.219 517.21 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 179.576 706.129 Td [(cols)-250(\227)-250(Get)-250(number)-250(of)-250(global)-250(cols)]TJ +/F59 9.9626 Tf 147.208 517.011 Td [(fmt)-250(\227)-250(Short)-250(description)-250(of)-250(the)-250(dynamic)-250(type)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -47.313 -20.135 Td [(write)]TJ +0 g 0 G + [(\050)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(*)]TJ +0 g 0 G + [(,)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(*)]TJ 0 g 0 G + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf -79.681 -18.974 Td [(nr)-525(=)-525(desc%get_global_cols\050\051)]TJ + [-525(a%get_fmt\050\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.935 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -24.336 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.947 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -22.343 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.947 Td [(desc)]TJ + 0 -22.343 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ 0 g 0 G -/F51 9.9626 Tf -77.918 -33.889 Td [(On)-250(Return)]TJ + -56.339 -35.686 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G - 0 -19.947 Td [(Function)-250(value)]TJ + 0 -22.343 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(A)-244(short)-245(string)-244(describing)-245(the)-244(dynamic)-245(type)-244(of)-245(the)-244(matrix.)-308(Pr)18(e-)]TJ -47.87 -11.955 Td [(de\002ned)-250(values)-250(include)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 102.415 0 Td [(NULL)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-242(number)-242(of)-241(global)-242(cols;)-245(usually)-241(this)-242(is)-242(equal)-242(to)-241(the)-242(number)]TJ -47.87 -11.955 Td [(of)-250(global)-250(r)18(ows.)]TJ/F51 9.9626 Tf -24.907 -27.172 Td [(3.1.6)-1000(get)]TJ +/F62 9.9626 Tf 20.921 0 Td [(,)]TJ/F67 9.9626 Tf 4.981 0 Td [(COO)]TJ/F62 9.9626 Tf 15.691 0 Td [(,)]TJ/F67 9.9626 Tf 4.982 0 Td [(CSR)]TJ/F62 9.9626 Tf 18.181 0 Td [(and)]TJ/F67 9.9626 Tf 19.358 0 Td [(CSC)]TJ/F62 9.9626 Tf 15.691 0 Td [(.)]TJ/F59 9.9626 Tf -227.127 -30.581 Td [(3.2.8)-1000(is)]TJ ET q -1 0 0 1 144.219 520.607 cm +1 0 0 1 138.122 315.533 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 147.208 520.408 Td [(global)]TJ +/F59 9.9626 Tf 141.111 315.333 Td [(bld,)-250(is)]TJ ET q -1 0 0 1 176.587 520.607 cm +1 0 0 1 169.922 315.533 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 179.576 520.408 Td [(indices)-250(\227)-250(Get)-250(vector)-250(of)-250(global)-250(indices)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -79.681 -18.974 Td [(myidx)-525(=)-525(desc%get_global_indices\050[owned]\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.934 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.947 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf 172.911 315.333 Td [(upd,)-250(is)]TJ +ET +q +1 0 0 1 204.493 315.533 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 207.482 315.333 Td [(asb)-250(\227)-250(Status)-250(check)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -107.587 -20.135 Td [(if)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.947 Td [(desc)]TJ + [-525(\050a%is_bld\050\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ 0 g 0 G -/F51 9.9626 Tf -91.287 -31.902 Td [(owned)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(if)]TJ 0 g 0 G -/F54 9.9626 Tf 35.975 0 Td [(Choose)-330(if)-329(you)-330(only)-329(want)-330(owned)-330(indices)-329(\050)]TJ/F59 9.9626 Tf 185.766 0 Td [(owned)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(.true.)]TJ/F54 9.9626 Tf 62.764 0 Td [(\051)-330(or)-329(also)-330(halo)]TJ -259.598 -11.955 Td [(indices)-250(\050)]TJ/F59 9.9626 Tf 36.911 0 Td [(owned)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [-525(\050a%is_upd\050\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(.false.)]TJ/F54 9.9626 Tf 67.995 0 Td [(\051.)-310(Scope:)]TJ/F51 9.9626 Tf 40.328 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -166.813 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(;)-250(default:)]TJ/F59 9.9626 Tf 41.872 0 Td [(.true.)]TJ/F54 9.9626 Tf 31.382 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ 0 g 0 G -/F51 9.9626 Tf -163.436 -33.89 Td [(On)-250(Return)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(if)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.947 Td [(Function)-250(value)]TJ + [-525(\050a%is_asb\050\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-277(global)-277(indices,)-284(r)18(eturned)-277(as)-277(an)-277(allocatable)-277(integer)-277(array)-277(of)]TJ -47.87 -11.955 Td [(kind)]TJ/F59 9.9626 Tf 22.814 0 Td [(psb_lpk_)]TJ/F54 9.9626 Tf 44.334 0 Td [(and)-250(rank)-250(1.)]TJ/F51 9.9626 Tf -92.055 -27.171 Td [(3.1.7)-1000(get)]TJ -ET -q -1 0 0 1 144.219 267.119 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 147.208 266.92 Td [(context)-250(\227)-250(Get)-250(communication)-250(context)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -47.313 -18.975 Td [(ctxt)-525(=)-525(desc%get_context\050\051)]TJ +/F59 9.9626 Tf 0 -24.336 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.934 Td [(T)90(ype:)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F59 9.9626 Tf -29.828 -22.343 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.947 Td [(On)-250(Entry)]TJ 0 g 0 G + 0 -22.343 Td [(a)]TJ 0 g 0 G - 0 -19.947 Td [(desc)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ 0 g 0 G -/F54 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ + -56.339 -35.686 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -77.918 -33.889 Td [(On)-250(Return)]TJ 0 g 0 G + 0 -22.343 Td [(Function)-250(value)]TJ 0 g 0 G - 0 -19.947 Td [(Function)-250(value)]TJ +/F62 9.9626 Tf 72.777 0 Td [(A)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F67 9.9626 Tf 9.966 0 Td [(logical)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(communication)-250(context.)]TJ +/F62 9.9626 Tf 38.827 0 Td [(value)-222(indicating)-223(whether)-222(the)-222(matrix)-223(is)-222(in)-222(the)-223(Build,)]TJ -96.663 -11.955 Td [(Update)-250(or)-250(Assembled)-250(state,)-250(r)18(espectively)111(.)]TJ 0 g 0 G - 94.098 -29.888 Td [(13)]TJ + 141.968 -29.888 Td [(19)]TJ 0 g 0 G ET endstream endobj -829 0 obj +972 0 obj << /Type /ObjStm /N 100 -/First 875 -/Length 9086 ->> -stream -828 0 818 69 819 217 824 364 825 421 19 478 821 534 835 681 832 823 833 970 -837 1117 834 1173 840 1266 842 1380 23 1437 843 1493 844 1550 845 1607 846 1664 847 1721 -848 1778 849 1835 850 1892 839 1949 853 2068 838 2202 855 2351 856 2407 857 2463 858 2519 -859 2575 860 2631 861 2687 862 2743 863 2799 864 2855 865 2911 866 2967 867 3023 868 3079 -869 3135 852 3192 874 3285 851 3427 872 3579 876 3726 27 3783 877 3839 878 3896 879 3951 -880 4007 881 4064 882 4121 31 4178 873 4234 885 4353 883 4487 887 4634 35 4690 39 4745 -888 4800 884 4856 893 4949 889 5099 890 5245 891 5397 895 5549 896 5606 897 5663 898 5720 -899 5777 900 5834 892 5891 904 5971 901 6113 902 6265 906 6417 907 6473 908 6529 909 6585 -910 6641 911 6697 912 6753 913 6809 914 6865 916 6921 903 6977 920 7083 917 7225 918 7372 -922 7518 43 7575 47 7631 51 7687 55 7743 919 7799 924 7931 926 8045 59 8101 63 8156 -% 828 0 obj +/First 913 +/Length 9266 +>> +stream +969 0 952 57 975 150 951 292 973 444 977 591 27 648 978 704 979 761 980 816 +981 872 982 929 983 986 31 1043 974 1099 986 1218 984 1352 988 1499 35 1555 39 1610 +989 1665 985 1721 994 1814 990 1964 991 2110 992 2262 996 2414 997 2471 998 2528 999 2585 +1000 2642 1001 2700 993 2758 1005 2838 1002 2985 1003 3138 1007 3291 1008 3349 1009 3407 1010 3465 +1011 3523 1012 3581 1013 3639 1014 3697 1015 3755 1017 3813 1004 3871 1021 3980 1018 4127 1019 4275 +1023 4422 43 4481 47 4538 51 4595 55 4652 1020 4709 1025 4842 1027 4960 59 5018 63 5074 +67 5130 1024 5186 1030 5280 1032 5398 71 5457 75 5514 1033 5571 79 5630 83 5685 1029 5742 +1035 5836 1037 5954 87 6012 91 6068 95 6124 1034 6180 1042 6274 1038 6431 1039 6589 1040 6741 +1044 6889 99 6948 103 7005 1045 7063 1041 7122 1049 7216 1051 7334 1047 7392 1052 7450 107 7509 +111 7566 1048 7623 1054 7732 1056 7850 115 7909 119 7967 1057 8025 123 8084 1053 8141 1059 8235 +% 969 0 obj +<< +/D [953 0 R /XYZ 114.242 139.255 null] +>> +% 952 0 obj << -/BaseFont /Times-Roman -/Type /Font -/Subtype /Type1 +/Font << /F62 667 0 R /F67 913 0 R /F97 970 0 R >> +/ProcSet [ /PDF /Text ] >> -% 818 0 obj +% 975 0 obj +<< +/Type /Page +/Contents 976 0 R +/Resources 974 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 971 0 R +/Annots [ 951 0 R 973 0 R ] +>> +% 951 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[0 1 0] -/Rect [327.281 638.309 334.255 647.315] -/A << /S /GoTo /D (cite.2007c) >> +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [269.647 674.054 284.093 683.464] +/A << /S /GoTo /D (subsection.2.3) >> +>> +% 973 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [168.073 254.848 175.046 266.907] +/A << /S /GoTo /D (section.3) >> +>> +% 977 0 obj +<< +/D [975 0 R /XYZ 149.705 753.953 null] +>> +% 27 0 obj +<< +/D [975 0 R /XYZ 150.705 716.092 null] +>> +% 978 0 obj +<< +/D [975 0 R /XYZ 150.705 671.065 null] +>> +% 979 0 obj +<< +/D [975 0 R /XYZ 150.705 648.1 null] +>> +% 980 0 obj +<< +/D [975 0 R /XYZ 150.705 573.59 null] +>> +% 981 0 obj +<< +/D [975 0 R /XYZ 150.705 516.424 null] +>> +% 982 0 obj +<< +/D [975 0 R /XYZ 150.705 483.864 null] +>> +% 983 0 obj +<< +/D [975 0 R /XYZ 150.705 463.343 null] +>> +% 31 0 obj +<< +/D [975 0 R /XYZ 150.705 408.307 null] +>> +% 974 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 986 0 obj +<< +/Type /Page +/Contents 987 0 R +/Resources 985 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 971 0 R +/Annots [ 984 0 R ] +>> +% 984 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [268.105 173.115 275.079 185.175] +/A << /S /GoTo /D (section.6) >> +>> +% 988 0 obj +<< +/D [986 0 R /XYZ 98.895 753.953 null] +>> +% 35 0 obj +<< +/D [986 0 R /XYZ 99.895 716.092 null] +>> +% 39 0 obj +<< +/D [986 0 R /XYZ 99.895 279.545 null] +>> +% 989 0 obj +<< +/D [986 0 R /XYZ 342.47 236.897 null] +>> +% 985 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 994 0 obj +<< +/Type /Page +/Contents 995 0 R +/Resources 993 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 971 0 R +/Annots [ 990 0 R 991 0 R 992 0 R ] +>> +% 990 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [482.63 399.338 489.604 411.398] +/A << /S /GoTo /D (section.6) >> +>> +% 991 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [354.987 193.202 369.432 205.262] +/A << /S /GoTo /D (subsection.3.3) >> +>> +% 992 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [354.987 148.883 369.432 160.943] +/A << /S /GoTo /D (subsection.3.3) >> +>> +% 996 0 obj +<< +/D [994 0 R /XYZ 149.705 753.953 null] >> -% 819 0 obj +% 997 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[0 1 0] -/Rect [337.243 638.21 344.217 647.166] -/A << /S /GoTo /D (cite.2007d) >> +/D [994 0 R /XYZ 150.705 293.402 null] >> -% 824 0 obj +% 998 0 obj << -/D [822 0 R /XYZ 149.705 753.953 null] +/D [994 0 R /XYZ 150.705 278.496 null] >> -% 825 0 obj +% 999 0 obj << -/D [822 0 R /XYZ 150.705 353.614 null] +/D [994 0 R /XYZ 150.705 261.039 null] >> -% 19 0 obj +% 1000 0 obj << -/D [822 0 R /XYZ 150.705 270.035 null] +/D [994 0 R /XYZ 150.705 244.857 null] >> -% 821 0 obj +% 1001 0 obj +<< +/D [994 0 R /XYZ 150.705 228.675 null] +>> +% 993 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F83 813 0 R /F52 585 0 R /F85 814 0 R >> -/XObject << /Im3 820 0 R >> +/Font << /F62 667 0 R /F59 665 0 R >> /ProcSet [ /PDF /Text ] >> -% 835 0 obj +% 1005 0 obj << /Type /Page -/Contents 836 0 R -/Resources 834 0 R +/Contents 1006 0 R +/Resources 1004 0 R /MediaBox [0 0 595.276 841.89] -/Parent 780 0 R -/Annots [ 832 0 R 833 0 R ] +/Parent 971 0 R +/Annots [ 1002 0 R 1003 0 R ] >> -% 832 0 obj +% 1002 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [404.739 354.196 411.713 366.255] -/A << /S /GoTo /D (section.3) >> +/Rect [304.177 614.119 318.623 626.179] +/A << /S /GoTo /D (subsection.3.3) >> >> -% 833 0 obj +% 1003 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [172.593 318.033 179.567 330.093] -/A << /S /GoTo /D (section.6) >> ->> -% 837 0 obj -<< -/D [835 0 R /XYZ 98.895 753.953 null] ->> -% 834 0 obj -<< -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 840 0 obj -<< -/Type /Page -/Contents 841 0 R -/Resources 839 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 780 0 R +/Rect [304.177 568.778 318.623 580.837] +/A << /S /GoTo /D (subsection.3.3) >> >> -% 842 0 obj +% 1007 0 obj << -/D [840 0 R /XYZ 149.705 753.953 null] +/D [1005 0 R /XYZ 98.895 753.953 null] >> -% 23 0 obj +% 1008 0 obj << -/D [840 0 R /XYZ 150.705 716.092 null] +/D [1005 0 R /XYZ 99.895 716.092 null] >> -% 843 0 obj +% 1009 0 obj << -/D [840 0 R /XYZ 150.705 282.521 null] +/D [1005 0 R /XYZ 99.895 702.226 null] >> -% 844 0 obj +% 1010 0 obj << -/D [840 0 R /XYZ 150.705 261.733 null] +/D [1005 0 R /XYZ 99.895 684.257 null] >> -% 845 0 obj +% 1011 0 obj << -/D [840 0 R /XYZ 150.705 240.946 null] +/D [1005 0 R /XYZ 99.895 667.564 null] >> -% 846 0 obj +% 1012 0 obj << -/D [840 0 R /XYZ 150.705 220.159 null] +/D [1005 0 R /XYZ 99.895 650.871 null] >> -% 847 0 obj +% 1013 0 obj << -/D [840 0 R /XYZ 150.705 188.012 null] +/D [1005 0 R /XYZ 99.895 541.236 null] >> -% 848 0 obj +% 1014 0 obj << -/D [840 0 R /XYZ 150.705 167.072 null] +/D [1005 0 R /XYZ 99.895 524.542 null] >> -% 849 0 obj +% 1015 0 obj << -/D [840 0 R /XYZ 150.705 148.646 null] +/D [1005 0 R /XYZ 99.895 507.849 null] >> -% 850 0 obj +% 1017 0 obj << -/D [840 0 R /XYZ 150.705 132.275 null] +/D [1005 0 R /XYZ 99.895 288.977 null] >> -% 839 0 obj +% 1004 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F59 812 0 R /F85 814 0 R >> +/Font << /F62 667 0 R /F59 665 0 R /F67 913 0 R /F102 1016 0 R >> /ProcSet [ /PDF /Text ] >> -% 853 0 obj +% 1021 0 obj << /Type /Page -/Contents 854 0 R -/Resources 852 0 R +/Contents 1022 0 R +/Resources 1020 0 R /MediaBox [0 0 595.276 841.89] -/Parent 871 0 R -/Annots [ 838 0 R ] +/Parent 971 0 R +/Annots [ 1018 0 R 1019 0 R ] >> -% 838 0 obj +% 1018 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [176.109 690.964 182.386 703.958] -/A << /S /GoTo /D (Hfootnote.3) >> ->> -% 855 0 obj -<< -/D [853 0 R /XYZ 98.895 753.953 null] ->> -% 856 0 obj -<< -/D [853 0 R /XYZ 99.895 716.092 null] ->> -% 857 0 obj -<< -/D [853 0 R /XYZ 99.895 686.784 null] +/Rect [352.861 524.509 359.835 537.101] +/A << /S /GoTo /D (section.1) >> >> -% 858 0 obj +% 1019 0 obj << -/D [853 0 R /XYZ 99.895 618.259 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [186.147 314.537 193.12 327.129] +/A << /S /GoTo /D (section.1) >> >> -% 859 0 obj +% 1023 0 obj << -/D [853 0 R /XYZ 99.895 595.952 null] +/D [1021 0 R /XYZ 149.705 753.953 null] >> -% 860 0 obj +% 43 0 obj << -/D [853 0 R /XYZ 99.895 573.645 null] +/D [1021 0 R /XYZ 150.705 716.092 null] >> -% 861 0 obj +% 47 0 obj << -/D [853 0 R /XYZ 99.895 539.978 null] +/D [1021 0 R /XYZ 150.705 696.532 null] >> -% 862 0 obj +% 51 0 obj << -/D [853 0 R /XYZ 99.895 517.075 null] +/D [1021 0 R /XYZ 150.705 498.276 null] >> -% 863 0 obj +% 55 0 obj << -/D [853 0 R /XYZ 99.895 494.768 null] +/D [1021 0 R /XYZ 150.705 288.305 null] >> -% 864 0 obj +% 1020 0 obj << -/D [853 0 R /XYZ 99.895 469.873 null] +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F91 914 0 R /F60 666 0 R /F93 915 0 R >> +/ProcSet [ /PDF /Text ] >> -% 865 0 obj +% 1025 0 obj << -/D [853 0 R /XYZ 99.895 442.062 null] +/Type /Page +/Contents 1026 0 R +/Resources 1024 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1028 0 R >> -% 866 0 obj +% 1027 0 obj << -/D [853 0 R /XYZ 99.895 412.296 null] +/D [1025 0 R /XYZ 98.895 753.953 null] >> -% 867 0 obj +% 59 0 obj << -/D [853 0 R /XYZ 99.895 395.165 null] +/D [1025 0 R /XYZ 99.895 716.092 null] >> -% 868 0 obj +% 63 0 obj << -/D [853 0 R /XYZ 99.895 377.438 null] +/D [1025 0 R /XYZ 99.895 529.559 null] >> -% 869 0 obj +% 67 0 obj << -/D [853 0 R /XYZ 114.242 139.255 null] +/D [1025 0 R /XYZ 99.895 276.666 null] >> -% 852 0 obj +% 1024 0 obj << -/Font << /F54 586 0 R /F59 812 0 R /F89 870 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 874 0 obj +% 1030 0 obj << /Type /Page -/Contents 875 0 R -/Resources 873 0 R +/Contents 1031 0 R +/Resources 1029 0 R /MediaBox [0 0 595.276 841.89] -/Parent 871 0 R -/Annots [ 851 0 R 872 0 R ] ->> -% 851 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [269.647 674.054 284.093 683.464] -/A << /S /GoTo /D (subsection.2.3) >> ->> -% 872 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [168.073 254.848 175.046 266.907] -/A << /S /GoTo /D (section.3) >> ->> -% 876 0 obj -<< -/D [874 0 R /XYZ 149.705 753.953 null] ->> -% 27 0 obj -<< -/D [874 0 R /XYZ 150.705 716.092 null] ->> -% 877 0 obj -<< -/D [874 0 R /XYZ 150.705 671.065 null] +/Parent 1028 0 R >> -% 878 0 obj +% 1032 0 obj << -/D [874 0 R /XYZ 150.705 648.1 null] +/D [1030 0 R /XYZ 149.705 753.953 null] >> -% 879 0 obj +% 71 0 obj << -/D [874 0 R /XYZ 150.705 573.59 null] +/D [1030 0 R /XYZ 150.705 716.092 null] >> -% 880 0 obj +% 75 0 obj << -/D [874 0 R /XYZ 150.705 516.424 null] +/D [1030 0 R /XYZ 150.705 519.544 null] >> -% 881 0 obj +% 1033 0 obj << -/D [874 0 R /XYZ 150.705 483.864 null] +/D [1030 0 R /XYZ 397.537 356.277 null] >> -% 882 0 obj +% 79 0 obj << -/D [874 0 R /XYZ 150.705 463.343 null] +/D [1030 0 R /XYZ 150.705 305.6 null] >> -% 31 0 obj +% 83 0 obj << -/D [874 0 R /XYZ 150.705 408.307 null] +/D [1030 0 R /XYZ 150.705 194.578 null] >> -% 873 0 obj +% 1029 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 885 0 obj +% 1035 0 obj << /Type /Page -/Contents 886 0 R -/Resources 884 0 R +/Contents 1036 0 R +/Resources 1034 0 R /MediaBox [0 0 595.276 841.89] -/Parent 871 0 R -/Annots [ 883 0 R ] ->> -% 883 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [268.105 173.115 275.079 185.175] -/A << /S /GoTo /D (section.6) >> +/Parent 1028 0 R >> -% 887 0 obj +% 1037 0 obj << -/D [885 0 R /XYZ 98.895 753.953 null] +/D [1035 0 R /XYZ 98.895 753.953 null] >> -% 35 0 obj +% 87 0 obj << -/D [885 0 R /XYZ 99.895 716.092 null] +/D [1035 0 R /XYZ 99.895 583.842 null] >> -% 39 0 obj +% 91 0 obj << -/D [885 0 R /XYZ 99.895 279.545 null] +/D [1035 0 R /XYZ 99.895 466.211 null] >> -% 888 0 obj +% 95 0 obj << -/D [885 0 R /XYZ 342.47 236.897 null] +/D [1035 0 R /XYZ 99.895 256.035 null] >> -% 884 0 obj +% 1034 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 893 0 obj +% 1042 0 obj << /Type /Page -/Contents 894 0 R -/Resources 892 0 R +/Contents 1043 0 R +/Resources 1041 0 R /MediaBox [0 0 595.276 841.89] -/Parent 871 0 R -/Annots [ 889 0 R 890 0 R 891 0 R ] +/Parent 1028 0 R +/Annots [ 1038 0 R 1039 0 R 1040 0 R ] >> -% 889 0 obj +% 1038 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [482.63 399.338 489.604 411.398] -/A << /S /GoTo /D (section.6) >> +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [187.544 240.393 199.499 249.399] +/A << /S /GoTo /D (cite.DesignPatterns) >> >> -% 890 0 obj +% 1039 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [354.987 193.202 369.432 205.262] -/A << /S /GoTo /D (subsection.3.3) >> +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [267.981 240.293 279.936 249.399] +/A << /S /GoTo /D (cite.Sparse03) >> >> -% 891 0 obj +% 1040 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [354.987 148.883 369.432 160.943] -/A << /S /GoTo /D (subsection.3.3) >> ->> -% 895 0 obj -<< -/D [893 0 R /XYZ 149.705 753.953 null] ->> -% 896 0 obj -<< -/D [893 0 R /XYZ 150.705 293.402 null] +/Rect [458.483 237.643 465.457 249.703] +/A << /S /GoTo /D (listing.2) >> >> -% 897 0 obj +% 1044 0 obj << -/D [893 0 R /XYZ 150.705 278.496 null] +/D [1042 0 R /XYZ 149.705 753.953 null] >> -% 898 0 obj +% 99 0 obj << -/D [893 0 R /XYZ 150.705 261.039 null] +/D [1042 0 R /XYZ 150.705 544.277 null] >> -% 899 0 obj +% 103 0 obj << -/D [893 0 R /XYZ 150.705 244.857 null] +/D [1042 0 R /XYZ 150.705 296.936 null] >> -% 900 0 obj +% 1045 0 obj << -/D [893 0 R /XYZ 150.705 228.675 null] +/D [1042 0 R /XYZ 170.328 265.359 null] >> -% 892 0 obj +% 1041 0 obj << -/Font << /F54 586 0 R /F51 584 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 904 0 obj +% 1049 0 obj << /Type /Page -/Contents 905 0 R -/Resources 903 0 R +/Contents 1050 0 R +/Resources 1048 0 R /MediaBox [0 0 595.276 841.89] -/Parent 871 0 R -/Annots [ 901 0 R 902 0 R ] ->> -% 901 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [304.177 614.119 318.623 626.179] -/A << /S /GoTo /D (subsection.3.3) >> ->> -% 902 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [304.177 568.778 318.623 580.837] -/A << /S /GoTo /D (subsection.3.3) >> +/Parent 1028 0 R >> -% 906 0 obj +% 1051 0 obj << -/D [904 0 R /XYZ 98.895 753.953 null] +/D [1049 0 R /XYZ 98.895 753.953 null] >> -% 907 0 obj +% 1047 0 obj << -/D [904 0 R /XYZ 99.895 716.092 null] +/D [1049 0 R /XYZ 99.895 665.282 null] >> -% 908 0 obj +% 1052 0 obj << -/D [904 0 R /XYZ 99.895 702.226 null] +/D [1049 0 R /XYZ 409.052 603.446 null] >> -% 909 0 obj +% 107 0 obj << -/D [904 0 R /XYZ 99.895 684.257 null] +/D [1049 0 R /XYZ 99.895 294.773 null] >> -% 910 0 obj +% 111 0 obj << -/D [904 0 R /XYZ 99.895 667.564 null] +/D [1049 0 R /XYZ 99.895 276.048 null] >> -% 911 0 obj +% 1048 0 obj << -/D [904 0 R /XYZ 99.895 650.871 null] +/Font << /F102 1016 0 R /F62 667 0 R /F67 913 0 R /F59 665 0 R >> +/ProcSet [ /PDF /Text ] >> -% 912 0 obj +% 1054 0 obj << -/D [904 0 R /XYZ 99.895 541.236 null] +/Type /Page +/Contents 1055 0 R +/Resources 1053 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1028 0 R >> -% 913 0 obj +% 1056 0 obj << -/D [904 0 R /XYZ 99.895 524.542 null] +/D [1054 0 R /XYZ 149.705 753.953 null] >> -% 914 0 obj +% 115 0 obj << -/D [904 0 R /XYZ 99.895 507.849 null] +/D [1054 0 R /XYZ 150.705 716.092 null] >> -% 916 0 obj +% 119 0 obj << -/D [904 0 R /XYZ 99.895 288.977 null] +/D [1054 0 R /XYZ 150.705 540.892 null] >> -% 903 0 obj +% 1057 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F59 812 0 R /F94 915 0 R >> -/ProcSet [ /PDF /Text ] +/D [1054 0 R /XYZ 150.705 358.382 null] >> -% 920 0 obj +% 123 0 obj << -/Type /Page -/Contents 921 0 R -/Resources 919 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 871 0 R -/Annots [ 917 0 R 918 0 R ] +/D [1054 0 R /XYZ 150.705 300.51 null] >> -% 917 0 obj +% 1053 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [352.861 524.509 359.835 537.101] -/A << /S /GoTo /D (section.1) >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] >> -% 918 0 obj +% 1059 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [186.147 314.537 193.12 327.129] -/A << /S /GoTo /D (section.1) >> +/Type /Page +/Contents 1060 0 R +/Resources 1058 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1062 0 R >> -% 922 0 obj + +endstream +endobj +1066 0 obj << -/D [920 0 R /XYZ 149.705 753.953 null] +/Length 5883 >> -% 43 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 150.705 706.129 Td [(3.2.9)-1000(is)]TJ +ET +q +1 0 0 1 188.931 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 191.92 706.129 Td [(lower)55(,)-250(is)]TJ +ET +q +1 0 0 1 230.704 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 233.693 706.129 Td [(upper)55(,)-250(is)]TJ +ET +q +1 0 0 1 273.583 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 276.572 706.129 Td [(triangle,)-250(is)]TJ +ET +q +1 0 0 1 325.309 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 328.298 706.129 Td [(unit)-250(\227)-250(Format)-250(check)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -177.593 -19.573 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050a%is_triangle\050\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050a%is_upper\050\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050a%is_lower\050\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.956 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050a%is_unit\050\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ +0 g 0 G +0 g 0 G +/F59 9.9626 Tf 0 -22.86 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -21.183 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -21.183 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ +0 g 0 G + -56.338 -34.816 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -21.183 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(A)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F67 9.9626 Tf 10.803 0 Td [(logical)]TJ +0 g 0 G +/F62 9.9626 Tf 39.665 0 Td [(value)-306(indicating)-307(whether)-306(the)-307(matrix)-306(is)-306(triangular;)]TJ -98.338 -11.955 Td [(if)]TJ/F67 9.9626 Tf 8.595 0 Td [(is_triangle\050\051)]TJ/F62 9.9626 Tf 70.373 0 Td [(r)18(eturns)]TJ/F67 9.9626 Tf 34.119 0 Td [(.true.)]TJ/F62 9.9626 Tf 33.761 0 Td [(check)-239(also)-238(if)-239(it)-239(is)-239(lower)74(,)-241(upper)-238(and)-239(with)]TJ -146.848 -11.955 Td [(a)-250(unit)-250(\050i.e.)-310(assumed\051)-250(diagonal.)]TJ/F59 9.9626 Tf -24.907 -28.929 Td [(3.2.10)-1000(cscnv)-250(\227)-250(Convert)-250(to)-250(a)-250(dif)18(ferent)-250(storage)-250(format)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 0 -19.573 Td [(call)]TJ +0 g 0 G + [-1050(a%cscnv\050b,info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525([,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(type)]TJ +0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(mold,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(dupl]\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ +0 g 0 G + [-1050(a%cscnv\050info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525([,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(type)]TJ +0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(mold,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(dupl]\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -22.861 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -21.183 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -21.183 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ -160.077 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.917 -33.138 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 24.906 0 Td [(a)-250(string)-250(r)18(equesting)-250(a)-250(new)-250(format.)]TJ 0.001 -11.955 Td [(T)90(ype:)-310(optional.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -21.183 Td [(mold)]TJ +0 g 0 G +/F62 9.9626 Tf 28.782 0 Td [(a)-236(variable)-236(of)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 56.403 0 Td [(class)]TJ +0 g 0 G + [(\050psb_T_base_sparse_mat\051)]TJ/F62 9.9626 Tf 148.803 0 Td [(r)18(equesting)-236(a)-236(new)-237(format)1(.)]TJ -209.081 -11.955 Td [(T)90(ype:)-310(optional.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -21.182 Td [(dupl)]TJ +0 g 0 G +/F62 9.9626 Tf 26.56 0 Td [(an)-359(integer)-358(value)-359(speci\002ng)-358(how)-359(to)-359(handle)-358(duplicates)-359(\050see)-359(Named)-358(Con-)]TJ -1.653 -11.956 Td [(stants)-250(below\051)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.861 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -21.182 Td [(b,a)]TJ +0 g 0 G +/F62 9.9626 Tf 18.54 0 Td [(A)-250(copy)-250(of)]TJ/F67 9.9626 Tf 45.37 0 Td [(a)]TJ/F62 9.9626 Tf 7.721 0 Td [(with)-250(a)-250(new)-250(storage)-250(format.)]TJ -46.724 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.456 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -184.985 -21.183 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ -23.801 -23.175 Td [(The)]TJ/F67 9.9626 Tf 19.584 0 Td [(mold)]TJ/F62 9.9626 Tf 23.827 0 Td [(ar)18(guments)-292(may)-291(be)-292(employed)-292(to)-292(interface)-291(with)-292(special)-292(devices,)-302(such)]TJ -43.411 -11.955 Td [(as)-250(GPUs)-250(and)-250(other)-250(accelerators.)]TJ +0 g 0 G + 166.874 -29.888 Td [(20)]TJ +0 g 0 G +ET + +endstream +endobj +1070 0 obj << -/D [920 0 R /XYZ 150.705 716.092 null] +/Length 4616 >> -% 47 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 99.895 706.129 Td [(3.2.11)-1000(csclip)-250(\227)-250(Reduce)-250(to)-250(a)-250(submatrix)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 20.922 -20.279 Td [(call)]TJ +0 g 0 G + [-525(a%csclip\050b,info[,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 15.691 -11.955 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(imin,imax,jmin,jmax,rscale,cscale]\051)]TJ/F62 9.9626 Tf -21.669 -24.631 Td [(Returns)-190(the)-190(submatrix)]TJ/F67 9.9626 Tf 98.878 0 Td [(A\050imin:imax,jmin:jmax\051)]TJ/F62 9.9626 Tf 115.068 0 Td [(,)-202(optionally)-190(r)18(escaling)-190(r)18(ow/-)]TJ -228.89 -11.955 Td [(col)-250(indices)-250(to)-250(the)-250(range)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG +/F67 9.9626 Tf 103.85 0 Td [(1)]TJ +0 g 0 G + [(:imax)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(-)]TJ +0 g 0 G + [(imin)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(+)]TJ +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G + [(,)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G + [(:jmax)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(-)]TJ +0 g 0 G + [(jmin)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(+)]TJ +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G +/F62 9.9626 Tf 141.219 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -245.069 -21.961 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -22.638 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -22.639 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ -160.078 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.918 -34.594 Td [(imin,imax,jmin,jmax)]TJ +0 g 0 G +/F62 9.9626 Tf 99.885 0 Td [(Minimum)-250(and)-250(maximum)-250(r)18(ow)-250(and)-250(column)-250(indices.)]TJ -74.978 -11.955 Td [(T)90(ype:)-310(optional.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.638 Td [(rscale,cscale)]TJ +0 g 0 G +/F62 9.9626 Tf 60.025 0 Td [(Whether)-250(to)-250(r)18(escale)-250(r)18(ow/column)-250(indices.)-310(T)90(ype:)-310(optional.)]TJ +0 g 0 G +/F59 9.9626 Tf -60.025 -24.632 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -22.639 Td [(b)]TJ +0 g 0 G +/F62 9.9626 Tf 11.069 0 Td [(A)-250(copy)-250(of)-250(a)-250(submatrix)-250(of)]TJ/F67 9.9626 Tf 111.321 0 Td [(a)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ -102.713 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -184.985 -22.639 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ/F59 9.9626 Tf -23.801 -31 Td [(3.2.12)-1000(clean)]TJ +ET +q +1 0 0 1 159.153 364.307 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 162.142 364.108 Td [(zeros)-250(\227)-250(Eliminate)-250(zero)-250(coef)18(\002cients)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -62.247 -20.278 Td [(call)]TJ +0 g 0 G + [-525(a%clean_zeros\050info\051)]TJ/F62 9.9626 Tf 14.944 -12.634 Td [(Eliminates)-214(zer)18(o)-214(coef)18(\002cients)-214(in)-214(the)-214(input)-214(matrix.)-298(Note)-214(that)-214(depending)-214(on)-214(the)]TJ -14.944 -11.955 Td [(internal)-246(storage)-245(format,)-247(ther)18(e)-245(may)-246(still)-245(be)-246(some)-245(amount)-246(of)-246(ze)1(r)18(o)-246(padding)-246(in)-245(the)]TJ 0 -11.955 Td [(output.)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -24.632 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -22.638 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -22.639 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ -160.078 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.918 -35.908 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -22.638 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(The)-250(matrix)]TJ/F67 9.9626 Tf 50.659 0 Td [(a)]TJ/F62 9.9626 Tf 7.721 0 Td [(without)-250(zer)18(o)-250(coef)18(\002cients.)]TJ -43.436 -11.956 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -184.985 -22.638 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ +0 g 0 G + 143.074 -29.888 Td [(21)]TJ +0 g 0 G +ET + +endstream +endobj +1074 0 obj << -/D [920 0 R /XYZ 150.705 696.532 null] +/Length 4627 >> -% 51 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 150.705 706.129 Td [(3.2.13)-1000(get)]TJ +ET +q +1 0 0 1 200.01 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 202.999 706.129 Td [(diag)-250(\227)-250(Get)-250(main)-250(diagonal)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -52.294 -19.329 Td [(call)]TJ +0 g 0 G + [-525(a%get_diag\050d,info\051)]TJ/F62 9.9626 Tf 14.944 -12.144 Td [(Returns)-250(a)-250(copy)-250(of)-250(the)-250(main)-250(diagonal.)]TJ +0 g 0 G +/F59 9.9626 Tf -14.944 -20.49 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20.679 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.679 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ -160.077 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.917 -34.627 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.679 Td [(d)]TJ +0 g 0 G +/F62 9.9626 Tf 11.068 0 Td [(A)-250(copy)-250(of)-250(the)-250(main)-250(diagonal.)]TJ 13.839 -11.955 Td [(A)-250(one-dimensional)-250(array)-250(of)-250(the)-250(appr)18(opriate)-250(type.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -20.679 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F59 9.9626 Tf -23.8 -28.213 Td [(3.2.14)-1000(clip)]TJ +ET +q +1 0 0 1 203.317 472.944 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 206.306 472.745 Td [(diag)-250(\227)-250(Cut)-250(out)-250(main)-250(diagonal)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -55.601 -19.329 Td [(call)]TJ +0 g 0 G + [-525(a%clip_diag\050b,info\051)]TJ/F62 9.9626 Tf 14.944 -12.144 Td [(Returns)-250(a)-250(copy)-250(of)]TJ/F67 9.9626 Tf 79.73 0 Td [(a)]TJ/F62 9.9626 Tf 7.721 0 Td [(without)-250(the)-250(main)-250(diagonal.)]TJ +0 g 0 G +/F59 9.9626 Tf -102.395 -20.49 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20.679 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.679 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ -160.077 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.917 -34.627 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.679 Td [(b)]TJ +0 g 0 G +/F62 9.9626 Tf 11.068 0 Td [(A)-250(copy)-250(of)]TJ/F67 9.9626 Tf 45.37 0 Td [(a)]TJ/F62 9.9626 Tf 7.721 0 Td [(without)-250(the)-250(main)-250(diagonal.)]TJ -39.252 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -184.984 -20.679 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F59 9.9626 Tf -23.8 -28.213 Td [(3.2.15)-1000(tril)-250(\227)-250(Return)-250(the)-250(lower)-250(triangle)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 20.921 -19.329 Td [(call)]TJ +0 g 0 G + [-525(a%tril\050l,info[,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 15.691 -11.955 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(diag,imin,imax,jmin,jmax,rscale,cscale,u]\051)]TJ/F62 9.9626 Tf -21.668 -22.671 Td [(Returns)-309(the)-308(lower)-309(triangular)-308(part)-309(of)-309(submatrix)]TJ/F67 9.9626 Tf 211.209 0 Td [(A\050imin:imax,jmin:jmax\051)]TJ/F62 9.9626 Tf 115.067 0 Td [(,)]TJ -341.22 -11.956 Td [(optionally)-190(r)18(escaling)-190(r)18(ow/col)-190(indices)-190(to)-190(the)-190(range)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG +/F67 9.9626 Tf 212.61 0 Td [(1)]TJ +0 g 0 G + [(:imax)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(-)]TJ +0 g 0 G + [(imin)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(+)]TJ +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G + [(,)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G + [(:jmax)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(-)]TJ +0 g 0 G + [(jmin)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(+)]TJ +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G +/F62 9.9626 Tf -212.61 -11.955 Td [(and)-250(r)18(eturing)-250(the)-250(complementary)-250(upper)-250(triangle.)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -20.49 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20.679 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 166.874 -29.888 Td [(22)]TJ +0 g 0 G +ET + +endstream +endobj +1078 0 obj << -/D [920 0 R /XYZ 150.705 498.276 null] +/Length 6324 >> -% 55 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 99.895 706.129 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ -160.078 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.918 -30.706 Td [(diag)]TJ +0 g 0 G +/F62 9.9626 Tf 24.907 0 Td [(Include)-300(diagonals)-301(up)-300(to)-301(this)-300(one;)]TJ/F67 9.9626 Tf 149.76 0 Td [(diag)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G +/F62 9.9626 Tf 34.376 0 Td [(means)-300(the)-301(\002rst)-300(super)18(diagonal,)]TJ/F67 9.9626 Tf -184.136 -11.955 Td [(diag)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=-)]TJ +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G +/F62 9.9626 Tf 39.103 0 Td [(means)-250(the)-250(\002rst)-250(subdiagonal.)-310(Default)-250(0.)]TJ +0 g 0 G +/F59 9.9626 Tf -64.01 -18.75 Td [(imin,imax,jmin,jmax)]TJ +0 g 0 G +/F62 9.9626 Tf 99.885 0 Td [(Minimum)-250(and)-250(maximum)-250(r)18(ow)-250(and)-250(column)-250(indices.)]TJ -74.978 -11.955 Td [(T)90(ype:)-310(optional.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -18.75 Td [(rscale,cscale)]TJ +0 g 0 G +/F62 9.9626 Tf 60.025 0 Td [(Whether)-250(to)-250(r)18(escale)-250(r)18(ow/column)-250(indices.)-310(T)90(ype:)-310(optional.)]TJ +0 g 0 G +/F59 9.9626 Tf -60.025 -18.979 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -18.75 Td [(l)]TJ +0 g 0 G +/F62 9.9626 Tf 8.299 0 Td [(A)-250(copy)-250(of)-250(the)-250(lower)-250(triangle)-250(of)]TJ/F67 9.9626 Tf 137.333 0 Td [(a)]TJ/F62 9.9626 Tf 5.231 0 Td [(.)]TJ -125.956 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -184.985 -18.75 Td [(u)]TJ +0 g 0 G +/F62 9.9626 Tf 11.069 0 Td [(\050optional\051)-250(A)-250(copy)-250(of)-250(the)-250(upper)-250(triangle)-250(of)]TJ/F67 9.9626 Tf 184.485 0 Td [(a)]TJ/F62 9.9626 Tf 5.231 0 Td [(.)]TJ -175.878 -11.956 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -184.985 -18.749 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ/F59 9.9626 Tf -23.801 -26.59 Td [(3.2.16)-1000(triu)-250(\227)-250(Return)-250(the)-250(upper)-250(triangle)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 20.922 -18.964 Td [(call)]TJ +0 g 0 G + [-525(a%triu\050u,info[,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 15.691 -11.955 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(diag,imin,imax,jmin,jmax,rscale,cscale,l]\051)]TJ/F62 9.9626 Tf -21.669 -18.979 Td [(Returns)-289(the)-290(upper)-289(triangular)-290(part)-289(of)-290(submatrix)]TJ/F67 9.9626 Tf 211.209 0 Td [(A\050imin:imax,jmin:jmax\051)]TJ/F62 9.9626 Tf 115.068 0 Td [(,)]TJ -341.221 -11.955 Td [(optionally)-190(r)18(escaling)-190(r)18(ow/col)-190(indices)-190(to)-190(the)-190(range)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG +/F67 9.9626 Tf 212.611 0 Td [(1)]TJ +0 g 0 G + [(:imax)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(-)]TJ +0 g 0 G + [(imin)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(+)]TJ +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G + [(,)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G + [(:jmax)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(-)]TJ +0 g 0 G + [(jmin)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(+)]TJ +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G +/F62 9.9626 Tf 141.219 0 Td [(,)]TJ -353.83 -11.956 Td [(and)-250(r)18(eturing)-250(the)-250(complementary)-250(lower)-250(triangle.)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -17.574 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -18.75 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -18.75 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ -160.078 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.918 -30.706 Td [(diag)]TJ +0 g 0 G +/F62 9.9626 Tf 24.907 0 Td [(Include)-300(diagonals)-301(up)-300(to)-301(this)-300(one;)]TJ/F67 9.9626 Tf 149.76 0 Td [(diag)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G +/F62 9.9626 Tf 34.376 0 Td [(means)-300(the)-301(\002rst)-300(super)18(diagonal,)]TJ/F67 9.9626 Tf -184.136 -11.955 Td [(diag)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=-)]TJ +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G +/F62 9.9626 Tf 39.103 0 Td [(means)-250(the)-250(\002rst)-250(subdiagonal.)-310(Default)-250(0.)]TJ +0 g 0 G +/F59 9.9626 Tf -64.01 -18.75 Td [(imin,imax,jmin,jmax)]TJ +0 g 0 G +/F62 9.9626 Tf 99.885 0 Td [(Minimum)-250(and)-250(maximum)-250(r)18(ow)-250(and)-250(column)-250(indices.)]TJ -74.978 -11.955 Td [(T)90(ype:)-310(optional.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -18.75 Td [(rscale,cscale)]TJ +0 g 0 G +/F62 9.9626 Tf 60.025 0 Td [(Whether)-250(to)-250(r)18(escale)-250(r)18(ow/column)-250(indices.)-310(T)90(ype:)-310(optional.)]TJ +0 g 0 G +/F59 9.9626 Tf -60.025 -18.979 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -18.75 Td [(u)]TJ +0 g 0 G +/F62 9.9626 Tf 11.069 0 Td [(A)-250(copy)-250(of)-250(the)-250(upper)-250(triangle)-250(of)]TJ/F67 9.9626 Tf 138.668 0 Td [(a)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ -130.06 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -184.985 -18.75 Td [(l)]TJ +0 g 0 G +/F62 9.9626 Tf 8.299 0 Td [(\050optional\051)-250(A)-250(copy)-250(of)-250(the)-250(lower)-250(triangle)-250(of)]TJ/F67 9.9626 Tf 183.151 0 Td [(a)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ -171.773 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F67 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F62 9.9626 Tf 78.455 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -184.985 -18.75 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ +0 g 0 G + 143.074 -29.888 Td [(23)]TJ +0 g 0 G +ET + +endstream +endobj +1084 0 obj << -/D [920 0 R /XYZ 150.705 288.305 null] +/Length 7619 >> -% 919 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 150.705 706.129 Td [(3.2.17)-1000(psb)]TJ +ET +q +1 0 0 1 202.769 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 205.758 706.129 Td [(set)]TJ +ET +q +1 0 0 1 219.078 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 222.067 706.129 Td [(mat)]TJ +ET +q +1 0 0 1 239.82 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 242.809 706.129 Td [(default)-250(\227)-250(Set)-250(default)-250(storage)-250(format)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -92.104 -18.964 Td [(call)]TJ +0 g 0 G + [-1050(psb_set_mat_default\050a\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -20.183 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.231 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.231 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(a)-203(variable)-203(of)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 55.42 0 Td [(class)]TJ +0 g 0 G + [(\050psb_T_base_sparse_mat\051)]TJ/F62 9.9626 Tf 148.475 0 Td [(r)18(equesting)-203(a)-204(new)-203(default)-203(stor)18(-)]TJ -188.951 -11.956 Td [(age)-250(format.)]TJ 0 -11.955 Td [(T)90(ype:)-310(r)18(equir)18(ed.)]TJ/F59 9.9626 Tf -24.907 -26.815 Td [(3.2.18)-1000(clone)-250(\227)-250(Clone)-250(current)-250(object)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 0 -18.964 Td [(call)]TJ +0 g 0 G + [-1050(a%clone\050b,info\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -20.183 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.231 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.231 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.917 -32.138 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.232 Td [(b)]TJ +0 g 0 G +/F62 9.9626 Tf 11.068 0 Td [(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ +0 g 0 G +/F59 9.9626 Tf -11.068 -19.231 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F59 9.9626 Tf -23.8 -26.815 Td [(3.2.19)-1000(Named)-250(Constants)]TJ +0 g 0 G + 0 -18.964 Td [(psb)]TJ +ET +q +1 0 0 1 167.9 372.049 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 170.889 371.85 Td [(dupl)]TJ +ET +q +1 0 0 1 193.066 372.049 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 196.055 371.85 Td [(ovwrt)]TJ +ET +q +1 0 0 1 223.222 372.049 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 231.193 371.85 Td [(Duplicate)-259(coef)18(\002cients)-259(shou)1(ld)-259(be)-259(overwritten)-259(\050i.e.)-336(ignor)18(e)-259(du-)]TJ -55.582 -11.955 Td [(plications\051)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.231 Td [(psb)]TJ +ET +q +1 0 0 1 167.9 340.863 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 170.889 340.664 Td [(dupl)]TJ +ET +q +1 0 0 1 193.066 340.863 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 196.055 340.664 Td [(add)]TJ +ET +q +1 0 0 1 213.808 340.863 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 221.778 340.664 Td [(Duplicate)-250(coef)18(\002cients)-250(should)-250(be)-250(added;)]TJ +0 g 0 G +/F59 9.9626 Tf -71.073 -19.232 Td [(psb)]TJ +ET +q +1 0 0 1 167.9 321.632 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 170.889 321.432 Td [(dupl)]TJ +ET +q +1 0 0 1 193.066 321.632 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 196.055 321.432 Td [(err)]TJ +ET +q +1 0 0 1 209.384 321.632 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 217.355 321.432 Td [(Duplicate)-250(coef)18(\002cients)-250(should)-250(trigger)-250(an)-250(err)18(or)-250(conditino)]TJ +0 g 0 G +/F59 9.9626 Tf -66.65 -19.231 Td [(psb)]TJ +ET +q +1 0 0 1 167.9 302.4 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 170.889 302.201 Td [(upd)]TJ +ET +q +1 0 0 1 189.748 302.4 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 192.737 302.201 Td [(d\003t)]TJ +ET +q +1 0 0 1 208.827 302.4 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 216.797 302.201 Td [(Default)-250(update)-250(strategy)-250(for)-250(matrix)-250(coef)18(\002cients;)]TJ +0 g 0 G +/F59 9.9626 Tf -66.092 -19.231 Td [(psb)]TJ +ET +q +1 0 0 1 167.9 283.169 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 170.889 282.97 Td [(upd)]TJ +ET +q +1 0 0 1 189.748 283.169 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 192.737 282.97 Td [(srch)]TJ +ET +q +1 0 0 1 212.144 283.169 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 220.114 282.97 Td [(Update)-250(strategy)-250(based)-250(on)-250(sear)18(ch)-250(into)-250(the)-250(data)-250(str)8(uctur)18(e;)]TJ +0 g 0 G +/F59 9.9626 Tf -69.409 -19.232 Td [(psb)]TJ +ET +q +1 0 0 1 167.9 263.938 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 170.889 263.738 Td [(upd)]TJ +ET +q +1 0 0 1 189.748 263.938 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 192.737 263.738 Td [(perm)]TJ +ET +q +1 0 0 1 217.135 263.938 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 225.106 263.738 Td [(Update)-392(strategy)-393(based)-392(on)-393(additional)-392(permutation)-393(data)-392(\050see)]TJ -49.495 -11.955 Td [(tools)-250(r)18(outine)-250(description\051.)]TJ/F59 11.9552 Tf -24.906 -28.807 Td [(3.3)-1000(Dense)-250(V)111(ector)-250(Data)-250(Structure)]TJ/F62 9.9626 Tf 0 -18.964 Td [(The)]TJ/F67 9.9626 Tf 20.094 0 Td [(psb)]TJ +ET +q +1 0 0 1 187.117 204.211 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 190.255 204.012 Td [(T)]TJ +ET +q +1 0 0 1 196.113 204.211 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 199.252 204.012 Td [(vect)]TJ +ET +q +1 0 0 1 220.801 204.211 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 223.939 204.012 Td [(type)]TJ/F62 9.9626 Tf 24.338 0 Td [(data)-343(str)8(uctur)18(e)-343(encapsulates)-343(the)-343(dense)-343(vectors)-342(in)-343(a)-343(way)]TJ -97.572 -11.955 Td [(similar)-368(to)-368(sparse)-368(matrices,)-397(i.e.)-664(including)-368(a)-368(base)-368(type)]TJ/F67 9.9626 Tf 242.472 0 Td [(psb)]TJ +ET +q +1 0 0 1 409.495 192.256 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 412.633 192.057 Td [(T)]TJ +ET +q +1 0 0 1 418.491 192.256 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 421.63 192.057 Td [(base)]TJ +ET +q +1 0 0 1 443.178 192.256 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 446.317 192.057 Td [(vect)]TJ +ET +q +1 0 0 1 467.866 192.256 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 471.004 192.057 Td [(type)]TJ/F62 9.9626 Tf 20.921 0 Td [(.)]TJ -341.22 -11.956 Td [(The)-263(user)-263(will)-263(not,)-266(in)-263(general,)-267(access)-263(the)-263(vector)-263(components)-263(dir)18(ectly)111(,)-266(but)-263(rather)]TJ 0 -11.955 Td [(via)-222(the)-222(r)18(out)1(ines)-222(of)-222(sec.)]TJ +0 0 1 rg 0 0 1 RG + [-222(6)]TJ +0 g 0 G + [(.)-300(Among)-222(other)-222(simple)-222(things,)-227(we)-222(de\002ne)-222(her)18(e)-221(an)-222(extrac-)]TJ 0 -11.955 Td [(tion)-273(method)-274(that)-273(can)-274(be)-273(used)-274(to)-273(get)-274(a)-273(full)-274(copy)-273(of)-274(the)-273(part)-274(of)-273(the)-274(vector)-273(stor)18(ed)]TJ 0 -11.955 Td [(on)-250(the)-250(local)-250(pr)18(ocess.)]TJ 14.944 -11.955 Td [(The)-311(type)-311(declaration)-311(is)-310(shown)-311(in)-311(\002gur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-311(3)]TJ +0 g 0 G + [-311(wher)18(e)]TJ/F67 9.9626 Tf 217.442 0 Td [(T)]TJ/F62 9.9626 Tf 8.327 0 Td [(is)-311(a)-311(placeholder)-311(for)-310(the)]TJ -240.713 -11.955 Td [(data)-250(type)-250(and)-250(pr)18(ecision)-250(variants)]TJ +0 g 0 G + 166.874 -29.888 Td [(24)]TJ +0 g 0 G +ET + +endstream +endobj +1090 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F52 585 0 R /F85 814 0 R >> -/ProcSet [ /PDF /Text ] +/Length 5064 >> -% 924 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 99.895 706.129 Td [(I)]TJ +0 g 0 G +/F62 9.9626 Tf 8.857 0 Td [(Integer;)]TJ +0 g 0 G +/F59 9.9626 Tf -8.857 -20.359 Td [(S)]TJ +0 g 0 G +/F62 9.9626 Tf 11.069 0 Td [(Single)-250(pr)18(ecision)-250(r)18(eal;)]TJ +0 g 0 G +/F59 9.9626 Tf -11.069 -20.358 Td [(D)]TJ +0 g 0 G +/F62 9.9626 Tf 13.281 0 Td [(Double)-250(pr)18(ecision)-250(r)18(eal;)]TJ +0 g 0 G +/F59 9.9626 Tf -13.281 -20.359 Td [(C)]TJ +0 g 0 G +/F62 9.9626 Tf 12.175 0 Td [(Single)-250(pr)18(ecision)-250(complex;)]TJ +0 g 0 G +/F59 9.9626 Tf -12.175 -20.358 Td [(Z)]TJ +0 g 0 G +/F62 9.9626 Tf 11.627 0 Td [(Double)-250(pr)18(ecision)-250(complex.)]TJ -11.627 -20.251 Td [(The)-209(actual)-208(data)-209(is)-208(contained)-209(in)-209(the)-208(polymorphic)-209(component)]TJ/F67 9.9626 Tf 261.152 0 Td [(v%v)]TJ/F62 9.9626 Tf 15.691 0 Td [(;)-222(the)-209(separation)]TJ -276.843 -11.955 Td [(between)-353(the)-353(application)-353(and)-353(the)-353(actual)-353(data)-353(is)-353(esse)1(ntial)-353(for)-353(cases)-353(wher)18(e)-353(it)-353(is)]TJ 0 -11.955 Td [(necessary)-321(to)-321(link)-320(to)-321(data)-321(storage)-321(made)-320(available)-321(elsewher)18(e)-321(outside)-320(the)-321(dir)18(ect)]TJ 0 -11.955 Td [(contr)18(ol)-231(of)-231(the)-231(compiler/application,)-235(e.g.)-304(data)-231(stor)18(ed)-231(in)-231(a)-231(graphics)-231(accelerator)-74('s)]TJ 0 -11.955 Td [(private)-250(memory)111(.)]TJ +0 g 0 G +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 99.895 452.975 cm +0 0 343.711 82.69 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 112.299 525.005 Td [(type)]TJ +0 g 0 G + [-525(psb_T_base_vect_type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 9.414 -10.959 Td [(TYPE)]TJ +0 g 0 G + [(\050KIND_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(v\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.414 -10.959 Td [(end)-525(type)]TJ +0 g 0 G + [-525(psb_T_base_vect_type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -21.918 Td [(type)]TJ +0 g 0 G + [-525(psb_T_vect_type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 9.414 -10.959 Td [(class)]TJ +0 g 0 G + [(\050psb_T_base_vect_type\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(v)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.414 -10.959 Td [(end)-525(type)]TJ +0 g 0 G + [-1050(psb_T_vect_type)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 6.677 -41.429 Td [(Listing)-250(3:)-310(The)-250(PSBLAS)-250(de\002ned)-250(data)-250(type)-250(that)-250(contains)-250(a)-250(dense)-250(vector)74(.)]TJ/F59 9.9626 Tf -19.081 -39.929 Td [(3.3.1)-1000(V)111(ector)-250(Methods)]TJ 0 -19.174 Td [(3.3.2)-1000(get)]TJ +ET +q +1 0 0 1 144.219 358.919 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 147.208 358.719 Td [(nrows)-250(\227)-250(Get)-250(number)-250(of)-250(rows)-250(in)-250(a)-250(dense)-250(vector)]TJ/F67 9.9626 Tf -47.313 -19.173 Td [(nr)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(v%get_nrows\050\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -22.351 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20.359 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.358 Td [(v)]TJ +0 g 0 G +/F62 9.9626 Tf 10.521 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ +0 g 0 G + -56.339 -34.198 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.358 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(The)-250(number)-250(of)-250(r)18(ows)-250(of)-250(dense)-250(vector)]TJ/F67 9.9626 Tf 161.273 0 Td [(v)]TJ/F62 9.9626 Tf 5.231 0 Td [(.)]TJ/F59 9.9626 Tf -239.281 -27.757 Td [(3.3.3)-1000(sizeof)-250(\227)-250(Get)-250(memory)-250(occupation)-250(in)-250(bytes)-250(of)-250(a)-250(dense)-250(vector)]TJ/F67 9.9626 Tf 0 -19.174 Td [(memory_size)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(v%sizeof\050\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -22.351 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20.358 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 166.875 -29.888 Td [(25)]TJ +0 g 0 G +ET + +endstream +endobj +1097 0 obj << -/Type /Page -/Contents 925 0 R -/Resources 923 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 927 0 R +/Length 3867 >> -% 926 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 150.705 706.129 Td [(v)]TJ +0 g 0 G +/F62 9.9626 Tf 10.52 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ +0 g 0 G + -56.338 -36.868 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -23.918 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.776 0 Td [(The)-250(memory)-250(occupation)-250(in)-250(bytes.)]TJ/F59 9.9626 Tf -72.776 -32.82 Td [(3.3.4)-1000(set)-250(\227)-250(Set)-250(contents)-250(of)-250(the)-250(vector)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 5.23 -20.898 Td [(call)]TJ +0 g 0 G + [-1050(v%set\050alpha[,first,last]\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ +0 g 0 G + [-1050(v%set\050vect[,first,last]\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ +0 g 0 G + [-1050(v%zero\050\051)]TJ +0 g 0 G +/F59 9.9626 Tf -5.23 -24.913 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -23.918 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -23.918 Td [(v)]TJ +0 g 0 G +/F62 9.9626 Tf 10.52 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ +0 g 0 G + -56.338 -35.873 Td [(alpha)]TJ +0 g 0 G +/F62 9.9626 Tf 30.436 0 Td [(A)-250(scalar)-250(value.)]TJ -5.53 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(1)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -23.918 Td [(\002rst,last)]TJ +0 g 0 G +/F62 9.9626 Tf 41.215 0 Td [(Boundaries)-250(for)-250(setting)-250(in)-250(the)-250(vector)74(.)]TJ -16.309 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(integers.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -23.918 Td [(vect)]TJ +0 g 0 G +/F62 9.9626 Tf 23.242 0 Td [(An)-250(array)]TJ 1.664 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(1)]TJ +0 g 0 G + [(.)]TJ -24.906 -25.91 Td [(Note)-336(that)-336(a)-335(call)-336(to)]TJ/F67 9.9626 Tf 84.614 0 Td [(v%zero\050\051)]TJ/F62 9.9626 Tf 45.189 0 Td [(is)-336(pr)18(ovided)-336(as)-335(a)-336(shorthand,)-358(but)-335(is)-336(equivalent)-336(to)]TJ -129.803 -11.956 Td [(a)-270(call)-270(to)]TJ/F67 9.9626 Tf 36.947 0 Td [(v%set\050zero\051)]TJ/F62 9.9626 Tf 60.225 0 Td [(with)-270(the)]TJ/F67 9.9626 Tf 39.456 0 Td [(zero)]TJ/F62 9.9626 Tf 23.613 0 Td [(constant)-270(having)-270(the)-271(appr)18(opriat)1(e)-271(type)-270(and)]TJ -160.241 -11.955 Td [(kind.)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -25.91 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -23.918 Td [(v)]TJ +0 g 0 G +/F62 9.9626 Tf 10.52 0 Td [(the)-250(dense)-250(vector)74(,)-250(with)-250(updated)-250(entries)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ +0 g 0 G +/F62 9.9626 Tf 110.536 -41.843 Td [(26)]TJ +0 g 0 G +ET + +endstream +endobj +1104 0 obj << -/D [924 0 R /XYZ 98.895 753.953 null] +/Length 4464 >> -% 59 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 99.895 706.129 Td [(3.3.5)-1000(get)]TJ +ET +q +1 0 0 1 144.219 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 147.208 706.129 Td [(vect)-250(\227)-250(Get)-250(a)-250(copy)-250(of)-250(the)-250(vector)-250(contents)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -47.313 -19.66 Td [(extv)-525(=)-525(v%get_vect\050[n]\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -22.994 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -21.362 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -21.361 Td [(v)]TJ +0 g 0 G +/F62 9.9626 Tf 10.521 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ +0 g 0 G + -56.339 -33.316 Td [(n)]TJ +0 g 0 G +/F62 9.9626 Tf 11.069 0 Td [(Size)-250(to)-250(be)-250(r)18(eturned)]TJ 13.838 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(;)-250(default:)-310(entir)18(e)-250(vector)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -90.182 -34.95 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -21.361 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(An)-316(allocatable)-316(array)-316(holding)-316(a)-317(copy)-316(of)-316(the)-316(dense)-316(vector)-316(con-)]TJ -47.87 -11.955 Td [(tents.)-321(If)-254(the)-254(ar)18(gument)]TJ/F60 9.9626 Tf 98.086 0 Td [(n)]TJ/F62 9.9626 Tf 8.192 0 Td [(is)-254(speci\002ed,)-255(the)-253(size)-254(of)-254(the)-254(r)18(eturned)-254(array)-253(equals)]TJ -106.278 -11.955 Td [(the)-339(minimum)-339(between)]TJ/F60 9.9626 Tf 105.247 0 Td [(n)]TJ/F62 9.9626 Tf 9.041 0 Td [(and)-339(the)-339(internal)-339(size)-339(of)-339(the)-339(vector)74(,)-361(or)-339(0)-339(if)]TJ/F60 9.9626 Tf 188.353 0 Td [(n)]TJ/F62 9.9626 Tf 9.04 0 Td [(is)]TJ -311.681 -11.956 Td [(negative;)-314(otherwise,)-303(the)-292(size)-293(of)-292(the)-293(array)-292(is)-293(the)-292(same)-293(as)-292(the)-293(internal)-292(size)]TJ 0 -11.955 Td [(of)-250(the)-250(vector)74(.)]TJ/F59 9.9626 Tf -24.907 -29.183 Td [(3.3.6)-1000(clone)-250(\227)-250(Clone)-250(current)-250(object)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf 0 -19.659 Td [(call)-1050(x%clone\050y,info\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -22.995 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -21.361 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -21.362 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(dense)-250(vector)74(.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -77.918 -34.95 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -21.361 Td [(y)]TJ +0 g 0 G +/F62 9.9626 Tf 10.521 0 Td [(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ +0 g 0 G +/F59 9.9626 Tf -10.521 -21.361 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ/F59 11.9552 Tf -23.801 -31.176 Td [(3.4)-1000(Preconditioner)-250(data)-250(structure)]TJ/F62 9.9626 Tf 0 -19.659 Td [(Our)-396(base)-397(l)1(ibrary)-397(of)18(fers)-396(support)-396(for)-396(simple)-397(well)-396(known)-396(pr)18(econditioners)-396(like)]TJ 0 -11.956 Td [(Diagonal)-250(Scaling)-250(or)-250(Block)-250(Jacobi)-250(with)-250(incomplete)-250(factorization)-250(ILU\0500\051.)]TJ 14.944 -12.314 Td [(A)-361(pr)18(econditioner)-361(is)-361(held)-361(in)-361(the)]TJ/F67 9.9626 Tf 143.781 0 Td [(psb)]TJ +ET +q +1 0 0 1 274.939 168.346 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 278.077 168.146 Td [(prec)]TJ +ET +q +1 0 0 1 299.626 168.346 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 302.764 168.146 Td [(type)]TJ/F62 9.9626 Tf 24.519 0 Td [(data)-361(str)8(uctur)18(e)-361(r)18(eported)-361(in)]TJ -227.388 -11.955 Td [(\002gur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-282(4)]TJ +0 g 0 G + [(.)-407(The)]TJ/F67 9.9626 Tf 59.933 0 Td [(psb_prec_type)]TJ/F62 9.9626 Tf 70.808 0 Td [(data)-282(type)-283(may)-282(contain)-282(a)-283(simple)-282(pr)18(econditioning)]TJ -130.741 -11.955 Td [(matrix)-376(with)-376(the)-376(associated)-376(communication)-375(descriptor)74(.The)-376(internal)-376(pr)18(econdi-)]TJ 0 -11.955 Td [(tioner)-317(is)-317(allocated)-318(appr)18(opriately)-317(with)-317(the)-317(dynamic)-318(type)-317(corr)18(esponding)-317(to)-317(the)]TJ 0 -11.955 Td [(desir)18(ed)-250(pr)18(econditioner)74(.)]TJ +0 g 0 G + 166.875 -29.888 Td [(27)]TJ +0 g 0 G +ET + +endstream +endobj +1110 0 obj << -/D [924 0 R /XYZ 99.895 716.092 null] +/Length 4234 >> -% 63 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +q +1 0 0 1 150.705 671.26 cm +0 0 343.711 38.854 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 163.108 699.454 Td [(type)]TJ +0 g 0 G + [-525(psb_Tprec_type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 9.415 -10.959 Td [(class)]TJ +0 g 0 G + [(\050psb_T_base_prec_type\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(prec)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.415 -10.959 Td [(end)-525(type)]TJ +0 g 0 G + [-525(psb_Tprec_type)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 1.845 -41.429 Td [(Listing)-250(4:)-310(The)-250(PSBLAS)-250(de\002ned)-250(data)-250(type)-250(that)-250(contains)-250(a)-250(pr)18(econditioner)74(.)]TJ/F59 11.9552 Tf -14.248 -32.698 Td [(3.5)-1000(Heap)-250(data)-250(structure)]TJ/F62 9.9626 Tf 0 -18.964 Td [(Among)-310(the)-311(tools)-310(r)18(outines)-310(of)-310(sec.)]TJ +0 0 1 rg 0 0 1 RG + [-311(6)]TJ +0 g 0 G + [(,)-325(we)-310(have)-311(a)-310(number)-310(of)-311(so)1(rting)-311(utilities;)-340(the)]TJ 0 -11.955 Td [(heap)-250(sort)-250(is)-250(implemented)-250(in)-250(terms)-250(of)-250(heaps)-250(having)-250(the)-250(following)-250(signatur)18(es:)]TJ +0 g 0 G +/F67 9.9626 Tf 0 -19.925 Td [(psb)]TJ +ET +q +1 0 0 1 167.023 552.764 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 170.162 552.565 Td [(T)]TJ +ET +q +1 0 0 1 176.02 552.764 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 179.158 552.565 Td [(heap)]TJ +0 g 0 G +/F62 9.9626 Tf 25.903 0 Td [(:)-333(a)-262(heap)-262(containing)-262(e)1(lements)-262(of)-262(type)-262(T)74(,)-261(wher)18(e)-262(T)-262(can)-261(be)]TJ/F67 9.9626 Tf 242.282 0 Td [(i,s,c,d,z)]TJ/F62 9.9626 Tf -271.731 -11.955 Td [(for)-250(integer)74(,)-250(r)18(eal)-250(and)-250(complex)-250(data;)]TJ +0 g 0 G +/F67 9.9626 Tf -24.907 -19.925 Td [(psb)]TJ +ET +q +1 0 0 1 167.023 520.884 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 170.162 520.685 Td [(T)]TJ +ET +q +1 0 0 1 176.02 520.884 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 179.158 520.685 Td [(idx)]TJ +ET +q +1 0 0 1 195.476 520.884 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 198.615 520.685 Td [(heap)]TJ +0 g 0 G +/F62 9.9626 Tf 25.902 0 Td [(:)-289(a)-207(heap)-207(containing)-207(elements)-207(of)-207(type)-207(T)74(,)-207(as)-207(above,)-215(together)-207(with)]TJ -48.906 -11.956 Td [(an)-250(integer)-250(index.)]TJ -24.906 -19.925 Td [(Given)-250(a)-250(heap)-250(object,)-250(the)-250(following)-250(methods)-250(ar)18(e)-250(de\002ned)-250(on)-250(it:)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -19.925 Td [(init)]TJ +0 g 0 G +/F62 9.9626 Tf 21.021 0 Td [(Initialize)-250(memory;)-250(also)-250(choose)-250(ascending)-250(or)-250(descending)-250(or)18(der;)]TJ +0 g 0 G +/F59 9.9626 Tf -21.021 -19.925 Td [(howmany)]TJ +0 g 0 G +/F62 9.9626 Tf 50.371 0 Td [(Curr)18(ent)-250(heap)-250(occupancy;)]TJ +0 g 0 G +/F59 9.9626 Tf -50.371 -19.926 Td [(insert)]TJ +0 g 0 G +/F62 9.9626 Tf 30.983 0 Td [(Add)-250(an)-250(item)-250(\050or)-250(an)-250(item)-250(and)-250(its)-250(index\051;)]TJ +0 g 0 G +/F59 9.9626 Tf -30.983 -19.925 Td [(get)]TJ +ET +q +1 0 0 1 165.141 409.302 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 168.129 409.103 Td [(\002rst)]TJ +0 g 0 G +/F62 9.9626 Tf 22.685 0 Td [(Remove)-250(and)-250(r)18(eturn)-250(the)-250(\002rst)-250(element;)]TJ +0 g 0 G +/F59 9.9626 Tf -40.109 -19.925 Td [(dump)]TJ +0 g 0 G +/F62 9.9626 Tf 32.099 0 Td [(Print)-250(on)-250(\002le;)]TJ +0 g 0 G +/F59 9.9626 Tf -32.099 -19.926 Td [(free)]TJ +0 g 0 G +/F62 9.9626 Tf 22.695 0 Td [(Release)-250(memory)111(.)]TJ -22.695 -19.925 Td [(These)-305(objects)-305(ar)18(e)-305(used)-305(in)-305(AMG4PSBLAS)-305(to)-305(implement)-305(the)-305(factorization)-305(algo-)]TJ 0 -11.955 Td [(rithms.)]TJ +0 g 0 G + 166.874 -246.934 Td [(28)]TJ +0 g 0 G +ET + +endstream +endobj +1114 0 obj << -/D [924 0 R /XYZ 99.895 529.559 null] +/Length 158 >> +stream +0 g 0 G +0 g 0 G +BT +/F59 14.3462 Tf 99.895 705.784 Td [(4)-1000(Computational)-250(routines)]TJ +0 g 0 G +/F62 9.9626 Tf 166.875 -615.346 Td [(29)]TJ +0 g 0 G +ET endstream endobj -931 0 obj +1125 0 obj << -/Length 4754 +/Length 7465 >> stream 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(3.1.8)-1000(Clone)-250(\227)-250(clone)-250(current)-250(object)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(4.1)-1000(psb)]TJ +ET +q +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 201.825 706.129 Td [(geaxpby)-250(\227)-250(General)-250(Dense)-250(Matrix)-250(Sum)]TJ/F62 9.9626 Tf -51.12 -19.189 Td [(This)-358(subr)18(outine)-358(is)-359(an)-358(interface)-358(to)-358(the)-358(computational)-359(kernel)-358(for)-358(dense)-358(matrix)]TJ 0 -11.955 Td [(sum:)]TJ/F60 9.9626 Tf 143.149 -12.304 Td [(y)]TJ/F91 10.3811 Tf 7.998 0 Td [(\040)]TJ/F68 9.9626 Tf 13.397 0 Td [(a)]TJ/F60 9.9626 Tf 7.616 0 Td [(x)]TJ/F93 10.3811 Tf 7.267 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.649 0 Td [(y)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -175.407 -18.398 Td [(call)]TJ +0 g 0 G + [-525(psb_geaxpby\050alpha,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(beta,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info\051)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +ET +q +1 0 0 1 227.737 629.682 cm +[]0 d 0 J 0.398 w 0 0 m 189.647 0 l S +Q +BT +/F60 9.9626 Tf 234.009 621.114 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(,)]TJ/F60 9.9626 Tf 5.106 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(,)]TJ/F68 9.9626 Tf 5.105 0 Td [(a)]TJ/F62 9.9626 Tf 5.385 0 Td [(,)]TJ/F68 9.9626 Tf 5.355 0 Td [(b)]TJ/F59 9.9626 Tf 89.359 0 Td [(Subroutine)]TJ +ET +q +1 0 0 1 227.737 617.328 cm +[]0 d 0 J 0.398 w 0 0 m 189.647 0 l S +Q +BT +/F62 9.9626 Tf 233.715 608.761 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 370.948 608.96 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 373.937 608.761 Td [(geaxpby)]TJ -140.222 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +ET +q +1 0 0 1 370.948 597.005 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 373.937 596.805 Td [(geaxpby)]TJ -140.222 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +ET +q +1 0 0 1 370.948 585.05 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 373.937 584.85 Td [(geaxpby)]TJ -140.222 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +ET +q +1 0 0 1 370.948 573.094 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 373.937 572.895 Td [(geaxpby)]TJ +ET +q +1 0 0 1 227.737 569.109 cm +[]0 d 0 J 0.398 w 0 0 m 189.647 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 280.768 540.731 Td [(T)92(able)-250(1:)-310(Data)-250(types)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf 0 -19.289 Td [(call)-1050(desc%clone\050descout,info\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.422 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf -130.063 -35.05 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.597 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -20.39 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -20.598 Td [(desc)]TJ + 0 -20.391 Td [(alpha)]TJ 0 g 0 G -/F54 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +/F62 9.9626 Tf 30.436 0 Td [(the)-250(scalar)]TJ/F68 9.9626 Tf 44.368 0 Td [(a)]TJ/F62 9.9626 Tf 5.385 0 Td [(.)]TJ -55.282 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(1)]TJ 0 g 0 G -/F51 9.9626 Tf -77.917 -34.377 Td [(On)-250(Return)]TJ + [(.)]TJ 0 g 0 G +/F59 9.9626 Tf -24.907 -20.391 Td [(x)]TJ 0 g 0 G - 0 -20.597 Td [(descout)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.614 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(o)1(r)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ +ET +q +1 0 0 1 436.673 349.068 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 439.811 348.869 Td [(T)]TJ +ET +q +1 0 0 1 445.669 349.068 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 448.807 348.869 Td [(vect)]TJ +ET +q +1 0 0 1 470.356 349.068 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 473.495 348.869 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf -297.884 -11.955 Td [(containing)-312(numbers)-311(of)-312(type)-311(speci\002ed)-312(in)-311(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-312(1)]TJ 0 g 0 G -/F54 9.9626 Tf 39.84 0 Td [(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ + [(.)-494(The)-312(rank)-312(of)]TJ/F60 9.9626 Tf 274.834 0 Td [(x)]TJ/F62 9.9626 Tf 8.31 0 Td [(must)-311(be)]TJ -283.144 -11.955 Td [(the)-250(same)-250(of)]TJ/F60 9.9626 Tf 52.946 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -39.84 -20.597 Td [(info)]TJ +/F59 9.9626 Tf -82.958 -20.391 Td [(beta)]TJ +0 g 0 G +/F62 9.9626 Tf 24.348 0 Td [(the)-250(scalar)]TJ/F68 9.9626 Tf 44.618 0 Td [(b)]TJ/F62 9.9626 Tf 5.524 0 Td [(.)]TJ -49.584 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(1)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F51 9.9626 Tf -23.8 -28.097 Td [(3.1.9)-1000(CNV)-250(\227)-250(convert)-250(internal)-250(storage)-250(format)]TJ + [(.)]TJ 0 g 0 G +/F59 9.9626 Tf -24.906 -20.391 Td [(y)]TJ 0 g 0 G -/F59 9.9626 Tf 0 -19.289 Td [(call)-1050(desc%cnv\050mold\051)]TJ +/F62 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 191.754 0 Td [(y)]TJ/F62 9.9626 Tf 5.105 0 Td [(.)]TJ -182.473 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ +ET +q +1 0 0 1 436.673 188.736 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 439.811 188.537 Td [(T)]TJ +ET +q +1 0 0 1 445.669 188.736 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 448.807 188.537 Td [(vect)]TJ +ET +q +1 0 0 1 470.356 188.736 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 473.495 188.537 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.422 Td [(T)90(ype:)]TJ +/F62 9.9626 Tf -297.884 -11.956 Td [(containing)-276(numbers)-277(of)-276(the)-276(type)-276(indicated)-277(in)-276(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-276(1)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [(.)-389(The)-276(rank)-277(of)]TJ/F60 9.9626 Tf 288.67 0 Td [(y)]TJ/F62 9.9626 Tf 7.859 0 Td [(must)]TJ -296.529 -11.955 Td [(be)-250(the)-250(same)-250(of)]TJ/F60 9.9626 Tf 65.888 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.597 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -95.999 -20.39 Td [(desc)]TJ +ET +q +1 0 0 1 171.218 144.435 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 174.207 144.236 Td [(a)]TJ 0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ 0 g 0 G - 0 -20.597 Td [(desc)]TJ +/F62 9.9626 Tf 114.879 -29.888 Td [(30)]TJ 0 g 0 G -/F54 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +ET + +endstream +endobj +1132 0 obj +<< +/Length 2404 +>> +stream 0 g 0 G -/F51 9.9626 Tf -77.917 -32.553 Td [(mold)]TJ 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(the)-250(desir)18(ed)-250(integer)-250(storage)-250(format.)]TJ -3.876 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(object)-190(of)-190(type)-190(derived)-190(fr)18(om)-190(\050integer\051)]TJ/F59 9.9626 Tf 221.926 0 Td [(psb)]TJ +BT +/F62 9.9626 Tf 124.802 706.129 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 413.855 356.476 cm +1 0 0 1 273.363 694.373 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 416.994 356.277 Td [(T)]TJ +/F67 9.9626 Tf 276.501 694.174 Td [(desc)]TJ ET q -1 0 0 1 422.851 356.476 cm +1 0 0 1 298.05 694.373 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 425.99 356.277 Td [(base)]TJ +/F67 9.9626 Tf 301.189 694.174 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -222.215 -21.918 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(y)]TJ +0 g 0 G +/F62 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F60 9.9626 Tf 160.68 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ -151.4 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ ET q -1 0 0 1 447.539 356.476 cm +1 0 0 1 385.864 604.709 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 450.677 356.277 Td [(vect)]TJ +/F67 9.9626 Tf 389.002 604.51 Td [(T)]TJ ET q -1 0 0 1 472.226 356.476 cm +1 0 0 1 394.86 604.709 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 475.364 356.277 Td [(type)]TJ/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -345.58 -22.59 Td [(The)]TJ/F59 9.9626 Tf 19.583 0 Td [(mold)]TJ/F54 9.9626 Tf 23.828 0 Td [(ar)18(guments)-292(may)-291(be)-292(employed)-292(to)-292(interface)-291(with)-292(special)-292(devices,)-302(such)]TJ -43.411 -11.955 Td [(as)-250(GPUs)-250(and)-250(other)-250(accelerators.)]TJ/F51 9.9626 Tf 0 -28.096 Td [(3.1.10)-1000(psb)]TJ +/F67 9.9626 Tf 397.998 604.51 Td [(vect)]TJ ET q -1 0 0 1 202.769 293.835 cm +1 0 0 1 419.547 604.709 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 422.685 604.51 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(1)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 141.968 -434.371 Td [(31)]TJ +0 g 0 G +ET + +endstream +endobj +1142 0 obj +<< +/Length 7647 +>> +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 150.705 706.129 Td [(4.2)-1000(psb)]TJ +ET +q +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 201.825 706.129 Td [(gedot)-250(\227)-250(Dot)-250(Product)]TJ/F62 9.9626 Tf -51.12 -18.976 Td [(This)-250(function)-250(computes)-250(dot)-250(pr)18(oduct)-250(between)-250(two)-250(vectors)]TJ/F60 9.9626 Tf 254.647 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(and)]TJ/F60 9.9626 Tf 19.481 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ -286.93 -11.955 Td [(If)]TJ/F60 9.9626 Tf 9.459 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(and)]TJ/F60 9.9626 Tf 19.482 0 Td [(y)]TJ/F62 9.9626 Tf 7.597 0 Td [(ar)18(e)-250(r)18(eal)-250(vectors)-250(it)-250(computes)-250(dot-pr)18(oduct)-250(as:)]TJ/F60 9.9626 Tf 104.717 -23.132 Td [(d)-25(o)-35(t)]TJ/F91 10.3811 Tf 16.337 0 Td [(\040)]TJ/F60 9.9626 Tf 13.566 0 Td [(x)]TJ/F60 7.5716 Tf 5.399 4.115 Td [(T)]TJ/F60 9.9626 Tf 5.525 -4.115 Td [(y)]TJ/F62 9.9626 Tf -189.778 -21.93 Td [(Else)-250(if)]TJ/F60 9.9626 Tf 29.474 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(and)]TJ/F60 9.9626 Tf 19.482 0 Td [(y)]TJ/F62 9.9626 Tf 7.596 0 Td [(ar)18(e)-250(complex)-250(vectors)-250(then)-250(it)-250(computes)-250(dot-pr)18(oduct)-250(as:)]TJ/F60 9.9626 Tf 83.965 -23.132 Td [(d)-25(o)-35(t)]TJ/F91 10.3811 Tf 16.336 0 Td [(\040)]TJ/F60 9.9626 Tf 13.567 0 Td [(x)]TJ/F60 7.5716 Tf 5.588 4.115 Td [(H)]TJ/F60 9.9626 Tf 6.812 -4.115 Td [(y)]TJ/F67 9.9626 Tf -175.572 -21.937 Td [(psb_gedot\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525([,global]\051)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +ET +q +1 0 0 1 233.929 570.686 cm +[]0 d 0 J 0.398 w 0 0 m 177.263 0 l S +Q +BT +/F60 9.9626 Tf 240.031 562.118 Td [(d)-25(o)-35(t)]TJ/F62 9.9626 Tf 13.444 0 Td [(,)]TJ/F60 9.9626 Tf 5.276 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(,)]TJ/F60 9.9626 Tf 5.106 0 Td [(y)]TJ/F59 9.9626 Tf 91.76 0 Td [(Function)]TJ +ET +q +1 0 0 1 233.929 558.332 cm +[]0 d 0 J 0.398 w 0 0 m 177.263 0 l S +Q +BT +/F62 9.9626 Tf 239.906 549.765 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 377.14 549.964 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 205.758 293.636 Td [(cd)]TJ +/F62 9.9626 Tf 380.129 549.765 Td [(gedot)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 216.867 293.835 cm +1 0 0 1 377.14 538.009 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 219.855 293.636 Td [(get)]TJ +/F62 9.9626 Tf 380.129 537.809 Td [(gedot)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 234.291 293.835 cm +1 0 0 1 377.14 526.053 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 237.28 293.636 Td [(large)]TJ +/F62 9.9626 Tf 380.129 525.854 Td [(gedot)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 260.572 293.835 cm +1 0 0 1 377.14 514.098 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 263.561 293.636 Td [(threshold)-190(\227)-190(Get)-190(threshold)-190(for)-190(index)-190(mapping)-190(switch)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -112.856 -19.29 Td [(ith)-525(=)-525(psb_cd_get_large_threshold\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -22.421 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.597 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 380.129 513.899 Td [(gedot)]TJ +ET +q +1 0 0 1 233.929 510.113 cm +[]0 d 0 J 0.398 w 0 0 m 177.263 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 280.768 481.735 Td [(T)92(able)-250(2:)-310(Data)-250(types)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +/F59 9.9626 Tf -130.063 -34.507 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.951 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.951 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.614 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.742 0 Td [(psb)]TJ +ET +q +1 0 0 1 436.673 359.705 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 439.811 359.506 Td [(T)]TJ +ET +q +1 0 0 1 445.669 359.705 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 448.807 359.506 Td [(vect)]TJ +ET +q +1 0 0 1 470.356 359.705 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 473.495 359.506 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf -297.884 -11.956 Td [(containing)-312(numbers)-311(of)-312(type)-311(speci\002ed)-312(in)-311(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-312(2)]TJ +0 g 0 G + [(.)-494(The)-312(rank)-312(of)]TJ/F60 9.9626 Tf 274.834 0 Td [(x)]TJ/F62 9.9626 Tf 8.31 0 Td [(must)-311(be)]TJ -283.144 -11.955 Td [(the)-250(same)-250(of)]TJ/F60 9.9626 Tf 52.946 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -82.958 -19.951 Td [(y)]TJ +0 g 0 G +/F62 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.445 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ -166.165 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ +ET +q +1 0 0 1 436.673 268.023 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 439.811 267.824 Td [(T)]TJ +ET +q +1 0 0 1 445.669 268.023 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 448.807 267.824 Td [(vect)]TJ +ET +q +1 0 0 1 470.356 268.023 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 473.495 267.824 Td [(type)]TJ 0 g 0 G +/F62 9.9626 Tf -297.884 -11.955 Td [(containing)-313(numbers)-314(of)-313(type)-313(speci\002ed)-314(in)-313(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-313(2)]TJ 0 g 0 G - 0 -20.598 Td [(Function)-250(value)]TJ + [(.)-500(The)-314(rank)-313(of)]TJ/F60 9.9626 Tf 274.898 0 Td [(y)]TJ/F62 9.9626 Tf 8.228 0 Td [(must)-313(be)]TJ -283.126 -11.956 Td [(the)-250(same)-250(of)]TJ/F60 9.9626 Tf 53.116 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(curr)18(ent)-250(value)-250(for)-250(the)-250(size)-250(thr)18(eshold.)]TJ/F51 9.9626 Tf -72.777 -28.096 Td [(3.1.11)-1000(psb)]TJ -ET -q -1 0 0 1 202.769 182.833 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 205.758 182.634 Td [(cd)]TJ +/F59 9.9626 Tf -83.227 -19.95 Td [(desc)]TJ ET q -1 0 0 1 216.867 182.833 cm +1 0 0 1 171.218 224.162 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 219.855 182.634 Td [(set)]TJ +/F59 9.9626 Tf 174.207 223.963 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 233.175 182.833 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 324.173 176.341 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 236.164 182.634 Td [(large)]TJ +/F67 9.9626 Tf 327.311 176.142 Td [(desc)]TJ ET q -1 0 0 1 259.457 182.833 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 348.86 176.341 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 262.445 182.634 Td [(threshold)-190(\227)-190(Set)-190(threshold)-190(for)-190(index)-190(mapping)-190(switch)]TJ +/F67 9.9626 Tf 351.998 176.142 Td [(type)]TJ 0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F59 9.9626 Tf -111.74 -19.289 Td [(call)-525(psb_cd_set_large_threshold\050ith\051)]TJ +/F59 9.9626 Tf -222.214 -19.951 Td [(global)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.422 Td [(T)90(ype:)]TJ +/F62 9.9626 Tf 33.763 0 Td [(Speci\002es)-226(whether)-227(the)-226(computation)-226(should)-226(include)-227(the)-226(global)-226(r)18(eduction)]TJ -8.857 -11.955 Td [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.597 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G -/F54 9.9626 Tf 166.874 -29.888 Td [(14)]TJ + 76.693 -29.888 Td [(32)]TJ 0 g 0 G ET endstream endobj -936 0 obj +1148 0 obj << -/Length 5640 +/Length 4270 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(ith)]TJ +/F62 9.9626 Tf 124.802 706.129 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F67 9.9626 Tf 165.318 0 Td [(global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 17.704 0 Td [(the)-250(new)-250(thr)18(eshold)-250(for)-250(communication)-250(descriptors.)]TJ 7.203 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(gr)18(eater)-250(than)-250(zer)18(o.)]TJ -24.907 -20.813 Td [(Note:)-665(the)-427(thr)18(eshold)-428(value)-427(is)-428(only)-427(queried)-428(by)-427(the)-428(library)-427(at)-428(the)-427(time)-428(a)-427(call)]TJ 0 -11.955 Td [(to)]TJ/F59 9.9626 Tf 12.451 0 Td [(psb_cdall)]TJ/F54 9.9626 Tf 50.837 0 Td [(is)-378(executed,)-409(ther)18(efor)18(e)-378(changing)-378(the)-378(thr)18(eshold)-377(has)-378(no)-378(ef)18(fect)-378(on)]TJ -63.288 -11.955 Td [(communication)-339(descriptors)-340(that)-339(have)-339(alr)18(eady)-340(been)-339(initialized.)-578(Mor)18(eover)-339(the)]TJ 0 -11.955 Td [(thr)18(eshold)-250(must)-250(have)-250(the)-250(same)-250(value)-250(on)-250(all)-250(pr)18(ocesses.)]TJ/F51 9.9626 Tf 0 -26.933 Td [(3.1.12)-1000(get)]TJ -ET -q -1 0 0 1 149.2 574.896 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 152.189 574.697 Td [(p)]TJ -ET -q -1 0 0 1 158.874 574.896 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 161.863 574.697 Td [(adjcncy)-250(\227)-250(Get)-250(process)-250(adjacency)-250(list)]TJ + [(.true.)]TJ 0 g 0 G +/F59 9.9626 Tf -190.225 -31.881 Td [(On)-250(Return)]TJ 0 g 0 G -/F59 9.9626 Tf -61.968 -18.964 Td [(list)-525(=)-1050(desc%get_p_adjcncy\050\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -20.813 Td [(T)90(ype:)]TJ + 0 -19.925 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 72.777 0 Td [(is)-250(the)-250(dot)-250(pr)18(oduct)-250(of)-250(vectors)]TJ/F60 9.9626 Tf 126.33 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(and)]TJ/F60 9.9626 Tf 19.482 0 Td [(y)]TJ/F62 9.9626 Tf 5.105 0 Td [(.)]TJ -206.483 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.133 0 Td [(global)]TJ/F62 9.9626 Tf 30.675 0 Td [(unless)-190(the)-190(optional)-190(variable)]TJ/F67 9.9626 Tf 121.612 0 Td [(global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.483 Td [(On)-250(Return)]TJ + [(.false.)]TJ/F62 9.9626 Tf 75.118 0 Td [(has)-190(been)-190(spec-)]TJ -258.538 -11.955 Td [(i\002ed)]TJ 0 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(2)]TJ 0 g 0 G + [(.)]TJ 0 g 0 G - 0 -19.484 Td [(Function)-250(value)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-190(curr)18(ent)-190(list)-190(of)-190(adjacent)-190(pr)18(ocesses,)-202(i.e.)-290(pr)18(ocesses)-190(with)-190(which)]TJ -47.87 -11.955 Td [(the)-250(curr)18(ent)-250(one)-250(has)-250(to)-250(exchange)-250(halo)-250(data.)]TJ/F51 9.9626 Tf -24.907 -26.933 Td [(3.1.13)-1000(set)]TJ -ET -q -1 0 0 1 148.085 457.264 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 151.073 457.065 Td [(p)]TJ -ET -q -1 0 0 1 157.758 457.264 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 160.747 457.065 Td [(adjcncy)-250(\227)-250(Set)-250(process)-250(adjacency)-250(list)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -21.917 Td [(Notes)]TJ 0 g 0 G +/F62 9.9626 Tf 12.454 -19.926 Td [(1.)]TJ 0 g 0 G -/F59 9.9626 Tf -60.852 -18.964 Td [(call)-525(desc%set_p_adjcncy\050list\051)]TJ + [-500(The)-190(computation)-190(of)-190(a)-190(global)-190(r)18(esult)-190(r)18(equir)18(es)-190(a)-190(global)-190(communication,)-202(which)]TJ 12.453 -11.955 Td [(entails)-318(a)-318(signi\002cant)-318(ove)1(r)18(head.)-514(It)-318(may)-318(be)-318(necessary)-317(and/or)-318(advisable)-318(to)]TJ 0 -11.955 Td [(compute)-204(multiple)-204(dot)-204(pr)18(oducts)-204(at)-204(the)-204(same)-204(time;)-219(in)-204(this)-204(case,)-213(it)-204(is)-204(possible)]TJ 0 -11.955 Td [(to)-250(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -20.813 Td [(T)90(ype:)]TJ +/F67 9.9626 Tf 52.303 -19.925 Td [(vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.483 Td [(On)-250(Entry)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.483 Td [(list)]TJ + [-525(psb_gedot\050x1,y1,desc_a,info,global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 19.358 0 Td [(the)-250(list)-250(of)-250(adjacent)-250(pr)18(ocesses.)]TJ 5.549 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(one-dimensional)-250(array)-250(of)-250(integers)-250(of)-250(kind)]TJ/F59 9.9626 Tf 250.209 0 Td [(psb_ipk_)]TJ/F54 9.9626 Tf 41.843 0 Td [(.)]TJ -316.959 -20.813 Td [(Note:)-596(this)-392(method)-393(can)-393(be)-393(called)-393(after)-392(a)-393(call)-393(to)]TJ/F59 9.9626 Tf 216.367 0 Td [(psb_cdall)]TJ/F54 9.9626 Tf 50.987 0 Td [(and)-393(befor)18(e)-393(a)-392(call)]TJ -267.354 -11.955 Td [(to)]TJ/F59 9.9626 Tf 11.711 0 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 47.073 0 Td [(.)-470(The)-304(user)-303(is)-304(specifying)-303(her)18(e)-304(some)-303(knowledge)-304(about)-303(which)-304(pr)18(o-)]TJ -58.784 -11.955 Td [(cesses)-208(ar)18(e)-208(topol)1(ogical)-208(neighbours)-208(of)-208(the)-207(curr)18(ent)-208(pr)18(ocess.)-296(The)-208(availability)-207(of)-208(this)]TJ 0 -11.955 Td [(information)-250(may)-250(speed)-250(up)-250(the)-250(execution)-250(of)-250(the)-250(assembly)-250(call)]TJ/F59 9.9626 Tf 269.656 0 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 47.073 0 Td [(.)]TJ/F51 9.9626 Tf -316.729 -26.934 Td [(3.1.14)-1000(fnd)]TJ -ET -q -1 0 0 1 151.412 247.089 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 154.401 246.89 Td [(owner)-250(\227)-250(Find)-250(the)-250(owner)-250(process)-250(of)-250(a)-250(set)-250(of)-250(indices)]TJ + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + 0 -11.956 Td [(vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(2)]TJ 0 g 0 G -/F59 9.9626 Tf -54.506 -18.964 Td [(call)-525(desc%fnd_owner\050idx,iprc,info\051)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -20.813 Td [(T)90(ype:)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.483 Td [(On)-250(Entry)]TJ + [-525(psb_gedot\050x2,y2,desc_a,info,global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.484 Td [(idx)]TJ + 0 -11.955 Td [(vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(3)]TJ 0 g 0 G -/F54 9.9626 Tf 19.368 0 Td [(the)-250(list)-250(of)-250(global)-250(indices)-250(for)-250(which)-250(we)-250(need)-250(the)-250(owning)-250(pr)18(ocesses.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(one-dimensional)-250(array)-250(of)-250(integers)-250(of)-250(kind)]TJ/F59 9.9626 Tf 250.209 0 Td [(psb_lpk_)]TJ/F54 9.9626 Tf 41.843 0 Td [(.)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -150.084 -29.888 Td [(15)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(psb_gedot\050x3,y3,desc_a,info,global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ +0 g 0 G + [-525(psb_sum\050ctxt,vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ +0 g 0 G + [(:)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(3)]TJ +0 g 0 G + [(\051\051)]TJ/F62 9.9626 Tf -52.303 -19.925 Td [(In)-253(this)-252(way)-253(the)-253(global)-253(communicati)1(on,)-254(which)-253(for)-252(small)-253(sizes)-253(is)-252(a)-253(latency-)]TJ 0 -11.955 Td [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ +0 g 0 G + 141.968 -282.939 Td [(33)]TJ 0 g 0 G ET endstream endobj -943 0 obj +1159 0 obj << -/Length 7676 +/Length 8519 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -21.934 Td [(iprc)]TJ -0 g 0 G -/F54 9.9626 Tf 22.685 0 Td [(the)-250(list)-250(of)-250(pr)18(ocesses)-250(owning)-250(the)-250(indices)-250(in)]TJ/F59 9.9626 Tf 184.993 0 Td [(idx)]TJ/F54 9.9626 Tf 15.691 0 Td [(.)]TJ -198.463 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.381 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-408(as:)-627(an)-408(allocatable)-408(one-dimensional)-408(array)-409(of)-408(integers)-408(of)-408(kind)]TJ/F59 9.9626 Tf 0 -11.955 Td [(psb_ipk_)]TJ/F54 9.9626 Tf 41.843 0 Td [(.)]TJ -66.749 -23.926 Td [(Note:)-349(this)-269(method)-269(may)-270(or)-269(may)-269(not)-269(actually)-270(r)18(equir)18(e)-269(communications,)-274(depend-)]TJ 0 -11.956 Td [(ing)-283(on)-283(the)-283(ex)1(a)-1(c)1(t)-283(internal)-283(data)-283(storage;)-299(given)-283(that)-283(the)-283(choice)-283(of)-282(storage)-283(may)-283(be)]TJ 0 -11.955 Td [(alter)18(ed)-376(by)-375(r)8(untime)-376(parameters,)-407(it)-376(is)-375(necessary)-376(for)-376(safety)-375(that)-376(this)-375(method)-376(is)]TJ 0 -11.955 Td [(called)-250(by)-250(all)-250(pr)18(ocesses.)]TJ/F51 9.9626 Tf 0 -29.998 Td [(3.1.15)-1000(Named)-250(Constants)]TJ -0 g 0 G - 0 -19.937 Td [(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(4.3)-1000(psb)]TJ ET q -1 0 0 1 167.9 514.891 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 170.889 514.692 Td [(none)]TJ -ET -q -1 0 0 1 194.182 514.891 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q +/F59 11.9552 Tf 201.825 706.129 Td [(gedots)-250(\227)-250(Generalized)-250(Dot)-250(Product)]TJ/F62 9.9626 Tf -51.12 -18.964 Td [(This)-283(subr)18(outine)-284(computes)-283(a)-284(series)-284(of)-283(dot)-284(pr)18(oducts)-283(among)-284(the)-283(columns)-284(of)-283(two)]TJ 0 -11.955 Td [(dense)-250(matrices)]TJ/F60 9.9626 Tf 68.208 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(and)]TJ/F60 9.9626 Tf 19.482 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(:)]TJ/F60 9.9626 Tf 24.807 -13.101 Td [(r)-17(e)-25(s)]TJ/F93 10.3811 Tf 12.293 0 Td [(\050)]TJ/F60 9.9626 Tf 4.205 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F91 10.3811 Tf 7.041 0 Td [(\040)]TJ/F60 9.9626 Tf 13.567 0 Td [(x)]TJ/F93 10.3811 Tf 5.33 0 Td [(\050)]TJ/F62 9.9626 Tf 4.274 0 Td [(:)-12(,)]TJ/F60 9.9626 Tf 6.821 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F60 7.5716 Tf 4.343 4.114 Td [(T)]TJ/F60 9.9626 Tf 5.525 -4.114 Td [(y)]TJ/F93 10.3811 Tf 5.23 0 Td [(\050)]TJ/F62 9.9626 Tf 4.274 0 Td [(:)-13(,)]TJ/F60 9.9626 Tf 6.821 0 Td [(i)]TJ/F93 10.3811 Tf 3.089 0 Td [(\051)]TJ/F62 9.9626 Tf -214.288 -16.876 Td [(If)-300(the)-299(matrices)-300(ar)18(e)-299(complex,)-312(then)-300(the)-300(usual)-299(convention)-300(applies,)-312(i.e.)-459(the)-299(conju-)]TJ 0 -11.955 Td [(gate)-239(transpose)-239(of)]TJ/F60 9.9626 Tf 77.351 0 Td [(x)]TJ/F62 9.9626 Tf 7.589 0 Td [(is)-239(used.)-307(If)]TJ/F60 9.9626 Tf 45.493 0 Td [(x)]TJ/F62 9.9626 Tf 7.589 0 Td [(and)]TJ/F60 9.9626 Tf 19.375 0 Td [(y)]TJ/F62 9.9626 Tf 7.489 0 Td [(ar)18(e)-239(of)-239(rank)-240(one,)-241(then)]TJ/F60 9.9626 Tf 92.601 0 Td [(r)-17(e)-25(s)]TJ/F62 9.9626 Tf 14.552 0 Td [(is)-239(a)-240(scalar)75(,)-242(else)-239(it)]TJ -272.039 -11.955 Td [(is)-250(a)-250(rank)-250(one)-250(array)111(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F54 9.9626 Tf 202.152 514.692 Td [(Generic)-250(no-op;)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 20.174 -11.955 Td [(call)]TJ 0 g 0 G -/F51 9.9626 Tf -51.447 -21.934 Td [(psb)]TJ -ET -q -1 0 0 1 167.9 492.957 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 170.889 492.758 Td [(root)]TJ -ET -q -1 0 0 1 189.758 492.957 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + [-525(psb_gedots\050res,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F54 9.9626 Tf 197.728 492.758 Td [(Default)-250(r)18(oot)-250(pr)18(ocess)-250(for)-250(br)18(oadcast)-250(and)-250(scatter)-250(operations;)]TJ + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -47.023 -21.934 Td [(psb)]TJ -ET -q -1 0 0 1 167.9 471.023 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 170.889 470.824 Td [(nohalo)]TJ -ET -q -1 0 0 1 203.038 471.023 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F54 9.9626 Tf 211.008 470.824 Td [(Do)-250(not)-250(fetch)-250(halo)-250(elements;)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info\051)]TJ 0 g 0 G -/F51 9.9626 Tf -60.303 -21.934 Td [(psb)]TJ -ET -q -1 0 0 1 167.9 449.089 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 170.889 448.89 Td [(halo)]TJ -ET -q -1 0 0 1 191.412 449.089 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q 0 g 0 G -BT -/F54 9.9626 Tf 199.382 448.89 Td [(Fetch)-250(halo)-250(elements)-250(fr)18(om)-250(neighbouring)-250(pr)18(ocesses;)]TJ 0 g 0 G -/F51 9.9626 Tf -48.677 -21.934 Td [(psb)]TJ ET q -1 0 0 1 167.9 427.155 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 230.392 595.704 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F51 9.9626 Tf 170.889 426.956 Td [(sum)]TJ +/F60 9.9626 Tf 236.394 587.136 Td [(r)-17(e)-25(s)]TJ/F62 9.9626 Tf 12.17 0 Td [(,)]TJ/F60 9.9626 Tf 5.275 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(,)]TJ/F60 9.9626 Tf 5.106 0 Td [(y)]TJ/F59 9.9626 Tf 93.135 0 Td [(Subroutine)]TJ ET q -1 0 0 1 190.854 427.155 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 230.392 583.351 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 198.824 426.956 Td [(Sum)-250(overlapped)-250(elements)]TJ -0 g 0 G -/F51 9.9626 Tf -48.119 -21.934 Td [(psb)]TJ +/F62 9.9626 Tf 236.369 574.783 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 167.9 405.221 cm +1 0 0 1 373.603 574.982 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 170.889 405.022 Td [(avg)]TJ +/F62 9.9626 Tf 376.592 574.783 Td [(gedots)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 187.546 405.221 cm +1 0 0 1 373.603 563.027 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 195.517 405.022 Td [(A)92(verage)-250(overlapped)-250(elements)]TJ -0 g 0 G -/F51 9.9626 Tf -44.812 -21.934 Td [(psb)]TJ +/F62 9.9626 Tf 376.592 562.828 Td [(gedots)]TJ -140.223 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 167.9 383.288 cm +1 0 0 1 373.603 551.072 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 170.889 383.088 Td [(comm)]TJ +/F62 9.9626 Tf 376.592 550.872 Td [(gedots)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 199.163 383.288 cm +1 0 0 1 373.603 539.116 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 202.152 383.088 Td [(halo)]TJ +/F62 9.9626 Tf 376.592 538.917 Td [(gedots)]TJ ET q -1 0 0 1 222.674 383.288 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 230.392 535.131 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 230.645 383.088 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F59 9.9626 Tf 124.92 0 Td [(halo_index)]TJ/F54 9.9626 Tf 54.794 0 Td [(list;)]TJ +/F62 9.9626 Tf 280.768 506.753 Td [(T)92(able)-250(3:)-310(Data)-250(types)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +/F59 9.9626 Tf -130.063 -32.002 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.22 Td [(On)-250(Entry)]TJ +0 g 0 G 0 g 0 G -/F51 9.9626 Tf -259.654 -21.934 Td [(psb)]TJ + 0 -19.22 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.614 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.742 0 Td [(psb)]TJ ET q -1 0 0 1 167.9 361.354 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 436.673 388.689 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 170.889 361.154 Td [(comm)]TJ +/F67 9.9626 Tf 439.811 388.49 Td [(T)]TJ ET q -1 0 0 1 199.163 361.354 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 445.669 388.689 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 202.152 361.154 Td [(ext)]TJ +/F67 9.9626 Tf 448.807 388.49 Td [(vect)]TJ ET q -1 0 0 1 216.029 361.354 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 470.356 388.689 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 224 361.154 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F59 9.9626 Tf 124.92 0 Td [(ext_index)]TJ/F54 9.9626 Tf 49.564 0 Td [(list;)]TJ +/F67 9.9626 Tf 473.495 388.49 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -247.779 -21.934 Td [(psb)]TJ -ET -q -1 0 0 1 167.9 339.42 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 170.889 339.22 Td [(comm)]TJ -ET -q -1 0 0 1 199.163 339.42 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 202.152 339.22 Td [(ovr)]TJ +/F62 9.9626 Tf -297.884 -11.955 Td [(containing)-312(numbers)-311(of)-312(type)-311(speci\002ed)-312(in)-311(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-312(3)]TJ +0 g 0 G + [(.)-494(The)-312(rank)-312(of)]TJ/F60 9.9626 Tf 274.834 0 Td [(x)]TJ/F62 9.9626 Tf 8.31 0 Td [(must)-311(be)]TJ -283.144 -11.955 Td [(the)-250(same)-250(of)]TJ/F60 9.9626 Tf 52.946 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -82.958 -19.221 Td [(y)]TJ +0 g 0 G +/F62 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.445 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ -166.165 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ ET q -1 0 0 1 217.703 339.42 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 436.673 297.738 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 225.673 339.22 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F59 9.9626 Tf 124.92 0 Td [(ovrlap_index)]TJ/F54 9.9626 Tf 65.255 0 Td [(list;)]TJ -0 g 0 G -/F51 9.9626 Tf -265.143 -21.934 Td [(psb)]TJ +/F67 9.9626 Tf 439.811 297.539 Td [(T)]TJ ET q -1 0 0 1 167.9 317.486 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 445.669 297.738 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 170.889 317.286 Td [(comm)]TJ +/F67 9.9626 Tf 448.807 297.539 Td [(vect)]TJ ET q -1 0 0 1 199.163 317.486 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 470.356 297.738 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 202.152 317.286 Td [(mov)]TJ +/F67 9.9626 Tf 473.495 297.539 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf -297.884 -11.956 Td [(containing)-313(numbers)-314(of)-313(type)-313(speci\002ed)-314(in)-313(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-313(3)]TJ +0 g 0 G + [(.)-500(The)-314(rank)-313(of)]TJ/F60 9.9626 Tf 274.898 0 Td [(y)]TJ/F62 9.9626 Tf 8.228 0 Td [(must)-313(be)]TJ -283.126 -11.955 Td [(the)-250(same)-250(of)]TJ/F60 9.9626 Tf 53.116 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -83.227 -19.22 Td [(desc)]TJ ET q -1 0 0 1 222.684 317.486 cm +1 0 0 1 171.218 254.607 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 230.654 317.286 Td [(Exchange)-250(data)-250(based)-250(on)-250(the)]TJ/F59 9.9626 Tf 124.921 0 Td [(ovr_mst_idx)]TJ/F54 9.9626 Tf 60.024 0 Td [(list;)]TJ/F51 11.9552 Tf -264.894 -31.99 Td [(3.2)-1000(Sparse)-250(Matrix)-250(class)]TJ/F54 9.9626 Tf 0 -19.937 Td [(The)]TJ/F59 9.9626 Tf 19.623 0 Td [(psb)]TJ +/F59 9.9626 Tf 174.207 254.408 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 186.647 265.558 cm +1 0 0 1 324.173 206.786 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 189.785 265.359 Td [(Tspmat)]TJ +/F67 9.9626 Tf 327.311 206.587 Td [(desc)]TJ ET q -1 0 0 1 221.795 265.558 cm +1 0 0 1 348.86 206.786 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 224.933 265.359 Td [(type)]TJ/F54 9.9626 Tf 23.868 0 Td [(class)-296(contains)-295(all)-296(information)-296(about)-296(the)-295(local)-296(portion)-296(of)]TJ -98.096 -11.955 Td [(the)-200(sparse)-199(matrix)-200(and)-199(its)-200(storage)-200(mode.)-293(Its)-199(design)-200(is)-200(based)-199(on)-200(the)-200(ST)74(A)74(TE)-199(design)]TJ 0 -11.955 Td [(pattern)-256([)]TJ -1 0 0 rg 1 0 0 RG - [(13)]TJ +/F67 9.9626 Tf 351.998 206.587 Td [(type)]TJ 0 g 0 G - [(])-255(as)-256(detailed)-256(in)-256([)]TJ -1 0 0 rg 1 0 0 RG - [(11)]TJ -0 g 0 G - [(];)-258(the)-256(type)-256(declaration)-255(is)-256(shown)-256(in)-256(\002gur)18(e)]TJ -0 0 1 rg 0 0 1 RG - [-255(2)]TJ -0 g 0 G - [-256(wher)18(e)]TJ/F59 9.9626 Tf 0 -11.956 Td [(T)]TJ/F54 9.9626 Tf 7.721 0 Td [(is)-250(a)-250(placeholder)-250(for)-250(the)-250(data)-250(type)-250(and)-250(pr)18(ecision)-250(variants)]TJ -0 g 0 G -/F51 9.9626 Tf -7.721 -21.431 Td [(S)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(Single)-250(pr)18(ecision)-250(r)18(eal;)]TJ +/F59 9.9626 Tf -222.214 -19.22 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -11.068 -21.934 Td [(D)]TJ 0 g 0 G -/F54 9.9626 Tf 13.28 0 Td [(Double)-250(pr)18(ecision)-250(r)18(eal;)]TJ -0 g 0 G -/F51 9.9626 Tf -13.28 -21.934 Td [(C)]TJ -0 g 0 G -/F54 9.9626 Tf 12.174 0 Td [(Single)-250(pr)18(ecision)-250(complex;)]TJ -0 g 0 G -/F51 9.9626 Tf -12.174 -21.934 Td [(Z)]TJ -0 g 0 G -/F54 9.9626 Tf 11.626 0 Td [(Double)-250(pr)18(ecision)-250(complex;)]TJ + 0 -19.221 Td [(res)]TJ 0 g 0 G -/F51 9.9626 Tf -11.626 -21.934 Td [(LS,LD,LC,LZ)]TJ +/F62 9.9626 Tf 18.261 0 Td [(is)-250(the)-250(dot)-250(pr)18(oduct)-250(of)-250(vectors)]TJ/F60 9.9626 Tf 126.33 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(and)]TJ/F60 9.9626 Tf 19.482 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ -151.968 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-289(as:)-389(a)-290(number)-290(or)-289(a)-290(rank-one)-289(array)-290(of)-289(the)-290(data)-289(type)-290(indicated)-289(in)]TJ 0 -11.955 Td [(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(2)]TJ 0 g 0 G -/F54 9.9626 Tf 65.026 0 Td [(Same)-214(numeric)-214(type)-215(as)-214(above,)-221(but)-214(with)]TJ/F59 9.9626 Tf 168.016 0 Td [(psb_lpk_)]TJ/F54 9.9626 Tf 43.978 0 Td [(integer)-214(indices.)]TJ + [(.)]TJ 0 g 0 G - -110.146 -29.888 Td [(16)]TJ + 141.967 -29.888 Td [(34)]TJ 0 g 0 G ET endstream endobj -950 0 obj +1063 0 obj +<< +/Type /ObjStm +/N 100 +/First 957 +/Length 10603 +>> +stream +1061 0 127 58 131 115 135 172 1058 229 1065 323 1067 441 139 500 143 558 1064 616 +1069 710 1071 828 147 886 151 943 1068 1000 1073 1094 1075 1212 155 1271 159 1329 163 1387 +1072 1445 1077 1539 1079 1657 167 1715 1076 1771 1083 1865 1080 2013 1081 2159 1085 2307 171 2366 +175 2424 179 2481 183 2538 1086 2596 1082 2655 1089 2749 1091 2867 1087 2925 187 2983 191 3040 +195 3097 1088 3154 1096 3263 1093 3411 1094 3557 1098 3702 199 3761 1095 3819 1103 3913 1100 4052 +1105 4200 204 4258 208 4315 212 4371 1106 4428 1102 4486 1109 4593 1101 4732 1111 4879 1107 4938 +216 4997 1108 5055 1113 5164 1115 5282 220 5340 1112 5397 1124 5478 1116 5662 1117 5808 1118 5952 +1119 6098 1120 6244 1121 6388 1126 6534 224 6593 1099 6651 1123 6710 1131 6857 1122 7014 1128 7161 +1129 7305 1133 7451 1130 7509 1141 7616 1135 7791 1136 7932 1137 8078 1138 8222 1139 8367 1143 8514 +228 8573 1144 8631 1140 8690 1147 8810 1145 8949 1149 9095 1150 9153 1146 9211 1158 9318 1151 9502 +% 1061 0 obj +<< +/D [1059 0 R /XYZ 98.895 753.953 null] +>> +% 127 0 obj +<< +/D [1059 0 R /XYZ 99.895 716.092 null] +>> +% 131 0 obj +<< +/D [1059 0 R /XYZ 99.895 526.761 null] +>> +% 135 0 obj +<< +/D [1059 0 R /XYZ 99.895 326.359 null] +>> +% 1058 0 obj +<< +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1065 0 obj +<< +/Type /Page +/Contents 1066 0 R +/Resources 1064 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1062 0 R +>> +% 1067 0 obj +<< +/D [1065 0 R /XYZ 149.705 753.953 null] +>> +% 139 0 obj +<< +/D [1065 0 R /XYZ 150.705 716.092 null] +>> +% 143 0 obj +<< +/D [1065 0 R /XYZ 150.705 474.131 null] +>> +% 1064 0 obj +<< +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1069 0 obj +<< +/Type /Page +/Contents 1070 0 R +/Resources 1068 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1062 0 R +>> +% 1071 0 obj +<< +/D [1069 0 R /XYZ 98.895 753.953 null] +>> +% 147 0 obj +<< +/D [1069 0 R /XYZ 99.895 716.092 null] +>> +% 151 0 obj +<< +/D [1069 0 R /XYZ 99.895 376.562 null] +>> +% 1068 0 obj +<< +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1073 0 obj +<< +/Type /Page +/Contents 1074 0 R +/Resources 1072 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1062 0 R +>> +% 1075 0 obj +<< +/D [1073 0 R /XYZ 149.705 753.953 null] +>> +% 155 0 obj +<< +/D [1073 0 R /XYZ 150.705 716.092 null] +>> +% 159 0 obj +<< +/D [1073 0 R /XYZ 150.705 484.709 null] +>> +% 163 0 obj +<< +/D [1073 0 R /XYZ 150.705 251.325 null] +>> +% 1072 0 obj +<< +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1077 0 obj +<< +/Type /Page +/Contents 1078 0 R +/Resources 1076 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1062 0 R +>> +% 1079 0 obj +<< +/D [1077 0 R /XYZ 98.895 753.953 null] +>> +% 167 0 obj +<< +/D [1077 0 R /XYZ 99.895 476.15 null] +>> +% 1076 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1083 0 obj +<< +/Type /Page +/Contents 1084 0 R +/Resources 1082 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1062 0 R +/Annots [ 1080 0 R 1081 0 R ] +>> +% 1080 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [248.894 164.341 255.868 176.4] +/A << /S /GoTo /D (section.6) >> +>> +% 1081 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [343.512 128.475 350.485 140.535] +/A << /S /GoTo /D (listing.3) >> +>> +% 1085 0 obj +<< +/D [1083 0 R /XYZ 149.705 753.953 null] +>> +% 171 0 obj +<< +/D [1083 0 R /XYZ 150.705 716.092 null] +>> +% 175 0 obj +<< +/D [1083 0 R /XYZ 150.705 586.94 null] +>> +% 179 0 obj +<< +/D [1083 0 R /XYZ 150.705 402.59 null] +>> +% 183 0 obj +<< +/D [1083 0 R /XYZ 150.705 234.114 null] +>> +% 1086 0 obj +<< +/D [1083 0 R /XYZ 170.799 204.012 null] +>> +% 1082 0 obj +<< +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1089 0 obj +<< +/Type /Page +/Contents 1090 0 R +/Resources 1088 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1092 0 R +>> +% 1091 0 obj +<< +/D [1089 0 R /XYZ 98.895 753.953 null] +>> +% 1087 0 obj +<< +/D [1089 0 R /XYZ 99.895 446.997 null] +>> +% 187 0 obj +<< +/D [1089 0 R /XYZ 99.895 387.147 null] +>> +% 191 0 obj +<< +/D [1089 0 R /XYZ 99.895 370.604 null] +>> +% 195 0 obj +<< +/D [1089 0 R /XYZ 99.895 194.093 null] +>> +% 1088 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F102 1016 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1096 0 obj +<< +/Type /Page +/Contents 1097 0 R +/Resources 1095 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1092 0 R +/Annots [ 1093 0 R 1094 0 R ] +>> +% 1093 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [428.968 383.557 435.942 395.616] +/A << /S /GoTo /D (table.1) >> +>> +% 1094 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [428.968 240.08 435.942 252.139] +/A << /S /GoTo /D (table.1) >> +>> +% 1098 0 obj +<< +/D [1096 0 R /XYZ 149.705 753.953 null] +>> +% 199 0 obj +<< +/D [1096 0 R /XYZ 150.705 610.712 null] +>> +% 1095 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1103 0 obj +<< +/Type /Page +/Contents 1104 0 R +/Resources 1102 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1092 0 R +/Annots [ 1100 0 R ] +>> +% 1100 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [127.814 152.385 134.788 164.445] +/A << /S /GoTo /D (listing.4) >> +>> +% 1105 0 obj +<< +/D [1103 0 R /XYZ 98.895 753.953 null] +>> +% 204 0 obj +<< +/D [1103 0 R /XYZ 99.895 716.092 null] +>> +% 208 0 obj +<< +/D [1103 0 R /XYZ 99.895 430.41 null] +>> +% 212 0 obj +<< +/D [1103 0 R /XYZ 99.895 226.203 null] +>> +% 1106 0 obj +<< +/D [1103 0 R /XYZ 258.62 168.146 null] +>> +% 1102 0 obj +<< +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1109 0 obj +<< +/Type /Page +/Contents 1110 0 R +/Resources 1108 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1092 0 R +/Annots [ 1101 0 R ] +>> +% 1101 0 obj << -/Length 7032 +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [297.461 580.64 304.435 592.699] +/A << /S /GoTo /D (section.6) >> >> -stream -0 g 0 G -0 g 0 G -0 g 0 G -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -q -1 0 0 1 99.895 671.26 cm -0 0 343.711 38.854 re f -Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -BT -/F94 8.9664 Tf 112.299 699.454 Td [(type)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ -0 g 0 G - [-525(psb_Tspmat_type)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 9.414 -10.959 Td [(class)]TJ -0 g 0 G - [(\050psb_T_base_sparse_mat\051,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(allocatable)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-1050(::)]TJ -0 g 0 G - [-525(a)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -9.414 -10.959 Td [(end)-525(type)]TJ -0 g 0 G - [-1050(psb_Tspmat_type)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0 g 0 G -/F54 9.9626 Tf 4.295 -41.429 Td [(Listing)-250(2:)-310(The)-250(PSBLAS)-250(de\002ned)-250(data)-250(type)-250(that)-250(contains)-250(a)-250(sparse)-250(matrix.)]TJ -16.699 -32.661 Td [(The)-190(actual)-190(data)-190(is)-190(contained)-190(in)-190(the)-190(polymorphic)-190(component)]TJ/F59 9.9626 Tf 259.484 0 Td [(a%a)]TJ/F54 9.9626 Tf 17.584 0 Td [(of)-190(type)]TJ/F59 9.9626 Tf 32.089 0 Td [(psb)]TJ -ET -q -1 0 0 1 425.371 603.645 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 428.509 603.446 Td [(T)]TJ -ET -q -1 0 0 1 434.367 603.645 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 437.505 603.446 Td [(base)]TJ -ET -q -1 0 0 1 459.054 603.645 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 462.193 603.446 Td [(sparse)]TJ -ET -q -1 0 0 1 494.202 603.645 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 497.341 603.446 Td [(mat)]TJ/F54 9.9626 Tf 15.691 0 Td [(;)]TJ -413.137 -11.955 Td [(its)-306(speci)1(\002c)-306(layout)-305(can)-306(be)-305(chosen)-306(dynamically)-305(among)-306(the)-305(pr)18(ede\002ned)-306(types,)-319(or)]TJ 0 -11.955 Td [(an)-305(entir)18(ely)-305(new)-305(storage)-305(layout)-305(can)-305(be)-305(implemented)-304(and)-305(passed)-305(to)-305(the)-305(library)]TJ 0 -11.955 Td [(at)-231(r)8(untime)-231(via)-231(the)]TJ/F59 9.9626 Tf 80.145 0 Td [(psb_spasb)]TJ/F54 9.9626 Tf 49.377 0 Td [(r)18(outine.)-304(The)-231(following)-231(very)-231(common)-231(formats)-231(ar)18(e)]TJ -129.522 -11.955 Td [(pr)18(ecompiled)-250(in)-250(PSBLAS)-250(and)-250(thus)-250(ar)18(e)-250(always)-250(available:)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -19.889 Td [(psb)]TJ -ET -q -1 0 0 1 117.091 535.936 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 120.08 535.737 Td [(T)]TJ -ET -q -1 0 0 1 127.322 535.936 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 130.311 535.737 Td [(coo)]TJ -ET -q -1 0 0 1 146.411 535.936 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 149.399 535.737 Td [(sparse)]TJ -ET -q -1 0 0 1 178.769 535.936 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 181.758 535.737 Td [(mat)]TJ -0 g 0 G -/F54 9.9626 Tf 22.137 0 Td [(Coor)18(dinate)-250(storage;)]TJ -0 g 0 G -/F51 9.9626 Tf -104 -19.907 Td [(psb)]TJ -ET -q -1 0 0 1 117.091 516.03 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 120.08 515.83 Td [(T)]TJ -ET -q -1 0 0 1 127.322 516.03 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 130.311 515.83 Td [(csr)]TJ -ET -q -1 0 0 1 143.631 516.03 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 146.62 515.83 Td [(sparse)]TJ -ET -q -1 0 0 1 175.989 516.03 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 178.978 515.83 Td [(mat)]TJ -0 g 0 G -/F54 9.9626 Tf 22.137 0 Td [(Compr)18(essed)-250(storage)-250(by)-250(r)18(ows;)]TJ -0 g 0 G -/F51 9.9626 Tf -101.22 -19.906 Td [(psb)]TJ -ET -q -1 0 0 1 117.091 496.123 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 120.08 495.924 Td [(T)]TJ -ET -q -1 0 0 1 127.322 496.123 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 130.311 495.924 Td [(csc)]TJ -ET -q -1 0 0 1 144.179 496.123 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 147.168 495.924 Td [(sparse)]TJ -ET -q -1 0 0 1 176.537 496.123 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 179.526 495.924 Td [(mat)]TJ -0 g 0 G -/F54 9.9626 Tf 22.137 0 Td [(Compr)18(essed)-250(storage)-250(by)-250(columns;)]TJ -101.768 -19.889 Td [(The)-295(inner)-295(sparse)-294(matrix)-295(has)-295(an)-295(associated)-294(state,)-306(which)-295(can)-295(take)-294(the)-295(following)]TJ 0 -11.955 Td [(values:)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -19.888 Td [(Build:)]TJ -0 g 0 G -/F54 9.9626 Tf 32.927 0 Td [(State)-283(enter)18(ed)-283(after)-283(the)-283(\002rst)-284(allocation,)-291(and)-283(befor)18(e)-283(the)-283(\002rst)-283(assembly;)-300(in)]TJ -8.02 -11.955 Td [(this)-250(state)-250(it)-250(is)-250(possible)-250(to)-250(add)-250(nonzer)18(o)-250(entries.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.907 Td [(Assembled:)]TJ +% 1111 0 obj +<< +/D [1109 0 R /XYZ 149.705 753.953 null] +>> +% 1107 0 obj +<< +/D [1109 0 R /XYZ 150.705 665.282 null] +>> +% 216 0 obj +<< +/D [1109 0 R /XYZ 150.705 613.372 null] +>> +% 1108 0 obj +<< +/Font << /F102 1016 0 R /F62 667 0 R /F59 665 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1113 0 obj +<< +/Type /Page +/Contents 1114 0 R +/Resources 1112 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1092 0 R +>> +% 1115 0 obj +<< +/D [1113 0 R /XYZ 98.895 753.953 null] +>> +% 220 0 obj +<< +/D [1113 0 R /XYZ 99.895 716.092 null] +>> +% 1112 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1124 0 obj +<< +/Type /Page +/Contents 1125 0 R +/Resources 1123 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1092 0 R +/Annots [ 1116 0 R 1117 0 R 1118 0 R 1119 0 R 1120 0 R 1121 0 R ] +>> +% 1116 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [428.968 413.274 435.942 425.334] +/A << /S /GoTo /D (table.1) >> +>> +% 1117 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [419.358 345.063 495.412 357.123] +/A << /S /GoTo /D (vdata) >> +>> +% 1118 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [381.755 333.108 388.729 345.168] +/A << /S /GoTo /D (table.1) >> +>> +% 1119 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [428.968 252.942 435.942 265.002] +/A << /S /GoTo /D (table.1) >> +>> +% 1120 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [419.358 184.731 495.412 196.791] +/A << /S /GoTo /D (vdata) >> +>> +% 1121 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [397.868 172.776 404.842 184.835] +/A << /S /GoTo /D (table.1) >> +>> +% 1126 0 obj +<< +/D [1124 0 R /XYZ 149.705 753.953 null] +>> +% 224 0 obj +<< +/D [1124 0 R /XYZ 150.705 716.092 null] +>> +% 1099 0 obj +<< +/D [1124 0 R /XYZ 150.705 558.947 null] +>> +% 1123 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F68 1127 0 R /F93 915 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1131 0 obj +<< +/Type /Page +/Contents 1132 0 R +/Resources 1130 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1134 0 R +/Annots [ 1122 0 R 1128 0 R 1129 0 R ] +>> +% 1122 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [256.048 690.368 323.106 702.428] +/A << /S /GoTo /D (descdata) >> +>> +% 1128 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [368.549 600.704 444.603 612.764] +/A << /S /GoTo /D (vdata) >> +>> +% 1129 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [344.963 588.749 351.937 600.809] +/A << /S /GoTo /D (table.1) >> +>> +% 1133 0 obj +<< +/D [1131 0 R /XYZ 98.895 753.953 null] +>> +% 1130 0 obj +<< +/Font << /F62 667 0 R /F59 665 0 R /F67 913 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1141 0 obj +<< +/Type /Page +/Contents 1142 0 R +/Resources 1140 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1134 0 R +/Annots [ 1135 0 R 1136 0 R 1137 0 R 1138 0 R 1139 0 R ] +>> +% 1135 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [419.358 355.7 495.412 367.76] +/A << /S /GoTo /D (vdata) >> +>> +% 1136 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [381.755 343.745 388.729 355.804] +/A << /S /GoTo /D (table.2) >> +>> +% 1137 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [419.358 264.018 495.412 276.078] +/A << /S /GoTo /D (vdata) >> +>> +% 1138 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [381.88 252.063 388.854 264.123] +/A << /S /GoTo /D (table.2) >> +>> +% 1139 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [306.858 172.336 373.916 184.396] +/A << /S /GoTo /D (descdata) >> +>> +% 1143 0 obj +<< +/D [1141 0 R /XYZ 149.705 753.953 null] +>> +% 228 0 obj +<< +/D [1141 0 R /XYZ 150.705 716.092 null] +>> +% 1144 0 obj +<< +/D [1141 0 R /XYZ 150.705 499.951 null] +>> +% 1140 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1147 0 obj +<< +/Type /Page +/Contents 1148 0 R +/Resources 1146 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1134 0 R +/Annots [ 1145 0 R ] +>> +% 1145 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [378.159 602.697 385.133 614.756] +/A << /S /GoTo /D (table.2) >> +>> +% 1149 0 obj +<< +/D [1147 0 R /XYZ 98.895 753.953 null] +>> +% 1150 0 obj +<< +/D [1147 0 R /XYZ 99.895 512.639 null] +>> +% 1146 0 obj +<< +/Font << /F62 667 0 R /F59 665 0 R /F67 913 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1158 0 obj +<< +/Type /Page +/Contents 1159 0 R +/Resources 1157 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1134 0 R +/Annots [ 1151 0 R 1152 0 R 1153 0 R 1154 0 R 1155 0 R 1156 0 R ] +>> +% 1151 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [419.358 384.684 495.412 396.744] +/A << /S /GoTo /D (vdata) >> +>> + +endstream +endobj +1165 0 obj +<< +/Length 582 +>> +stream 0 g 0 G -/F54 9.9626 Tf 58.381 0 Td [(State)-324(enter)18(ed)-325(after)-324(the)-325(assembly;)-362(computations)-324(using)-325(the)-324(sparse)]TJ -33.474 -11.955 Td [(matrix,)-250(such)-250(as)-250(matrix-vector)-250(pr)18(oducts,)-250(ar)18(e)-250(only)-250(possible)-250(in)-250(this)-250(state;)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.907 Td [(Update:)]TJ 0 g 0 G -/F54 9.9626 Tf 40.678 0 Td [(State)-219(enter)18(ed)-220(after)-219(a)-219(r)18(einitalization;)-230(this)-219(is)-219(used)-220(to)-219(handle)-219(applications)]TJ -15.771 -11.955 Td [(in)-288(which)-288(the)-288(same)-288(sparsity)-289(pattern)-288(is)-288(used)-288(multiple)-288(times)-288(with)-288(dif)18(fer)18(ent)]TJ 0 -11.955 Td [(coef)18(\002cients.)-298(In)-213(this)-214(state)-213(it)-214(is)-213(only)-214(possible)-213(to)-214(enter)-213(coef)18(\002cients)-214(for)-213(alr)18(eady)]TJ 0 -11.956 Td [(existing)-250(nonzer)18(o)-250(entries.)]TJ -24.907 -19.888 Td [(The)-293(only)-292(storage)-293(variant)-292(supporting)-293(the)-293(build)-292(state)-293(is)-292(COO;)-293(all)-293(other)-292(variants)]TJ 0 -11.955 Td [(ar)18(e)-250(obtained)-250(by)-250(conversion)-250(to/fr)18(om)-250(it.)]TJ/F51 9.9626 Tf 0 -27.132 Td [(3.2.1)-1000(Sparse)-250(Matrix)-250(Methods)]TJ 0 -18.964 Td [(3.2.2)-1000(get)]TJ -ET -q -1 0 0 1 144.219 266.863 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q BT -/F51 9.9626 Tf 147.208 266.663 Td [(nrows)-250(\227)-250(Get)-250(number)-250(of)-250(rows)-250(in)-250(a)-250(sparse)-250(matrix)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -47.313 -18.963 Td [(nr)-525(=)-525(a%get_nrows\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.872 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 99.895 706.129 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.907 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.907 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ -0 g 0 G - -56.339 -33.827 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.906 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(number)-250(of)-250(r)18(ows)-250(of)-250(sparse)-250(matrix)]TJ/F59 9.9626 Tf 165.298 0 Td [(a)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ -0 g 0 G - -76.431 -29.888 Td [(17)]TJ + 141.968 -567.87 Td [(35)]TJ 0 g 0 G ET endstream endobj -955 0 obj +1172 0 obj << -/Length 3860 +/Length 7777 >> stream 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(3.2.3)-1000(get)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(4.4)-1000(psb)]TJ ET q -1 0 0 1 195.029 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 198.017 706.129 Td [(ncols)-250(\227)-250(Get)-250(number)-250(of)-250(columns)-250(in)-250(a)-250(sparse)-250(matrix)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -47.312 -19.023 Td [(nc)-525(=)-525(a%get_ncols\050\051)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(normi)-250(\227)-250(In\002nity-Norm)-250(of)-250(V)111(ector)]TJ/F62 9.9626 Tf -51.12 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(in\002nity-norm)-250(of)-250(a)-250(vector)]TJ/F60 9.9626 Tf 233.576 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -238.781 -11.955 Td [(If)]TJ/F60 9.9626 Tf 9.459 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(in\002nity)-250(norm)-250(as:)]TJ/F60 9.9626 Tf 117.807 -18.736 Td [(a)-25(m)-40(a)-42(x)]TJ/F91 10.3811 Tf 25.761 0 Td [(\040)]TJ/F62 9.9626 Tf 13.272 0 Td [(max)]TJ/F60 7.5716 Tf 8.355 -7.21 Td [(i)]TJ/F91 10.3811 Tf 12.349 7.21 Td [(j)]TJ/F60 9.9626 Tf 3.298 0 Td [(x)]TJ/F60 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F62 9.9626 Tf -206.019 -23.313 Td [(else)-250(if)]TJ/F60 9.9626 Tf 28.159 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(the)-250(in\002nity-norm)-250(as:)]TJ/F60 9.9626 Tf 63.42 -18.737 Td [(a)-25(m)-40(a)-42(x)]TJ/F91 10.3811 Tf 25.761 0 Td [(\040)]TJ/F62 9.9626 Tf 13.273 0 Td [(max)]TJ/F60 7.5716 Tf 8.354 -7.21 Td [(i)]TJ/F93 10.3811 Tf 12.35 7.21 Td [(\050)]TJ/F91 10.3811 Tf 4.274 0 Td [(j)]TJ/F60 9.9626 Tf 3.028 0 Td [(r)-17(e)]TJ/F93 10.3811 Tf 8.17 0 Td [(\050)]TJ/F60 9.9626 Tf 4.443 0 Td [(x)]TJ/F60 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F93 10.3811 Tf 2.875 1.96 Td [(\051)]TJ/F91 10.3811 Tf 4.274 0 Td [(j)]TJ/F93 10.3811 Tf 5.066 0 Td [(+)]TJ/F91 10.3811 Tf 10.256 0 Td [(j)]TJ/F60 9.9626 Tf 3.058 0 Td [(i)-32(m)]TJ/F93 10.3811 Tf 11.088 0 Td [(\050)]TJ/F60 9.9626 Tf 4.444 0 Td [(x)]TJ/F60 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F93 10.3811 Tf 2.875 1.96 Td [(\051)]TJ/F91 10.3811 Tf 4.274 0 Td [(j)]TJ/F93 10.3811 Tf 3.128 0 Td [(\051)]TJ/F67 9.9626 Tf -225.616 -22.974 Td [(psb_geamax\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -22.01 Td [(T)90(ype:)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [-525(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -20.049 Td [(On)-250(Entry)]TJ + [-525([,global]\051)]TJ -14.944 -11.955 Td [(psb_normi\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -20.048 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ + [-525(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -56.338 -33.965 Td [(On)-250(Return)]TJ + [-525([,global]\051)]TJ 0 g 0 G 0 g 0 G - 0 -20.048 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(number)-250(of)-250(columns)-250(of)-250(sparse)-250(matrix)]TJ/F59 9.9626 Tf 181.158 0 Td [(a)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ/F51 9.9626 Tf -259.165 -27.315 Td [(3.2.4)-1000(get)]TJ ET q -1 0 0 1 195.029 531.915 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 179.304 566.399 cm +[]0 d 0 J 0.398 w 0 0 m 286.513 0 l S Q BT -/F51 9.9626 Tf 198.017 531.716 Td [(nnzeros)-250(\227)-250(Get)-250(number)-250(of)-250(nonzero)-250(elements)-250(in)-250(a)-250(sparse)-250(matrix)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -47.312 -19.024 Td [(nz)-525(=)-525(a%get_nnzeros\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -22.01 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.048 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -20.048 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ -0 g 0 G - -56.338 -33.965 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -20.048 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(number)-250(of)-250(nonzer)18(o)-250(elements)-250(stor)18(ed)-250(in)-250(sparse)-250(matrix)]TJ/F59 9.9626 Tf 251.284 0 Td [(a)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ/F51 9.9626 Tf -329.292 -22.041 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.453 -20.017 Td [(1.)]TJ -0 g 0 G - [-500(The)-395(function)-395(value)-395(is)-395(speci\002c)-395(to)-395(the)-395(storage)-395(format)-395(of)-396(matri)1(x)]TJ/F59 9.9626 Tf 295.646 0 Td [(a)]TJ/F54 9.9626 Tf 5.23 0 Td [(;)-468(some)]TJ -288.422 -11.956 Td [(storage)-343(formats)-342(employ)-343(padding,)-366(thus)-343(the)-342(r)18(eturned)-343(value)-343(for)-342(the)-343(same)]TJ 0 -11.955 Td [(matrix)-250(may)-250(be)-250(dif)18(fer)18(ent)-250(for)-250(dif)18(fer)18(ent)-250(storage)-250(choices.)]TJ/F51 9.9626 Tf -24.907 -27.315 Td [(3.2.5)-1000(get)]TJ +/F60 9.9626 Tf 185.556 557.832 Td [(a)-25(m)-40(a)-42(x)-7779(x)]TJ/F59 9.9626 Tf 220.764 0 Td [(Function)]TJ ET q -1 0 0 1 195.029 291.533 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 179.304 554.046 cm +[]0 d 0 J 0.398 w 0 0 m 286.513 0 l S Q BT -/F51 9.9626 Tf 198.017 291.334 Td [(size)-398(\227)-397(Get)-398(maximum)-397(number)-398(of)-398(nonzero)-397(elements)-398(in)-398(a)-397(sparse)]TJ -17.424 -11.955 Td [(matrix)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -29.888 -19.024 Td [(maxnz)-525(=)-525(a%get_size\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -22.01 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.048 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -20.048 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ -0 g 0 G - -56.338 -33.965 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -20.048 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-220(maximum)-220(number)-219(of)-220(nonzer)18(o)-220(elements)-220(that)-220(can)-219(be)-220(stor)18(ed)]TJ -47.87 -11.955 Td [(in)-250(sparse)-250(matrix)]TJ/F59 9.9626 Tf 73.294 0 Td [(a)]TJ/F54 9.9626 Tf 7.721 0 Td [(using)-250(its)-250(curr)18(ent)-250(memory)-250(allocation.)]TJ -0 g 0 G - 60.952 -29.888 Td [(18)]TJ -0 g 0 G +/F62 9.9626 Tf 185.282 545.478 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET - -endstream -endobj -960 0 obj -<< -/Length 4133 ->> -stream -0 g 0 G -0 g 0 G +q +1 0 0 1 422.639 545.677 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q BT -/F51 9.9626 Tf 99.895 706.129 Td [(3.2.6)-1000(sizeof)-250(\227)-250(Get)-250(memory)-250(occupation)-250(in)-250(bytes)-250(of)-250(a)-250(sparse)-250(matrix)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf 0 -20.135 Td [(memory_size)-525(=)-525(a%sizeof\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -23.732 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -22.343 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -22.343 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ -0 g 0 G - -56.339 -35.687 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -22.343 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(memory)-250(occupation)-250(in)-250(bytes.)]TJ/F51 9.9626 Tf -72.777 -30.58 Td [(3.2.7)-1000(get)]TJ +/F62 9.9626 Tf 425.628 545.478 Td [(geamax)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 144.219 517.21 cm +1 0 0 1 422.639 533.722 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 147.208 517.011 Td [(fmt)-250(\227)-250(Short)-250(description)-250(of)-250(the)-250(dynamic)-250(type)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -47.313 -20.135 Td [(write)]TJ -0 g 0 G - [(\050)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(*)]TJ -0 g 0 G - [(,)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(*)]TJ -0 g 0 G - [(\051)-525(a%get_fmt\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -24.336 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -22.343 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -22.343 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ -0 g 0 G - -56.339 -35.686 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -22.343 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(A)-244(short)-245(string)-244(describing)-245(the)-244(dynamic)-245(type)-244(of)-245(the)-244(matrix.)-308(Pr)18(e-)]TJ -47.87 -11.955 Td [(de\002ned)-250(values)-250(include)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 102.415 0 Td [(NULL)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(,)]TJ/F59 9.9626 Tf 4.981 0 Td [(COO)]TJ/F54 9.9626 Tf 15.691 0 Td [(,)]TJ/F59 9.9626 Tf 4.982 0 Td [(CSR)]TJ/F54 9.9626 Tf 18.181 0 Td [(and)]TJ/F59 9.9626 Tf 19.358 0 Td [(CSC)]TJ/F54 9.9626 Tf 15.691 0 Td [(.)]TJ/F51 9.9626 Tf -227.127 -30.581 Td [(3.2.8)-1000(is)]TJ +/F62 9.9626 Tf 425.628 533.523 Td [(geamax)]TJ -240.346 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 138.122 315.533 cm +1 0 0 1 422.639 521.767 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 141.111 315.333 Td [(bld,)-250(is)]TJ +/F62 9.9626 Tf 425.628 521.568 Td [(geamax)]TJ -240.346 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 169.922 315.533 cm +1 0 0 1 422.639 509.812 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 172.911 315.333 Td [(upd,)-250(is)]TJ +/F62 9.9626 Tf 425.628 509.612 Td [(geamax)]TJ ET q -1 0 0 1 204.493 315.533 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 179.304 505.827 cm +[]0 d 0 J 0.398 w 0 0 m 286.513 0 l S Q -BT -/F51 9.9626 Tf 207.482 315.333 Td [(asb)-250(\227)-250(Status)-250(check)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -107.587 -20.135 Td [(if)]TJ -0 g 0 G - [-525(\050a%is_bld\050\051\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(if)]TJ -0 g 0 G - [-525(\050a%is_upd\050\051\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(if)]TJ -0 g 0 G - [-525(\050a%is_asb\050\051\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ -0 g 0 G -0 g 0 G -/F51 9.9626 Tf 0 -24.336 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -22.343 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -22.343 Td [(a)]TJ +BT +/F62 9.9626 Tf 280.768 477.448 Td [(T)92(able)-250(4:)-310(Data)-250(types)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ 0 g 0 G - -56.339 -35.686 Td [(On)-250(Return)]TJ 0 g 0 G +/F59 9.9626 Tf -130.063 -30.014 Td [(T)90(ype:)]TJ 0 g 0 G - 0 -22.343 Td [(Function)-250(value)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(A)]TJ -0.56 0.13 0.00 rg 0.56 0.13 0.00 RG -/F59 9.9626 Tf 9.966 0 Td [(logical)]TJ +/F59 9.9626 Tf -29.828 -18.652 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 38.827 0 Td [(value)-222(indicating)-223(whether)-222(the)-222(matrix)-223(is)-222(in)-222(the)-223(Build,)]TJ -96.663 -11.955 Td [(Update)-250(or)-250(Assembled)-250(state,)-250(r)18(espectively)111(.)]TJ 0 g 0 G - 141.968 -29.888 Td [(19)]TJ + 0 -18.653 Td [(x)]TJ 0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.614 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.742 0 Td [(psb)]TJ ET - -endstream -endobj -965 0 obj -<< -/Length 5143 ->> -stream -0 g 0 G -0 g 0 G +q +1 0 0 1 436.673 362.508 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F51 9.9626 Tf 150.705 706.129 Td [(3.2.9)-1000(is)]TJ +/F67 9.9626 Tf 439.811 362.308 Td [(T)]TJ ET q -1 0 0 1 188.931 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 445.669 362.508 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 191.92 706.129 Td [(lower)55(,)-250(is)]TJ +/F67 9.9626 Tf 448.807 362.308 Td [(vect)]TJ ET q -1 0 0 1 230.704 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 470.356 362.508 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 233.693 706.129 Td [(upper)55(,)-250(is)]TJ +/F67 9.9626 Tf 473.495 362.308 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf -297.884 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(4)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -18.652 Td [(desc)]TJ ET q -1 0 0 1 273.583 706.328 cm +1 0 0 1 171.218 331.9 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 276.572 706.129 Td [(triangle,)-250(is)]TJ +/F59 9.9626 Tf 174.207 331.701 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ +ET +q +1 0 0 1 324.173 284.079 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 327.311 283.88 Td [(desc)]TJ ET q -1 0 0 1 325.309 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 348.86 284.079 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 328.298 706.129 Td [(unit)-250(\227)-250(Format)-250(check)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -177.593 -19.573 Td [(if)]TJ -0 g 0 G - [-525(\050a%is_triangle\050\051\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(if)]TJ -0 g 0 G - [-525(\050a%is_upper\050\051\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ +/F67 9.9626 Tf 351.998 283.88 Td [(type)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(if)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [-525(\050a%is_lower\050\051\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ +/F59 9.9626 Tf -222.214 -18.653 Td [(global)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.956 Td [(if)]TJ +/F62 9.9626 Tf 33.763 0 Td [(Speci\002es)-226(whether)-227(the)-226(computation)-226(should)-226(include)-227(the)-226(global)-226(r)18(eduction)]TJ -8.857 -11.955 Td [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F67 9.9626 Tf 165.319 0 Td [(global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - [-525(\050a%is_unit\050\051\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ + [(.true.)]TJ 0 g 0 G +/F59 9.9626 Tf -190.225 -30.607 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.86 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + 0 -18.653 Td [(Function)-250(value)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -21.183 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 72.776 0 Td [(is)-250(the)-250(in\002nity)-250(norm)-250(of)-250(vector)]TJ/F60 9.9626 Tf 128.562 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -181.637 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.133 0 Td [(global)]TJ/F62 9.9626 Tf 30.675 0 Td [(unless)-190(the)-190(optional)-190(variable)]TJ/F67 9.9626 Tf 121.612 0 Td [(global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G + [(.false.)]TJ/F62 9.9626 Tf 75.118 0 Td [(has)-190(been)-190(spec-)]TJ -258.538 -11.955 Td [(i\002ed)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ 0 g 0 G - 0 -21.183 Td [(a)]TJ + 141.968 -29.888 Td [(36)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ +ET + +endstream +endobj +1177 0 obj +<< +/Length 3043 +>> +stream 0 g 0 G - -56.338 -34.816 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G - 0 -21.183 Td [(Function)-250(value)]TJ +BT +/F59 9.9626 Tf 99.895 706.129 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(A)]TJ -0.56 0.13 0.00 rg 0.56 0.13 0.00 RG -/F59 9.9626 Tf 10.803 0 Td [(logical)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 39.665 0 Td [(value)-306(indicating)-307(whether)-306(the)-307(matrix)-306(is)-306(triangular;)]TJ -98.338 -11.955 Td [(if)]TJ/F59 9.9626 Tf 8.595 0 Td [(is_triangle\050\051)]TJ/F54 9.9626 Tf 70.373 0 Td [(r)18(eturns)]TJ/F59 9.9626 Tf 34.119 0 Td [(.true.)]TJ/F54 9.9626 Tf 33.761 0 Td [(check)-239(also)-238(if)-239(it)-239(is)-239(lower)74(,)-241(upper)-238(and)-239(with)]TJ -146.848 -11.955 Td [(a)-250(unit)-250(\050i.e.)-310(assumed\051)-250(diagonal.)]TJ/F51 9.9626 Tf -24.907 -28.929 Td [(3.2.10)-1000(cscnv)-250(\227)-250(Convert)-250(to)-250(a)-250(dif)18(ferent)-250(storage)-250(format)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 0 -19.573 Td [(call)]TJ +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G - [-1050(a%cscnv\050b,info)-525([,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(type)]TJ + [-500(The)-190(computation)-190(of)-190(a)-190(global)-190(r)18(esult)-190(r)18(equir)18(es)-190(a)-190(global)-190(communication,)-202(which)]TJ 12.453 -11.955 Td [(entails)-318(a)-318(signi\002cant)-318(ove)1(r)18(head.)-514(It)-318(may)-318(be)-318(necessary)-317(and/or)-318(advisable)-318(to)]TJ 0 -11.955 Td [(compute)-333(multiple)-333(norms)-332(at)-333(the)-333(same)-333(time;)-374(in)-333(this)-333(case,)-354(it)-332(is)-333(possible)-333(to)]TJ 0 -11.955 Td [(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(,)-525(mold,)-525(dupl]\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ +/F67 9.9626 Tf 52.303 -19.926 Td [(vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ 0 g 0 G - [-1050(a%cscnv\050info)-525([,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(type)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(,)-525(mold,)-525(dupl]\051)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.861 Td [(T)90(ype:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [-525(psb_geamax\050x1,desc_a,info,global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -21.183 Td [(On)-250(Entry)]TJ + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + 0 -11.955 Td [(vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(2)]TJ 0 g 0 G - 0 -21.183 Td [(a)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ -160.077 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -77.917 -33.138 Td [(type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 24.906 0 Td [(a)-250(string)-250(r)18(equesting)-250(a)-250(new)-250(format.)]TJ 0.001 -11.955 Td [(T)90(ype:)-310(optional.)]TJ + [-525(psb_geamax\050x2,desc_a,info,global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.183 Td [(mold)]TJ + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(a)-236(variable)-236(of)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 56.403 0 Td [(class)]TJ + 0 -11.955 Td [(vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(3)]TJ 0 g 0 G - [(\050psb_T_base_sparse_mat\051)]TJ/F54 9.9626 Tf 148.803 0 Td [(r)18(equesting)-236(a)-236(new)-237(format)1(.)]TJ -209.081 -11.955 Td [(T)90(ype:)-310(optional.)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -21.182 Td [(dupl)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F54 9.9626 Tf 26.56 0 Td [(an)-359(integer)-358(value)-359(speci\002ng)-358(how)-359(to)-359(handle)-358(duplicates)-359(\050see)-359(Named)-358(Con-)]TJ -1.653 -11.956 Td [(stants)-250(below\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -22.861 Td [(On)-250(Return)]TJ + [-525(psb_geamax\050x3,desc_a,info,global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -21.182 Td [(b,a)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 18.54 0 Td [(A)-250(copy)-250(of)]TJ/F59 9.9626 Tf 45.37 0 Td [(a)]TJ/F54 9.9626 Tf 7.721 0 Td [(with)-250(a)-250(new)-250(storage)-250(format.)]TJ -46.724 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.456 0 Td [(.)]TJ + [-525(psb_amx\050ctxt,vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ 0 g 0 G -/F51 9.9626 Tf -184.985 -21.183 Td [(info)]TJ + [(:)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(3)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ -23.801 -23.175 Td [(The)]TJ/F59 9.9626 Tf 19.584 0 Td [(mold)]TJ/F54 9.9626 Tf 23.827 0 Td [(ar)18(guments)-292(may)-291(be)-292(employed)-292(to)-292(interface)-291(with)-292(special)-292(devices,)-302(such)]TJ -43.411 -11.955 Td [(as)-250(GPUs)-250(and)-250(other)-250(accelerators.)]TJ + [(\051\051)]TJ/F62 9.9626 Tf -52.303 -19.926 Td [(In)-253(this)-252(way)-253(the)-253(global)-253(communicati)1(on,)-254(which)-253(for)-252(small)-253(sizes)-253(is)-252(a)-253(latency-)]TJ 0 -11.955 Td [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ 0 g 0 G - 166.874 -29.888 Td [(20)]TJ + 141.968 -402.49 Td [(37)]TJ 0 g 0 G ET endstream endobj -969 0 obj +1186 0 obj << -/Length 4477 +/Length 6432 >> stream 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(3.2.11)-1000(csclip)-250(\227)-250(Reduce)-250(to)-250(a)-250(submatrix)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 20.922 -20.279 Td [(call)]TJ -0 g 0 G - [-525(a%csclip\050b,info[,&)]TJ 15.691 -11.955 Td [(&)-525(imin,imax,jmin,jmax,rscale,cscale]\051)]TJ/F54 9.9626 Tf -21.669 -24.631 Td [(Returns)-190(the)-190(submatrix)]TJ/F59 9.9626 Tf 98.878 0 Td [(A\050imin:imax,jmin:jmax\051)]TJ/F54 9.9626 Tf 115.068 0 Td [(,)-202(optionally)-190(r)18(escaling)-190(r)18(ow/-)]TJ -228.89 -11.955 Td [(col)-250(indices)-250(to)-250(the)-250(range)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG -/F59 9.9626 Tf 103.85 0 Td [(1)]TJ -0 g 0 G - [(:imax)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(-)]TJ -0 g 0 G - [(imin)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(+)]TJ -0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ -0 g 0 G - [(,)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(4.5)-1000(psb)]TJ +ET +q +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 201.825 706.129 Td [(geamaxs)-250(\227)-250(Generalized)-250(In\002nity)-250(Norm)]TJ/F62 9.9626 Tf -51.12 -18.964 Td [(This)-256(subr)18(outine)-255(computes)-256(a)-256(series)-255(of)-256(in\002nity)-256(norms)-256(on)-255(the)-256(columns)-256(of)-255(a)-256(dense)]TJ 0 -11.955 Td [(matrix)]TJ/F60 9.9626 Tf 31.785 0 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(:)]TJ/F60 9.9626 Tf 88.539 -11.955 Td [(r)-17(e)-25(s)]TJ/F93 10.3811 Tf 12.294 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F91 10.3811 Tf 7.042 0 Td [(\040)]TJ/F62 9.9626 Tf 13.273 0 Td [(max)]TJ/F60 7.5716 Tf 7.759 -7.336 Td [(k)]TJ/F91 10.3811 Tf 12.944 7.336 Td [(j)]TJ/F60 9.9626 Tf 3.298 0 Td [(x)]TJ/F93 10.3811 Tf 5.33 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(k)]TJ/F62 9.9626 Tf 4.598 0 Td [(,)]TJ/F60 9.9626 Tf 4.206 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F91 10.3811 Tf 4.274 0 Td [(j)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(:jmax)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(-)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -195.028 -22.296 Td [(call)]TJ 0 g 0 G - [(jmin)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(+)]TJ + [-525(psb_geamaxs\050res,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 141.219 0 Td [(.)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -245.069 -21.961 Td [(T)90(ype:)]TJ + [-525(info\051)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -22.638 Td [(On)-250(Entry)]TJ 0 g 0 G +ET +q +1 0 0 1 177.192 626.591 cm +[]0 d 0 J 0.398 w 0 0 m 290.737 0 l S +Q +BT +/F60 9.9626 Tf 183.195 618.023 Td [(r)-17(e)-25(s)-8868(x)]TJ/F59 9.9626 Tf 221.013 0 Td [(Subroutine)]TJ +ET +q +1 0 0 1 177.192 614.237 cm +[]0 d 0 J 0.398 w 0 0 m 290.737 0 l S +Q +BT +/F62 9.9626 Tf 183.17 605.669 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 420.527 605.868 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 423.516 605.669 Td [(geamaxs)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +ET +q +1 0 0 1 420.527 593.913 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 423.516 593.714 Td [(geamaxs)]TJ -240.346 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +ET +q +1 0 0 1 420.527 581.958 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 423.516 581.759 Td [(geamaxs)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +ET +q +1 0 0 1 420.527 570.003 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 423.516 569.804 Td [(geamaxs)]TJ +ET +q +1 0 0 1 177.192 566.018 cm +[]0 d 0 J 0.398 w 0 0 m 290.737 0 l S +Q 0 g 0 G - 0 -22.639 Td [(a)]TJ +BT +/F62 9.9626 Tf 280.768 537.639 Td [(T)92(able)-250(5:)-310(Data)-250(types)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ -160.078 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -77.918 -34.594 Td [(imin,imax,jmin,jmax)]TJ 0 g 0 G -/F54 9.9626 Tf 99.885 0 Td [(Minimum)-250(and)-250(maximum)-250(r)18(ow)-250(and)-250(column)-250(indices.)]TJ -74.978 -11.955 Td [(T)90(ype:)-310(optional.)]TJ +/F59 9.9626 Tf -130.063 -34.468 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.638 Td [(rscale,cscale)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 60.025 0 Td [(Whether)-250(to)-250(r)18(escale)-250(r)18(ow/column)-250(indices.)-310(T)90(ype:)-310(optional.)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf -60.025 -24.632 Td [(On)-250(Return)]TJ 0 g 0 G + 0 -19.926 Td [(x)]TJ 0 g 0 G - 0 -22.639 Td [(b)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.614 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.742 0 Td [(psb)]TJ +ET +q +1 0 0 1 436.673 415.699 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 439.811 415.5 Td [(T)]TJ +ET +q +1 0 0 1 445.669 415.699 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 448.807 415.5 Td [(vect)]TJ +ET +q +1 0 0 1 470.356 415.699 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 473.495 415.5 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 11.069 0 Td [(A)-250(copy)-250(of)-250(a)-250(submatrix)-250(of)]TJ/F59 9.9626 Tf 111.321 0 Td [(a)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ -102.713 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ +/F62 9.9626 Tf -297.884 -11.956 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(5)]TJ 0 g 0 G -/F51 9.9626 Tf -184.985 -22.639 Td [(info)]TJ + [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ/F51 9.9626 Tf -23.801 -31 Td [(3.2.12)-1000(clean)]TJ +/F59 9.9626 Tf -24.906 -19.925 Td [(desc)]TJ ET q -1 0 0 1 159.153 364.307 cm +1 0 0 1 171.218 383.818 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 162.142 364.108 Td [(zeros)-250(\227)-250(Eliminate)-250(zero)-250(coef)18(\002cients)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -62.247 -20.278 Td [(call)]TJ -0 g 0 G - [-525(a%clean_zeros\050info\051)]TJ/F54 9.9626 Tf 14.944 -12.634 Td [(Eliminates)-214(zer)18(o)-214(coef)18(\002cients)-214(in)-214(the)-214(input)-214(matrix.)-298(Note)-214(that)-214(depending)-214(on)-214(the)]TJ -14.944 -11.955 Td [(internal)-246(storage)-245(format,)-247(ther)18(e)-245(may)-246(still)-245(be)-246(some)-245(amount)-246(of)-246(ze)1(r)18(o)-246(padding)-246(in)-245(the)]TJ 0 -11.955 Td [(output.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -24.632 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F59 9.9626 Tf 174.207 383.619 Td [(a)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -22.638 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -22.639 Td [(a)]TJ +/F62 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ +ET +q +1 0 0 1 324.173 335.998 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 327.311 335.798 Td [(desc)]TJ +ET +q +1 0 0 1 348.86 335.998 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 351.998 335.798 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ -160.078 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -77.918 -35.908 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -222.214 -19.925 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G - 0 -22.638 Td [(a)]TJ + 0 -19.925 Td [(res)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(matrix)]TJ/F59 9.9626 Tf 50.659 0 Td [(a)]TJ/F54 9.9626 Tf 7.721 0 Td [(without)-250(zer)18(o)-250(coef)18(\002cients.)]TJ -43.436 -11.956 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ +/F62 9.9626 Tf 18.261 0 Td [(is)-250(the)-250(in\002nity)-250(norm)-250(of)-250(the)-250(columns)-250(of)]TJ/F60 9.9626 Tf 166.26 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -164.82 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.956 Td [(Speci\002ed)-330(as:)-470(a)-330(number)-330(or)-330(a)-330(rank-one)-330(array)-329(of)-330(long)-330(pr)18(ecision)-330(r)18(eal)-330(num-)]TJ 0 -11.955 Td [(bers.)]TJ 0 g 0 G -/F51 9.9626 Tf -184.985 -22.638 Td [(info)]TJ +/F59 9.9626 Tf -24.906 -19.925 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 143.074 -29.888 Td [(21)]TJ + 141.968 -89.943 Td [(38)]TJ 0 g 0 G ET endstream endobj -973 0 obj +1194 0 obj << -/Length 4488 +/Length 7404 >> stream 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(3.2.13)-1000(get)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(4.6)-1000(psb)]TJ ET q -1 0 0 1 200.01 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 202.999 706.129 Td [(diag)-250(\227)-250(Get)-250(main)-250(diagonal)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -52.294 -19.329 Td [(call)]TJ -0 g 0 G - [-525(a%get_diag\050d,info\051)]TJ/F54 9.9626 Tf 14.944 -12.144 Td [(Returns)-250(a)-250(copy)-250(of)-250(the)-250(main)-250(diagonal.)]TJ -0 g 0 G -/F51 9.9626 Tf -14.944 -20.49 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F59 11.9552 Tf 151.016 706.129 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(V)111(ector)]TJ/F62 9.9626 Tf -51.121 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(1-norm)-250(of)-250(a)-250(vector)]TJ/F60 9.9626 Tf 206.349 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -211.554 -11.955 Td [(If)]TJ/F60 9.9626 Tf 9.46 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F60 9.9626 Tf 125.989 -21.269 Td [(a)-25(s)-25(u)-25(m)]TJ/F91 10.3811 Tf 25.352 0 Td [(\040)-291(k)]TJ/F60 9.9626 Tf 19.007 0 Td [(x)]TJ/F60 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(k)]TJ/F62 9.9626 Tf -195.526 -21.269 Td [(else)-250(if)]TJ/F60 9.9626 Tf 28.159 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F60 9.9626 Tf 71.974 -21.269 Td [(a)-25(s)-25(u)-25(m)]TJ/F91 10.3811 Tf 25.353 0 Td [(\040)-291(k)]TJ/F60 9.9626 Tf 18.737 0 Td [(r)-17(e)]TJ/F93 10.3811 Tf 8.17 0 Td [(\050)]TJ/F60 9.9626 Tf 4.443 0 Td [(x)]TJ/F93 10.3811 Tf 5.33 0 Td [(\051)]TJ/F91 10.3811 Tf 4.274 0 Td [(k)]TJ/F62 7.5716 Tf 5.315 -1.858 Td [(1)]TJ/F93 10.3811 Tf 6.345 1.858 Td [(+)]TJ/F91 10.3811 Tf 10.256 0 Td [(k)]TJ/F60 9.9626 Tf 5.37 0 Td [(i)-32(m)]TJ/F93 10.3811 Tf 11.088 0 Td [(\050)]TJ/F60 9.9626 Tf 4.444 0 Td [(x)]TJ/F93 10.3811 Tf 5.329 0 Td [(\051)]TJ/F91 10.3811 Tf 4.274 0 Td [(k)]TJ/F62 7.5716 Tf 5.315 -1.858 Td [(1)]TJ/F67 9.9626 Tf -216.928 -19.411 Td [(psb_geasum\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -20.679 Td [(On)-250(Entry)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -20.679 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ -160.077 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ + [-525([,global]\051)-190(psb_norm1\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -77.917 -34.627 Td [(On)-250(Return)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -20.679 Td [(d)]TJ + [-525([,global]\051)]TJ 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(A)-250(copy)-250(of)-250(the)-250(main)-250(diagonal.)]TJ 13.839 -11.955 Td [(A)-250(one-dimensional)-250(array)-250(of)-250(the)-250(appr)18(opriate)-250(type.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.679 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F51 9.9626 Tf -23.8 -28.213 Td [(3.2.14)-1000(clip)]TJ ET q -1 0 0 1 203.317 472.944 cm +1 0 0 1 128.44 576.025 cm +[]0 d 0 J 0.398 w 0 0 m 286.622 0 l S +Q +BT +/F60 9.9626 Tf 134.691 567.457 Td [(a)-25(s)-25(u)-25(m)-7810(x)]TJ/F59 9.9626 Tf 220.765 0 Td [(Function)]TJ +ET +q +1 0 0 1 128.44 563.671 cm +[]0 d 0 J 0.398 w 0 0 m 286.622 0 l S +Q +BT +/F62 9.9626 Tf 134.417 555.103 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 371.775 555.303 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 206.306 472.745 Td [(diag)-250(\227)-250(Cut)-250(out)-250(main)-250(diagonal)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -55.601 -19.329 Td [(call)]TJ -0 g 0 G - [-525(a%clip_diag\050b,info\051)]TJ/F54 9.9626 Tf 14.944 -12.144 Td [(Returns)-250(a)-250(copy)-250(of)]TJ/F59 9.9626 Tf 79.73 0 Td [(a)]TJ/F54 9.9626 Tf 7.721 0 Td [(without)-250(the)-250(main)-250(diagonal.)]TJ -0 g 0 G -/F51 9.9626 Tf -102.395 -20.49 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.679 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 374.763 555.103 Td [(geasum)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +ET +q +1 0 0 1 371.775 543.347 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 374.763 543.148 Td [(geasum)]TJ -240.346 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +ET +q +1 0 0 1 371.775 531.392 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 374.763 531.193 Td [(geasum)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +ET +q +1 0 0 1 371.775 519.437 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 374.763 519.238 Td [(geasum)]TJ +ET +q +1 0 0 1 128.44 515.452 cm +[]0 d 0 J 0.398 w 0 0 m 286.622 0 l S +Q 0 g 0 G +BT +/F62 9.9626 Tf 229.958 487.074 Td [(T)92(able)-250(6:)-310(Data)-250(types)]TJ 0 g 0 G - 0 -20.679 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ -160.077 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -77.917 -34.627 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -130.063 -33.561 Td [(T)90(ype:)]TJ 0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G - 0 -20.679 Td [(b)]TJ +/F59 9.9626 Tf -29.828 -19.665 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(A)-250(copy)-250(of)]TJ/F59 9.9626 Tf 45.37 0 Td [(a)]TJ/F54 9.9626 Tf 7.721 0 Td [(without)-250(the)-250(main)-250(diagonal.)]TJ -39.252 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.622 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -184.984 -20.679 Td [(info)]TJ + 0 -19.666 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F51 9.9626 Tf -23.8 -28.213 Td [(3.2.15)-1000(tril)-250(\227)-250(Return)-250(the)-250(lower)-250(triangle)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 20.921 -19.329 Td [(call)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.614 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ +ET +q +1 0 0 1 385.864 366.56 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 389.002 366.361 Td [(T)]TJ +ET +q +1 0 0 1 394.86 366.56 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 397.998 366.361 Td [(vect)]TJ +ET +q +1 0 0 1 419.547 366.56 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 422.685 366.361 Td [(type)]TJ 0 g 0 G - [-525(a%tril\050l,info[,&)]TJ 15.691 -11.955 Td [(&)-525(diag,imin,imax,jmin,jmax,rscale,cscale,u]\051)]TJ/F54 9.9626 Tf -21.668 -22.671 Td [(Returns)-309(the)-308(lower)-309(triangular)-308(part)-309(of)-309(submatrix)]TJ/F59 9.9626 Tf 211.209 0 Td [(A\050imin:imax,jmin:jmax\051)]TJ/F54 9.9626 Tf 115.067 0 Td [(,)]TJ -341.22 -11.956 Td [(optionally)-190(r)18(escaling)-190(r)18(ow/col)-190(indices)-190(to)-190(the)-190(range)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG -/F59 9.9626 Tf 212.61 0 Td [(1)]TJ +/F62 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(6)]TJ 0 g 0 G - [(:imax)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(-)]TJ + [(.)]TJ 0 g 0 G - [(imin)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(+)]TJ +/F59 9.9626 Tf -24.907 -19.666 Td [(desc)]TJ +ET +q +1 0 0 1 120.408 334.939 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 123.397 334.74 Td [(a)]TJ 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ +/F62 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ +ET +q +1 0 0 1 273.363 287.119 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 276.501 286.919 Td [(desc)]TJ +ET +q +1 0 0 1 298.05 287.119 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 301.189 286.919 Td [(type)]TJ 0 g 0 G - [(,)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [(:jmax)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(-)]TJ +/F59 9.9626 Tf -222.215 -19.665 Td [(global)]TJ 0 g 0 G - [(jmin)]TJ +/F62 9.9626 Tf 33.764 0 Td [(Speci\002es)-226(whether)-226(the)-227(computation)-226(should)-226(include)-227(the)-226(global)-226(r)18(eduction)]TJ -8.857 -11.955 Td [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F67 9.9626 Tf 165.318 0 Td [(global)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(+)]TJ + [(=)]TJ 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ + [(.true.)]TJ 0 g 0 G -/F54 9.9626 Tf -212.61 -11.955 Td [(and)-250(r)18(eturing)-250(the)-250(complementary)-250(upper)-250(triangle.)]TJ +/F59 9.9626 Tf -190.225 -31.621 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -20.49 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + 0 -19.666 Td [(Function)-250(value)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.679 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 72.777 0 Td [(is)-250(the)-250(1-norm)-250(of)-250(vector)]TJ/F60 9.9626 Tf 102.161 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -155.236 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.133 0 Td [(global)]TJ/F62 9.9626 Tf 30.675 0 Td [(unless)-190(the)-190(optional)-190(variable)]TJ/F67 9.9626 Tf 121.612 0 Td [(global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G + [(.false.)]TJ/F62 9.9626 Tf 75.118 0 Td [(has)-190(been)-190(spec-)]TJ -258.538 -11.955 Td [(i\002ed)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ 0 g 0 G -/F54 9.9626 Tf 166.874 -29.888 Td [(22)]TJ + 141.968 -29.888 Td [(39)]TJ 0 g 0 G ET endstream endobj -977 0 obj +1199 0 obj << -/Length 6185 +/Length 3046 >> stream 0 g 0 G 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(a)]TJ +/F59 9.9626 Tf 150.705 706.129 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ -160.078 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ 0 g 0 G -/F51 9.9626 Tf -77.918 -30.706 Td [(diag)]TJ +/F62 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 24.907 0 Td [(Include)-300(diagonals)-301(up)-300(to)-301(this)-300(one;)]TJ/F59 9.9626 Tf 149.76 0 Td [(diag)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [-500(The)-190(computation)-190(of)-190(a)-190(global)-190(r)18(esult)-190(r)18(equir)18(es)-190(a)-190(global)-190(communication,)-202(which)]TJ 12.453 -11.955 Td [(entails)-318(a)-318(signi\002cant)-318(over)18(head.)-513(It)-318(may)-318(be)-318(necessary)-318(and/or)-317(advisable)-318(to)]TJ 0 -11.955 Td [(compute)-333(multiple)-333(norms)-333(at)-332(the)-333(same)-333(time;)-374(in)-333(this)-333(case,)-354(it)-333(i)1(s)-333(possible)-333(to)]TJ 0 -11.955 Td [(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +/F67 9.9626 Tf 20.922 -19.926 Td [(vres\050)]TJ 0.25 0.63 0.44 rg 0.25 0.63 0.44 RG [(1)]TJ 0 g 0 G -/F54 9.9626 Tf 34.376 0 Td [(means)-300(the)-301(\002rst)-300(super)18(diagonal,)]TJ/F59 9.9626 Tf -184.136 -11.955 Td [(diag)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=-)]TJ -0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 39.103 0 Td [(means)-250(the)-250(\002rst)-250(subdiagonal.)-310(Default)-250(0.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -64.01 -18.75 Td [(imin,imax,jmin,jmax)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 99.885 0 Td [(Minimum)-250(and)-250(maximum)-250(r)18(ow)-250(and)-250(column)-250(indices.)]TJ -74.978 -11.955 Td [(T)90(ype:)-310(optional.)]TJ + [-525(psb_geasum\050x1,desc_a,info,global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -18.75 Td [(rscale,cscale)]TJ + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 60.025 0 Td [(Whether)-250(to)-250(r)18(escale)-250(r)18(ow/column)-250(indices.)-310(T)90(ype:)-310(optional.)]TJ + 31.382 -11.955 Td [(vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(2)]TJ 0 g 0 G -/F51 9.9626 Tf -60.025 -18.979 Td [(On)-250(Return)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G - 0 -18.75 Td [(l)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 8.299 0 Td [(A)-250(copy)-250(of)-250(the)-250(lower)-250(triangle)-250(of)]TJ/F59 9.9626 Tf 137.333 0 Td [(a)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ -125.956 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ + [-525(psb_geasum\050x2,desc_a,info,global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -184.985 -18.75 Td [(u)]TJ + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 11.069 0 Td [(\050optional\051)-250(A)-250(copy)-250(of)-250(the)-250(upper)-250(triangle)-250(of)]TJ/F59 9.9626 Tf 184.485 0 Td [(a)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ -175.878 -11.956 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ + 0 -11.955 Td [(vres\050)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(3)]TJ 0 g 0 G -/F51 9.9626 Tf -184.985 -18.749 Td [(info)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ/F51 9.9626 Tf -23.801 -26.59 Td [(3.2.16)-1000(triu)-250(\227)-250(Return)-250(the)-250(upper)-250(triangle)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 20.922 -18.964 Td [(call)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G - [-525(a%triu\050u,info[,&)]TJ 15.691 -11.955 Td [(&)-525(diag,imin,imax,jmin,jmax,rscale,cscale,l]\051)]TJ/F54 9.9626 Tf -21.669 -18.979 Td [(Returns)-289(the)-290(upper)-289(triangular)-290(part)-289(of)-290(submatrix)]TJ/F59 9.9626 Tf 211.209 0 Td [(A\050imin:imax,jmin:jmax\051)]TJ/F54 9.9626 Tf 115.068 0 Td [(,)]TJ -341.221 -11.955 Td [(optionally)-190(r)18(escaling)-190(r)18(ow/col)-190(indices)-190(to)-190(the)-190(range)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG -/F59 9.9626 Tf 212.611 0 Td [(1)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(:imax)]TJ + [-525(psb_geasum\050x3,desc_a,info,global)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(-)]TJ + [(=)]TJ 0 g 0 G - [(imin)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(+)]TJ + [(.false.\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ 0 g 0 G + [-525(psb_sum\050ctxt,vres\050)]TJ 0.25 0.63 0.44 rg 0.25 0.63 0.44 RG [(1)]TJ 0 g 0 G - [(,)]TJ + [(:)]TJ 0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ + [(3)]TJ 0 g 0 G - [(:jmax)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(-)]TJ + [(\051\051)]TJ/F62 9.9626 Tf -52.304 -19.926 Td [(In)-253(this)-252(way)-253(the)-253(global)-253(communication,)-253(which)-253(for)-252(small)-253(sizes)-253(is)-252(a)-253(latency-)]TJ 0 -11.955 Td [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ 0 g 0 G - [(jmin)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(+)]TJ + 141.968 -402.49 Td [(40)]TJ 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ +ET + +endstream +endobj +1207 0 obj +<< +/Length 7502 +>> +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 99.895 706.129 Td [(4.7)-1000(psb)]TJ +ET +q +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 151.016 706.129 Td [(geasums)-250(\227)-250(Generalized)-250(1-Norm)-250(of)-250(V)111(ector)]TJ/F62 9.9626 Tf -51.121 -20.192 Td [(This)-216(subr)18(outine)-217(computes)-216(a)-217(series)-216(of)-216(1-norms)-217(on)-216(the)-217(columns)-216(of)-216(a)-217(dense)-216(matrix)]TJ/F60 9.9626 Tf 0.294 -11.955 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(:)]TJ/F60 9.9626 Tf 120.031 -13.856 Td [(r)-17(e)-25(s)]TJ/F93 10.3811 Tf 12.293 0 Td [(\050)]TJ/F60 9.9626 Tf 4.205 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F91 10.3811 Tf 7.041 0 Td [(\040)]TJ/F62 9.9626 Tf 13.273 0 Td [(max)]TJ/F60 7.5716 Tf 7.76 -7.335 Td [(k)]TJ/F91 10.3811 Tf 12.944 7.335 Td [(j)]TJ/F60 9.9626 Tf 3.298 0 Td [(x)]TJ/F93 10.3811 Tf 5.33 0 Td [(\050)]TJ/F60 9.9626 Tf 4.273 0 Td [(k)]TJ/F62 9.9626 Tf 4.598 0 Td [(,)]TJ/F60 9.9626 Tf 4.206 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F91 10.3811 Tf 4.274 0 Td [(j)]TJ/F62 9.9626 Tf -215.202 -24.535 Td [(This)-250(function)-250(computes)-250(the)-250(1-norm)-250(of)-250(a)-250(vector)]TJ/F60 9.9626 Tf 206.349 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -211.554 -11.955 Td [(If)]TJ/F60 9.9626 Tf 9.46 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F60 9.9626 Tf 125.227 -23.185 Td [(r)-17(e)-25(s)]TJ/F93 10.3811 Tf 12.293 0 Td [(\050)]TJ/F60 9.9626 Tf 4.205 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F91 10.3811 Tf 7.041 0 Td [(\040)-291(k)]TJ/F60 9.9626 Tf 19.007 0 Td [(x)]TJ/F60 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(k)]TJ/F62 9.9626 Tf -196.039 -23.185 Td [(else)-250(if)]TJ/F60 9.9626 Tf 28.159 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F60 9.9626 Tf 71.212 -23.185 Td [(r)-17(e)-25(s)]TJ/F93 10.3811 Tf 12.294 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.089 0 Td [(\051)]TJ/F91 10.3811 Tf 7.041 0 Td [(\040)-291(k)]TJ/F60 9.9626 Tf 18.737 0 Td [(r)-17(e)]TJ/F93 10.3811 Tf 8.169 0 Td [(\050)]TJ/F60 9.9626 Tf 4.444 0 Td [(x)]TJ/F93 10.3811 Tf 5.33 0 Td [(\051)]TJ/F91 10.3811 Tf 4.274 0 Td [(k)]TJ/F62 7.5716 Tf 5.315 -1.858 Td [(1)]TJ/F93 10.3811 Tf 6.345 1.858 Td [(+)]TJ/F91 10.3811 Tf 10.256 0 Td [(k)]TJ/F60 9.9626 Tf 5.37 0 Td [(i)-32(m)]TJ/F93 10.3811 Tf 11.088 0 Td [(\050)]TJ/F60 9.9626 Tf 4.443 0 Td [(x)]TJ/F93 10.3811 Tf 5.33 0 Td [(\051)]TJ/F91 10.3811 Tf 4.274 0 Td [(k)]TJ/F62 7.5716 Tf 5.315 -1.858 Td [(1)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -212.21 -21.96 Td [(call)]TJ +0 g 0 G + [-525(psb_geasums\050res,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 141.219 0 Td [(,)]TJ -353.83 -11.956 Td [(and)-250(r)18(eturing)-250(the)-250(complementary)-250(lower)-250(triangle.)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info\051)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +ET +q +1 0 0 1 126.328 514.627 cm +[]0 d 0 J 0.398 w 0 0 m 290.846 0 l S +Q +BT +/F60 9.9626 Tf 132.33 506.059 Td [(r)-17(e)-25(s)-8868(x)]TJ/F59 9.9626 Tf 221.014 0 Td [(Subroutine)]TJ +ET +q +1 0 0 1 126.328 502.274 cm +[]0 d 0 J 0.398 w 0 0 m 290.846 0 l S +Q +BT +/F62 9.9626 Tf 132.305 493.706 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 369.663 493.905 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 372.651 493.706 Td [(geasums)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +ET +q +1 0 0 1 369.663 481.95 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 372.651 481.751 Td [(geasums)]TJ -240.346 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +ET +q +1 0 0 1 369.663 469.995 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 372.651 469.795 Td [(geasums)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +ET +q +1 0 0 1 369.663 458.04 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 372.651 457.84 Td [(geasums)]TJ +ET +q +1 0 0 1 126.328 454.054 cm +[]0 d 0 J 0.398 w 0 0 m 290.846 0 l S +Q 0 g 0 G -/F51 9.9626 Tf 0 -17.574 Td [(T)90(ype:)]TJ +BT +/F62 9.9626 Tf 229.958 425.676 Td [(T)92(able)-250(7:)-310(Data)-250(types)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -18.75 Td [(On)-250(Entry)]TJ 0 g 0 G +/F59 9.9626 Tf -130.063 -37.636 Td [(T)90(ype:)]TJ 0 g 0 G - 0 -18.75 Td [(a)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ -160.078 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +/F59 9.9626 Tf -29.828 -22.46 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf -77.918 -30.706 Td [(diag)]TJ 0 g 0 G -/F54 9.9626 Tf 24.907 0 Td [(Include)-300(diagonals)-301(up)-300(to)-301(this)-300(one;)]TJ/F59 9.9626 Tf 149.76 0 Td [(diag)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + 0 -22.459 Td [(x)]TJ 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.614 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ +ET +q +1 0 0 1 385.864 295.5 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 389.002 295.3 Td [(T)]TJ +ET +q +1 0 0 1 394.86 295.5 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 397.998 295.3 Td [(vect)]TJ +ET +q +1 0 0 1 419.547 295.5 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 422.685 295.3 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 34.376 0 Td [(means)-300(the)-301(\002rst)-300(super)18(diagonal,)]TJ/F59 9.9626 Tf -184.136 -11.955 Td [(diag)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=-)]TJ +/F62 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(7)]TJ 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ + [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 39.103 0 Td [(means)-250(the)-250(\002rst)-250(subdiagonal.)-310(Default)-250(0.)]TJ +/F59 9.9626 Tf -24.907 -22.459 Td [(desc)]TJ +ET +q +1 0 0 1 120.408 261.085 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 123.397 260.886 Td [(a)]TJ 0 g 0 G -/F51 9.9626 Tf -64.01 -18.75 Td [(imin,imax,jmin,jmax)]TJ +/F62 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ +ET +q +1 0 0 1 273.363 213.264 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 276.501 213.065 Td [(desc)]TJ +ET +q +1 0 0 1 298.05 213.264 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 301.189 213.065 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 99.885 0 Td [(Minimum)-250(and)-250(maximum)-250(r)18(ow)-250(and)-250(column)-250(indices.)]TJ -74.978 -11.955 Td [(T)90(ype:)-310(optional.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -18.75 Td [(rscale,cscale)]TJ +/F59 9.9626 Tf -222.215 -22.459 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 60.025 0 Td [(Whether)-250(to)-250(r)18(escale)-250(r)18(ow/column)-250(indices.)-310(T)90(ype:)-310(optional.)]TJ 0 g 0 G -/F51 9.9626 Tf -60.025 -18.979 Td [(On)-250(Return)]TJ + 0 -22.46 Td [(res)]TJ 0 g 0 G +/F62 9.9626 Tf 18.262 0 Td [(contains)-250(the)-250(1-norm)-250(of)-250(\050the)-250(columns)-250(of\051)]TJ/F60 9.9626 Tf 176.182 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -174.742 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Short)-255(as:)-320(a)-255(long)-254(pr)18(ecision)-255(r)18(eal)-255(number)74(.)-325(Speci\002ed)-255(as:)-320(a)-254(long)-255(pr)18(ecision)-255(r)18(eal)]TJ 0 -11.955 Td [(number)74(.)]TJ 0 g 0 G - 0 -18.75 Td [(u)]TJ + 141.968 -29.888 Td [(41)]TJ 0 g 0 G -/F54 9.9626 Tf 11.069 0 Td [(A)-250(copy)-250(of)-250(the)-250(upper)-250(triangle)-250(of)]TJ/F59 9.9626 Tf 138.668 0 Td [(a)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ -130.06 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ +ET + +endstream +endobj +1212 0 obj +<< +/Length 583 +>> +stream 0 g 0 G -/F51 9.9626 Tf -184.985 -18.75 Td [(l)]TJ 0 g 0 G -/F54 9.9626 Tf 8.299 0 Td [(\050optional\051)-250(A)-250(copy)-250(of)-250(the)-250(lower)-250(triangle)-250(of)]TJ/F59 9.9626 Tf 183.151 0 Td [(a)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ -171.773 -11.955 Td [(A)-250(variable)-250(of)-250(type)]TJ/F59 9.9626 Tf 81.623 0 Td [(psb_Tspmat_type)]TJ/F54 9.9626 Tf 78.455 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -184.985 -18.75 Td [(info)]TJ +BT +/F59 9.9626 Tf 150.705 706.129 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 143.074 -29.888 Td [(23)]TJ + 141.968 -567.87 Td [(42)]TJ 0 g 0 G ET endstream endobj -983 0 obj +1219 0 obj << -/Length 7619 +/Length 6687 >> stream 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(3.2.17)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(4.8)-1000(psb)]TJ ET q -1 0 0 1 202.769 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 205.758 706.129 Td [(set)]TJ +/F59 11.9552 Tf 151.016 706.129 Td [(norm2)-250(\227)-250(2-Norm)-250(of)-250(V)111(ector)]TJ/F62 9.9626 Tf -51.121 -20.076 Td [(This)-250(function)-250(computes)-250(the)-250(2-norm)-250(of)-250(a)-250(vector)]TJ/F60 9.9626 Tf 206.349 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -211.554 -11.955 Td [(If)]TJ/F60 9.9626 Tf 9.46 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(2-norm)-250(as:)]TJ/F60 9.9626 Tf 122.551 -25.46 Td [(n)-15(r)-35(m)]TJ/F62 9.9626 Tf 17.788 0 Td [(2)]TJ/F91 10.3811 Tf 7.873 0 Td [(\040)]TJ 13.397 9.727 Td [(p)]TJ ET q -1 0 0 1 219.078 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 287.432 658.569 cm +[]0 d 0 J 0.408 w 0 0 m 16.592 0 l S Q BT -/F51 9.9626 Tf 222.067 706.129 Td [(mat)]TJ +/F60 9.9626 Tf 287.726 648.638 Td [(x)]TJ/F60 7.5716 Tf 5.399 2.88 Td [(T)]TJ/F60 9.9626 Tf 5.694 -2.88 Td [(x)]TJ/F62 9.9626 Tf -198.924 -23.065 Td [(else)-250(if)]TJ/F60 9.9626 Tf 28.159 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(2-norm)-250(as:)]TJ/F60 9.9626 Tf 103.113 -25.46 Td [(n)-15(r)-35(m)]TJ/F62 9.9626 Tf 17.789 0 Td [(2)]TJ/F91 10.3811 Tf 7.873 0 Td [(\040)]TJ 13.397 9.727 Td [(p)]TJ ET q -1 0 0 1 239.82 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 286.694 610.044 cm +[]0 d 0 J 0.408 w 0 0 m 18.069 0 l S Q BT -/F51 9.9626 Tf 242.809 706.129 Td [(default)-250(\227)-250(Set)-250(default)-250(storage)-250(format)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -92.104 -18.964 Td [(call)]TJ -0 g 0 G - [-1050(psb_set_mat_default\050a\051)]TJ +/F60 9.9626 Tf 286.988 600.113 Td [(x)]TJ/F60 7.5716 Tf 5.588 2.88 Td [(H)]TJ/F60 9.9626 Tf 6.982 -2.88 Td [(x)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -20.183 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.231 Td [(On)-250(Entry)]TJ +ET +q +1 0 0 1 128.689 575.464 cm +[]0 d 0 J 0.398 w 0 0 m 286.124 0 l S +Q +BT +/F60 9.9626 Tf 134.791 566.896 Td [(n)-15(r)-35(m)]TJ/F62 9.9626 Tf 17.788 0 Td [(2)]TJ/F60 9.9626 Tf 82.505 0 Td [(x)]TJ/F59 9.9626 Tf 120.621 0 Td [(Function)]TJ +ET +q +1 0 0 1 128.689 563.111 cm +[]0 d 0 J 0.398 w 0 0 m 286.124 0 l S +Q +BT +/F62 9.9626 Tf 134.667 554.543 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 372.024 554.742 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 375.012 554.543 Td [(genrm2)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +ET +q +1 0 0 1 372.024 542.787 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 375.012 542.588 Td [(genrm2)]TJ -240.346 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +ET +q +1 0 0 1 372.024 530.832 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 375.012 530.632 Td [(genrm2)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +ET +q +1 0 0 1 372.024 518.876 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 375.012 518.677 Td [(genrm2)]TJ +ET +q +1 0 0 1 128.689 514.891 cm +[]0 d 0 J 0.398 w 0 0 m 286.124 0 l S +Q 0 g 0 G +BT +/F62 9.9626 Tf 229.958 486.513 Td [(T)92(able)-250(8:)-310(Data)-250(types)]TJ 0 g 0 G - 0 -19.231 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(a)-203(variable)-203(of)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 55.42 0 Td [(class)]TJ +/F67 9.9626 Tf -115.119 -27.631 Td [(psb_genrm2\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(\050psb_T_base_sparse_mat\051)]TJ/F54 9.9626 Tf 148.475 0 Td [(r)18(equesting)-203(a)-204(new)-203(default)-203(stor)18(-)]TJ -188.951 -11.956 Td [(age)-250(format.)]TJ 0 -11.955 Td [(T)90(ype:)-310(r)18(equir)18(ed.)]TJ/F51 9.9626 Tf -24.907 -26.815 Td [(3.2.18)-1000(clone)-250(\227)-250(Clone)-250(current)-250(object)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 0 -18.964 Td [(call)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-1050(a%clone\050b,info\051)]TJ + [-525(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -20.183 Td [(T)90(ype:)]TJ + [-525([,global]\051)]TJ -14.944 -11.955 Td [(psb_norm2\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.231 Td [(On)-250(Entry)]TJ + [-525(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525([,global]\051)]TJ 0 g 0 G - 0 -19.231 Td [(a)]TJ +/F59 9.9626 Tf 0 -36.169 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -77.917 -32.138 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -29.828 -22.221 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.232 Td [(b)]TJ + 0 -22.221 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.614 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ +ET +q +1 0 0 1 385.864 318.695 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 389.002 318.495 Td [(T)]TJ +ET +q +1 0 0 1 394.86 318.695 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 397.998 318.495 Td [(vect)]TJ +ET +q +1 0 0 1 419.547 318.695 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 422.685 318.495 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -11.068 -19.231 Td [(info)]TJ +/F62 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(8)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ/F51 9.9626 Tf -23.8 -26.815 Td [(3.2.19)-1000(Named)-250(Constants)]TJ + [(.)]TJ 0 g 0 G - 0 -18.964 Td [(psb)]TJ +/F59 9.9626 Tf -24.907 -22.221 Td [(desc)]TJ ET q -1 0 0 1 167.9 372.049 cm +1 0 0 1 120.408 284.518 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 170.889 371.85 Td [(dupl)]TJ +/F59 9.9626 Tf 123.397 284.319 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 193.066 372.049 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 273.363 236.698 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 196.055 371.85 Td [(ovwrt)]TJ +/F67 9.9626 Tf 276.501 236.499 Td [(desc)]TJ ET q -1 0 0 1 223.222 372.049 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 298.05 236.698 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q +BT +/F67 9.9626 Tf 301.189 236.499 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -222.215 -22.221 Td [(global)]TJ +0 g 0 G +/F62 9.9626 Tf 33.764 0 Td [(Speci\002es)-226(whether)-226(the)-227(computation)-226(should)-226(include)-227(the)-226(global)-226(r)18(eduction)]TJ -8.857 -11.956 Td [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F67 9.9626 Tf 165.318 0 Td [(global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(.true.)]TJ +0 g 0 G +/F59 9.9626 Tf -190.225 -34.176 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 166.875 -29.888 Td [(43)]TJ +0 g 0 G +ET + +endstream +endobj +1225 0 obj +<< +/Length 4322 +>> +stream +0 g 0 G +0 g 0 G 0 g 0 G BT -/F54 9.9626 Tf 231.193 371.85 Td [(Duplicate)-259(coef)18(\002cients)-259(shou)1(ld)-259(be)-259(overwritten)-259(\050i.e.)-336(ignor)18(e)-259(du-)]TJ -55.582 -11.955 Td [(plications\051)]TJ +/F59 9.9626 Tf 150.705 706.129 Td [(Function)-250(V)111(alue)]TJ +0 g 0 G +/F62 9.9626 Tf 73.882 0 Td [(is)-250(the)-250(2-norm)-250(of)-250(vector)]TJ/F60 9.9626 Tf 102.161 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -156.342 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.133 0 Td [(global)]TJ/F62 9.9626 Tf 30.675 0 Td [(unless)-190(the)-190(optional)-190(variable)]TJ/F67 9.9626 Tf 121.612 0 Td [(global)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(.false.)]TJ/F62 9.9626 Tf 75.118 0 Td [(has)-190(been)-190(spec-)]TJ -258.538 -11.955 Td [(i\002ed)]TJ 0 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.925 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.231 Td [(psb)]TJ + [-500(The)-190(computation)-190(of)-190(a)-190(global)-190(r)18(esult)-190(r)18(equir)18(es)-190(a)-190(global)-190(communication,)-202(which)]TJ 12.453 -11.955 Td [(entails)-318(a)-318(signi\002cant)-318(over)18(head.)-513(It)-318(may)-318(be)-318(necessary)-318(and/or)-317(advisable)-318(to)]TJ 0 -11.955 Td [(compute)-333(multiple)-333(norms)-333(at)-332(the)-333(same)-333(time;)-374(in)-333(this)-333(case,)-354(it)-332(is)-333(possible)-333(to)]TJ 0 -11.955 Td [(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ 24.981 -17.933 Td [(v)-107(r)-108(e)-107(s)-266(\050)-159(1)-158(\051)-756(=)-657(p)-61(s)-61(b)]TJ ET q -1 0 0 1 167.9 340.863 cm +1 0 0 1 278.034 495.12 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 170.889 340.664 Td [(dupl)]TJ +/F62 9.9626 Tf 281.627 494.921 Td [(g)-61(e)-60(n)-61(r)-61(m)-60(2)-194(\050)-180(x)-46(1)-267(,)-273(d)-97(e)-98(s)-98(c)]TJ ET q -1 0 0 1 193.066 340.863 cm +1 0 0 1 367.96 495.12 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 196.055 340.664 Td [(add)]TJ +/F62 9.9626 Tf 371.922 494.921 Td [(a)-371(,)-283(i)-108(n)-108(f)-108(o)-391(,)-298(g)-123(l)-123(o)-124(b)-123(a)-123(l)-238(=)-115(.)-277(f)-162(a)-162(l)-162(s)-163(e)-367(.)-206(\051)]TJ -171.33 -11.955 Td [(v)-107(r)-108(e)-107(s)-266(\050)-159(2)-158(\051)-756(=)-657(p)-61(s)-61(b)]TJ ET q -1 0 0 1 213.808 340.863 cm +1 0 0 1 278.034 483.165 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 221.778 340.664 Td [(Duplicate)-250(coef)18(\002cients)-250(should)-250(be)-250(added;)]TJ -0 g 0 G -/F51 9.9626 Tf -71.073 -19.232 Td [(psb)]TJ +/F62 9.9626 Tf 281.627 482.966 Td [(g)-61(e)-60(n)-61(r)-61(m)-60(2)-194(\050)-180(x)-46(2)-267(,)-273(d)-97(e)-98(s)-98(c)]TJ ET q -1 0 0 1 167.9 321.632 cm +1 0 0 1 367.96 483.165 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 170.889 321.432 Td [(dupl)]TJ +/F62 9.9626 Tf 371.922 482.966 Td [(a)-371(,)-283(i)-108(n)-108(f)-108(o)-391(,)-298(g)-123(l)-123(o)-124(b)-123(a)-123(l)-238(=)-115(.)-277(f)-162(a)-162(l)-162(s)-163(e)-367(.)-206(\051)]TJ -171.33 -11.955 Td [(v)-107(r)-108(e)-107(s)-266(\050)-159(3)-158(\051)-756(=)-657(p)-61(s)-61(b)]TJ ET q -1 0 0 1 193.066 321.632 cm +1 0 0 1 278.034 471.21 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 196.055 321.432 Td [(err)]TJ +/F62 9.9626 Tf 281.627 471.011 Td [(g)-61(e)-60(n)-61(r)-61(m)-60(2)-194(\050)-180(x)-46(3)-267(,)-273(d)-97(e)-98(s)-98(c)]TJ ET q -1 0 0 1 209.384 321.632 cm +1 0 0 1 367.96 471.21 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 217.355 321.432 Td [(Duplicate)-250(coef)18(\002cients)-250(should)-250(trigger)-250(an)-250(err)18(or)-250(conditino)]TJ -0 g 0 G -/F51 9.9626 Tf -66.65 -19.231 Td [(psb)]TJ +/F62 9.9626 Tf 371.922 471.011 Td [(a)-371(,)-283(i)-108(n)-108(f)-108(o)-391(,)-298(g)-123(l)-123(o)-124(b)-123(a)-123(l)-238(=)-115(.)-277(f)-162(a)-162(l)-162(s)-163(e)-367(.)-206(\051)]TJ -170.658 -11.956 Td [(c)-175(a)-175(l)-174(l)-831(p)-56(s)-56(b)]TJ ET q -1 0 0 1 167.9 302.4 cm +1 0 0 1 247.952 459.255 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 170.889 302.201 Td [(upd)]TJ +/F62 9.9626 Tf 251.497 459.055 Td [(n)-56(r)-56(m)-55(2)-190(\050)-264(c)-132(t)-131(x)-131(t)-438(,)-283(v)-107(r)-107(e)-108(s)-300(\050)-193(1)-193(:)-193(3)-193(\051)-193(\051)]TJ +0 g 0 G +0 g 0 G + -75.886 -21.917 Td [(In)-253(this)-252(way)-253(the)-253(global)-253(communication,)-253(which)-253(for)-252(small)-253(sizes)-253(is)-252(a)-253(latency-)]TJ 0 -11.956 Td [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ +0 g 0 G + 141.968 -334.744 Td [(44)]TJ +0 g 0 G ET -q -1 0 0 1 189.748 302.4 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + +endstream +endobj +1238 0 obj +<< +/Length 6244 +>> +stream +0 g 0 G +0 g 0 G BT -/F51 9.9626 Tf 192.737 302.201 Td [(d\003t)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(4.9)-1000(psb)]TJ ET q -1 0 0 1 208.827 302.4 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 216.797 302.201 Td [(Default)-250(update)-250(strategy)-250(for)-250(matrix)-250(coef)18(\002cients;)]TJ +/F59 11.9552 Tf 151.016 706.129 Td [(genrm2s)-250(\227)-250(Generalized)-250(2-Norm)-250(of)-250(V)111(ector)]TJ/F62 9.9626 Tf -51.121 -18.964 Td [(This)-216(subr)18(outine)-217(computes)-216(a)-217(series)-216(of)-216(2-norms)-217(on)-216(the)-217(columns)-216(of)-216(a)-217(dense)-216(matrix)]TJ/F60 9.9626 Tf 0.294 -11.955 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(:)]TJ/F60 9.9626 Tf 126.858 -11.955 Td [(r)-17(e)-25(s)]TJ/F93 10.3811 Tf 12.294 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.089 0 Td [(\051)]TJ/F91 10.3811 Tf 7.041 0 Td [(\040)-291(k)]TJ/F60 9.9626 Tf 19.006 0 Td [(x)]TJ/F93 10.3811 Tf 5.33 0 Td [(\050)]TJ/F62 9.9626 Tf 4.274 0 Td [(:)-13(,)]TJ/F60 9.9626 Tf 6.821 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F91 10.3811 Tf 4.274 0 Td [(k)]TJ/F62 7.5716 Tf 5.315 -1.744 Td [(2)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -186.919 -16.189 Td [(call)]TJ +0 g 0 G + [-525(psb_genrm2s\050res,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info\051)]TJ +0 g 0 G +0 g 0 G 0 g 0 G -/F51 9.9626 Tf -66.092 -19.231 Td [(psb)]TJ ET q -1 0 0 1 167.9 283.169 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 126.577 630.954 cm +[]0 d 0 J 0.398 w 0 0 m 290.348 0 l S Q BT -/F51 9.9626 Tf 170.889 282.97 Td [(upd)]TJ +/F60 9.9626 Tf 132.579 622.386 Td [(r)-17(e)-25(s)-8868(x)]TJ/F59 9.9626 Tf 221.014 0 Td [(Subroutine)]TJ ET q -1 0 0 1 189.748 283.169 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 126.577 618.6 cm +[]0 d 0 J 0.398 w 0 0 m 290.348 0 l S Q BT -/F51 9.9626 Tf 192.737 282.97 Td [(srch)]TJ +/F62 9.9626 Tf 132.554 610.032 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 212.144 283.169 cm +1 0 0 1 369.912 610.231 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 220.114 282.97 Td [(Update)-250(strategy)-250(based)-250(on)-250(sear)18(ch)-250(into)-250(the)-250(data)-250(str)8(uctur)18(e;)]TJ -0 g 0 G -/F51 9.9626 Tf -69.409 -19.232 Td [(psb)]TJ +/F62 9.9626 Tf 372.9 610.032 Td [(genrm2s)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 167.9 263.938 cm +1 0 0 1 369.912 598.276 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 170.889 263.738 Td [(upd)]TJ +/F62 9.9626 Tf 372.9 598.077 Td [(genrm2s)]TJ -240.346 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 189.748 263.938 cm +1 0 0 1 369.912 586.321 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 192.737 263.738 Td [(perm)]TJ +/F62 9.9626 Tf 372.9 586.122 Td [(genrm2s)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 217.135 263.938 cm +1 0 0 1 369.912 574.366 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 225.106 263.738 Td [(Update)-392(strategy)-393(based)-392(on)-393(additional)-392(permutation)-393(data)-392(\050see)]TJ -49.495 -11.955 Td [(tools)-250(r)18(outine)-250(description\051.)]TJ/F51 11.9552 Tf -24.906 -28.807 Td [(3.3)-1000(Dense)-250(V)111(ector)-250(Data)-250(Structure)]TJ/F54 9.9626 Tf 0 -18.964 Td [(The)]TJ/F59 9.9626 Tf 20.094 0 Td [(psb)]TJ +/F62 9.9626 Tf 372.9 574.167 Td [(genrm2s)]TJ ET q -1 0 0 1 187.117 204.211 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 126.577 570.381 cm +[]0 d 0 J 0.398 w 0 0 m 290.348 0 l S Q +0 g 0 G BT -/F59 9.9626 Tf 190.255 204.012 Td [(T)]TJ +/F62 9.9626 Tf 229.958 542.002 Td [(T)92(able)-250(9:)-310(Data)-250(types)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +/F59 9.9626 Tf -130.063 -34.468 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.614 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ ET q -1 0 0 1 196.113 204.211 cm +1 0 0 1 385.864 420.062 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 199.252 204.012 Td [(vect)]TJ +/F67 9.9626 Tf 389.002 419.863 Td [(T)]TJ ET q -1 0 0 1 220.801 204.211 cm +1 0 0 1 394.86 420.062 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 223.939 204.012 Td [(type)]TJ/F54 9.9626 Tf 24.338 0 Td [(data)-343(str)8(uctur)18(e)-343(encapsulates)-343(the)-343(dense)-343(vectors)-342(in)-343(a)-343(way)]TJ -97.572 -11.955 Td [(similar)-368(to)-368(sparse)-368(matrices,)-397(i.e.)-664(including)-368(a)-368(base)-368(type)]TJ/F59 9.9626 Tf 242.472 0 Td [(psb)]TJ +/F67 9.9626 Tf 397.998 419.863 Td [(vect)]TJ ET q -1 0 0 1 409.495 192.256 cm +1 0 0 1 419.547 420.062 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 412.633 192.057 Td [(T)]TJ +/F67 9.9626 Tf 422.685 419.863 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf -297.883 -11.956 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(9)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(desc)]TJ ET q -1 0 0 1 418.491 192.256 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 120.408 388.181 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 421.63 192.057 Td [(base)]TJ +/F59 9.9626 Tf 123.397 387.982 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 443.178 192.256 cm +1 0 0 1 273.363 340.361 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 446.317 192.057 Td [(vect)]TJ +/F67 9.9626 Tf 276.501 340.161 Td [(desc)]TJ ET q -1 0 0 1 467.866 192.256 cm +1 0 0 1 298.05 340.361 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 471.004 192.057 Td [(type)]TJ/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -341.22 -11.956 Td [(The)-263(user)-263(will)-263(not,)-266(in)-263(general,)-267(access)-263(the)-263(vector)-263(components)-263(dir)18(ectly)111(,)-266(but)-263(rather)]TJ 0 -11.955 Td [(via)-222(the)-222(r)18(out)1(ines)-222(of)-222(sec.)]TJ -0 0 1 rg 0 0 1 RG - [-222(6)]TJ +/F67 9.9626 Tf 301.189 340.161 Td [(type)]TJ 0 g 0 G - [(.)-300(Among)-222(other)-222(simple)-222(things,)-227(we)-222(de\002ne)-222(her)18(e)-221(an)-222(extrac-)]TJ 0 -11.955 Td [(tion)-273(method)-274(that)-273(can)-274(be)-273(used)-274(to)-273(get)-274(a)-273(full)-274(copy)-273(of)-274(the)-273(part)-274(of)-273(the)-274(vector)-273(stor)18(ed)]TJ 0 -11.955 Td [(on)-250(the)-250(local)-250(pr)18(ocess.)]TJ 14.944 -11.955 Td [(The)-311(type)-311(declaration)-311(is)-310(shown)-311(in)-311(\002gur)18(e)]TJ -0 0 1 rg 0 0 1 RG - [-311(3)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [-311(wher)18(e)]TJ/F59 9.9626 Tf 217.442 0 Td [(T)]TJ/F54 9.9626 Tf 8.327 0 Td [(is)-311(a)-311(placeholder)-311(for)-310(the)]TJ -240.713 -11.955 Td [(data)-250(type)-250(and)-250(pr)18(ecision)-250(variants)]TJ +/F59 9.9626 Tf -222.215 -19.925 Td [(On)-250(Return)]TJ 0 g 0 G - 166.874 -29.888 Td [(24)]TJ +0 g 0 G + 0 -19.925 Td [(res)]TJ +0 g 0 G +/F62 9.9626 Tf 18.262 0 Td [(contains)-250(the)-250(1-norm)-250(of)-250(\050the)-250(columns)-250(of\051)]TJ/F60 9.9626 Tf 176.182 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -174.742 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 141.968 -106.261 Td [(45)]TJ 0 g 0 G ET endstream endobj -989 0 obj +1245 0 obj << -/Length 4359 +/Length 5385 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(I)]TJ -0 g 0 G -/F54 9.9626 Tf 8.857 0 Td [(Integer;)]TJ -0 g 0 G -/F51 9.9626 Tf -8.857 -20.359 Td [(S)]TJ -0 g 0 G -/F54 9.9626 Tf 11.069 0 Td [(Single)-250(pr)18(ecision)-250(r)18(eal;)]TJ -0 g 0 G -/F51 9.9626 Tf -11.069 -20.358 Td [(D)]TJ -0 g 0 G -/F54 9.9626 Tf 13.281 0 Td [(Double)-250(pr)18(ecision)-250(r)18(eal;)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(4.10)-1000(psb)]TJ +ET +q +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 207.803 706.129 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(Sparse)-250(Matrix)]TJ/F62 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(1-norm)-250(of)-250(a)-250(matrix)]TJ/F60 9.9626 Tf 208.231 0 Td [(A)]TJ/F62 9.9626 Tf 7.318 0 Td [(:)]TJ/F60 9.9626 Tf -74.65 -33.873 Td [(n)-15(r)-35(m)]TJ/F62 9.9626 Tf 17.788 0 Td [(1)]TJ/F91 10.3811 Tf 7.873 0 Td [(\040)-291(k)]TJ/F60 9.9626 Tf 19.335 0 Td [(A)]TJ/F91 10.3811 Tf 7.442 0 Td [(k)]TJ/F62 7.5716 Tf 5.315 -1.858 Td [(1)]TJ/F62 9.9626 Tf -198.652 -20.06 Td [(wher)18(e:)]TJ 0 g 0 G -/F51 9.9626 Tf -13.281 -20.359 Td [(C)]TJ +/F60 9.9626 Tf 0.622 -19.925 Td [(A)]TJ 0 g 0 G -/F54 9.9626 Tf 12.175 0 Td [(Single)-250(pr)18(ecision)-250(complex;)]TJ +/F62 9.9626 Tf 12.299 0 Td [(r)18(epr)18(esents)-250(the)-250(global)-250(matrix)]TJ/F60 9.9626 Tf 125.981 0 Td [(A)]TJ 0 g 0 G -/F51 9.9626 Tf -12.175 -20.358 Td [(Z)]TJ 0 g 0 G -/F54 9.9626 Tf 11.627 0 Td [(Double)-250(pr)18(ecision)-250(complex.)]TJ -11.627 -20.251 Td [(The)-209(actual)-208(data)-209(is)-208(contained)-209(in)-209(the)-208(polymorphic)-209(component)]TJ/F59 9.9626 Tf 261.152 0 Td [(v%v)]TJ/F54 9.9626 Tf 15.691 0 Td [(;)-222(the)-209(separation)]TJ -276.843 -11.955 Td [(between)-353(the)-353(application)-353(and)-353(the)-353(actual)-353(data)-353(is)-353(esse)1(ntial)-353(for)-353(cases)-353(wher)18(e)-353(it)-353(is)]TJ 0 -11.955 Td [(necessary)-321(to)-321(link)-320(to)-321(data)-321(storage)-321(made)-320(available)-321(elsewher)18(e)-321(outside)-320(the)-321(dir)18(ect)]TJ 0 -11.955 Td [(contr)18(ol)-231(of)-231(the)-231(compiler/application,)-235(e.g.)-304(data)-231(stor)18(ed)-231(in)-231(a)-231(graphics)-231(accelerator)-74('s)]TJ 0 -11.955 Td [(private)-250(memory)111(.)]TJ 0 g 0 G -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 99.895 452.975 cm -0 0 343.711 82.69 re f +1 0 0 1 229.61 588.515 cm +[]0 d 0 J 0.398 w 0 0 m 185.901 0 l S +Q +BT +/F60 9.9626 Tf 236.21 579.947 Td [(A)]TJ/F59 9.9626 Tf 120.293 0 Td [(Function)]TJ +ET +q +1 0 0 1 229.61 576.161 cm +[]0 d 0 J 0.398 w 0 0 m 185.901 0 l S +Q +BT +/F62 9.9626 Tf 235.587 567.594 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 372.821 567.793 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 375.81 567.594 Td [(spnrm1)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +ET +q +1 0 0 1 372.821 555.838 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 375.81 555.638 Td [(spnrm1)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +ET +q +1 0 0 1 372.821 543.882 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 375.81 543.683 Td [(spnrm1)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +ET +q +1 0 0 1 372.821 531.927 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 375.81 531.728 Td [(spnrm1)]TJ +ET +q +1 0 0 1 229.61 527.942 cm +[]0 d 0 J 0.398 w 0 0 m 185.901 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 112.299 525.005 Td [(type)]TJ +/F62 9.9626 Tf 278.277 499.564 Td [(T)92(able)-250(10:)-310(Data)-250(types)]TJ 0 g 0 G - [-525(psb_T_base_vect_type)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 9.414 -10.959 Td [(TYPE)]TJ 0 g 0 G - [(\050KIND_\051,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(allocatable)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ 0 g 0 G - [-525(v\050:\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -9.414 -10.959 Td [(end)-525(type)]TJ +/F67 9.9626 Tf -127.572 -33.873 Td [(psb_spnrm1\050A,)-525(desc_a,)-525(info\051)]TJ 0 -11.955 Td [(psb_norm1\050A,)-525(desc_a,)-525(info\051)]TJ 0 g 0 G - [-525(psb_T_base_vect_type)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -21.918 Td [(type)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G - [-525(psb_T_vect_type)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 9.414 -10.959 Td [(class)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G - [(\050psb_T_base_vect_type\051,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(allocatable)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ 0 g 0 G - [-525(v)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -9.414 -10.959 Td [(end)-525(type)]TJ + 0 -19.926 Td [(a)]TJ 0 g 0 G - [-1050(psb_T_vect_type)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(global)-250(sparse)-250(matrix)]TJ/F60 9.9626 Tf 194.722 0 Td [(A)]TJ/F62 9.9626 Tf 7.317 0 Td [(.)]TJ -187.095 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.242 0 Td [(psb)]TJ +ET +q +1 0 0 1 324.173 344.346 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 327.311 344.147 Td [(Tspmat)]TJ +ET +q +1 0 0 1 359.321 344.346 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 362.459 344.147 Td [(type)]TJ 0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 6.677 -41.429 Td [(Listing)-250(3:)-310(The)-250(PSBLAS)-250(de\002ned)-250(data)-250(type)-250(that)-250(contains)-250(a)-250(dense)-250(vector)74(.)]TJ/F51 9.9626 Tf -19.081 -39.929 Td [(3.3.1)-1000(V)111(ector)-250(Methods)]TJ 0 -19.174 Td [(3.3.2)-1000(get)]TJ +/F59 9.9626 Tf -232.675 -19.926 Td [(desc)]TJ ET q -1 0 0 1 144.219 358.919 cm +1 0 0 1 171.218 324.421 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 147.208 358.719 Td [(nrows)-250(\227)-250(Get)-250(number)-250(of)-250(rows)-250(in)-250(a)-250(dense)-250(vector)]TJ/F59 9.9626 Tf -47.313 -19.173 Td [(nr)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ -0 g 0 G - [-525(v%get_nrows\050\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -22.351 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.359 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -20.358 Td [(v)]TJ -0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ -0 g 0 G - -56.339 -34.198 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf 174.207 324.221 Td [(a)]TJ 0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ +ET +q +1 0 0 1 324.173 276.6 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 327.311 276.401 Td [(desc)]TJ +ET +q +1 0 0 1 348.86 276.6 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 351.998 276.401 Td [(type)]TJ 0 g 0 G - 0 -20.358 Td [(Function)-250(value)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(number)-250(of)-250(r)18(ows)-250(of)-250(dense)-250(vector)]TJ/F59 9.9626 Tf 161.273 0 Td [(v)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ/F51 9.9626 Tf -239.281 -27.757 Td [(3.3.3)-1000(sizeof)-250(\227)-250(Get)-250(memory)-250(occupation)-250(in)-250(bytes)-250(of)-250(a)-250(dense)-250(vector)]TJ/F59 9.9626 Tf 0 -19.174 Td [(memory_size)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ +/F59 9.9626 Tf -222.214 -19.926 Td [(On)-250(Return)]TJ 0 g 0 G - [-525(v%sizeof\050\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.351 Td [(T)90(ype:)]TJ + 0 -19.925 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 72.776 0 Td [(is)-250(the)-250(1-norm)-250(of)-250(sparse)-250(submatrix)]TJ/F60 9.9626 Tf 150.4 0 Td [(A)]TJ/F62 9.9626 Tf 7.317 0 Td [(.)]TJ -205.587 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.358 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -24.906 -19.926 Td [(info)]TJ 0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G -/F54 9.9626 Tf 166.875 -29.888 Td [(25)]TJ + 141.968 -54.456 Td [(46)]TJ 0 g 0 G ET endstream endobj -996 0 obj +1252 0 obj << -/Length 3735 +/Length 5404 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(v)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(4.11)-1000(psb)]TJ +ET +q +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 156.993 706.129 Td [(normi)-250(\227)-250(In\002nity)-250(Norm)-250(of)-250(Sparse)-250(Matrix)]TJ/F62 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(in\002nity-norm)-250(of)-250(a)-250(matrix)]TJ/F60 9.9626 Tf 235.459 0 Td [(A)]TJ/F62 9.9626 Tf 7.318 0 Td [(:)]TJ/F60 9.9626 Tf -102.327 -33.873 Td [(n)-15(r)-35(m)-18(i)]TJ/F91 10.3811 Tf 23.698 0 Td [(\040)-291(k)]TJ/F60 9.9626 Tf 19.336 0 Td [(A)]TJ/F91 10.3811 Tf 7.442 0 Td [(k)]TJ/F104 7.5716 Tf 5.409 -1.494 Td [(\245)]TJ/F62 9.9626 Tf -196.335 -20.424 Td [(wher)18(e:)]TJ 0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ +/F60 9.9626 Tf 0.623 -19.925 Td [(A)]TJ 0 g 0 G - -56.338 -36.868 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 12.299 0 Td [(r)18(epr)18(esents)-250(the)-250(global)-250(matrix)]TJ/F60 9.9626 Tf 125.981 0 Td [(A)]TJ 0 g 0 G 0 g 0 G - 0 -23.918 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 72.776 0 Td [(The)-250(memory)-250(occupation)-250(in)-250(bytes.)]TJ/F51 9.9626 Tf -72.776 -32.82 Td [(3.3.4)-1000(set)-250(\227)-250(Set)-250(contents)-250(of)-250(the)-250(vector)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 5.23 -20.898 Td [(call)]TJ +ET +q +1 0 0 1 179.842 588.515 cm +[]0 d 0 J 0.398 w 0 0 m 183.819 0 l S +Q +BT +/F60 9.9626 Tf 186.442 579.947 Td [(A)]TJ/F59 9.9626 Tf 120.292 0 Td [(Function)]TJ +ET +q +1 0 0 1 179.842 576.161 cm +[]0 d 0 J 0.398 w 0 0 m 183.819 0 l S +Q +BT +/F62 9.9626 Tf 185.819 567.594 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 323.053 567.793 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 326.042 567.594 Td [(spnrmi)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +ET +q +1 0 0 1 323.053 555.838 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 326.042 555.638 Td [(spnrmi)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +ET +q +1 0 0 1 323.053 543.882 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 326.042 543.683 Td [(spnrmi)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +ET +q +1 0 0 1 323.053 531.927 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 326.042 531.728 Td [(spnrmi)]TJ +ET +q +1 0 0 1 179.842 527.942 cm +[]0 d 0 J 0.398 w 0 0 m 183.819 0 l S +Q 0 g 0 G - [-1050(v%set\050alpha[,first,last]\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ +BT +/F62 9.9626 Tf 227.467 499.564 Td [(T)92(able)-250(11:)-310(Data)-250(types)]TJ 0 g 0 G - [-1050(v%set\050vect[,first,last]\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ 0 g 0 G - [-1050(v%zero\050\051)]TJ 0 g 0 G -/F51 9.9626 Tf -5.23 -24.913 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F67 9.9626 Tf -127.572 -33.873 Td [(psb_spnrmi\050A,)-525(desc_a,)-525(info\051)]TJ 0 -11.955 Td [(psb_normi\050A,)-525(desc_a,)-525(info\051)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -23.918 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G - 0 -23.918 Td [(v)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ 0 g 0 G - -56.338 -35.873 Td [(alpha)]TJ + 0 -19.926 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 30.436 0 Td [(A)-250(scalar)-250(value.)]TJ -5.53 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(global)-250(sparse)-250(matrix)]TJ/F60 9.9626 Tf 194.722 0 Td [(A)]TJ/F62 9.9626 Tf 7.318 0 Td [(.)]TJ -187.096 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG - [-250(1)]TJ +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ +ET +q +1 0 0 1 273.363 344.346 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 276.501 344.147 Td [(Tspmat)]TJ +ET +q +1 0 0 1 308.511 344.346 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 311.649 344.147 Td [(type)]TJ 0 g 0 G - [(.)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -23.918 Td [(\002rst,last)]TJ +/F59 9.9626 Tf -232.676 -19.926 Td [(desc)]TJ +ET +q +1 0 0 1 120.408 324.421 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 123.397 324.221 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 41.215 0 Td [(Boundaries)-250(for)-250(setting)-250(in)-250(the)-250(vector)74(.)]TJ -16.309 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(integers.)]TJ +/F62 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ +ET +q +1 0 0 1 273.363 276.6 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 276.501 276.401 Td [(desc)]TJ +ET +q +1 0 0 1 298.05 276.6 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 301.189 276.401 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -23.918 Td [(vect)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.242 0 Td [(An)-250(array)]TJ 1.664 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(1)]TJ +/F59 9.9626 Tf -222.215 -19.926 Td [(On)-250(Return)]TJ 0 g 0 G - [(.)]TJ -24.906 -25.91 Td [(Note)-336(that)-336(a)-335(call)-336(to)]TJ/F59 9.9626 Tf 84.614 0 Td [(v%zero\050\051)]TJ/F54 9.9626 Tf 45.189 0 Td [(is)-336(pr)18(ovided)-336(as)-335(a)-336(shorthand,)-358(but)-335(is)-336(equivalent)-336(to)]TJ -129.803 -11.956 Td [(a)-270(call)-270(to)]TJ/F59 9.9626 Tf 36.947 0 Td [(v%set\050zero\051)]TJ/F54 9.9626 Tf 60.225 0 Td [(with)-270(the)]TJ/F59 9.9626 Tf 39.456 0 Td [(zero)]TJ/F54 9.9626 Tf 23.613 0 Td [(constant)-270(having)-270(the)-271(appr)18(opriat)1(e)-271(type)-270(and)]TJ -160.241 -11.955 Td [(kind.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -25.91 Td [(On)-250(Return)]TJ + 0 -19.925 Td [(Function)-250(value)]TJ 0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(is)-250(the)-250(in\002nity-norm)-250(of)-250(sparse)-250(submatrix)]TJ/F60 9.9626 Tf 177.627 0 Td [(A)]TJ/F62 9.9626 Tf 7.317 0 Td [(.)]TJ -232.814 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ 0 g 0 G - 0 -23.918 Td [(v)]TJ +/F59 9.9626 Tf -24.907 -19.926 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(dense)-250(vector)74(,)-250(with)-250(updated)-250(entries)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G -/F54 9.9626 Tf 110.536 -41.843 Td [(26)]TJ + 141.968 -54.456 Td [(47)]TJ 0 g 0 G ET endstream endobj -1003 0 obj +1263 0 obj << -/Length 4464 +/Length 7972 >> stream 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(3.3.5)-1000(get)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(4.12)-1000(psb)]TJ ET q -1 0 0 1 144.219 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 147.208 706.129 Td [(vect)-250(\227)-250(Get)-250(a)-250(copy)-250(of)-250(the)-250(vector)-250(contents)]TJ -0 g 0 G +/F59 11.9552 Tf 207.803 706.129 Td [(spmm)-250(\227)-250(Sparse)-250(Matrix)-250(by)-250(Dense)-250(Matrix)-250(Product)]TJ/F62 9.9626 Tf -57.098 -19.303 Td [(This)-250(subr)18(outine)-250(computes)-250(the)-250(Sparse)-250(Matrix)-250(by)-250(Dense)-250(Matrix)-250(Pr)18(oduct:)]TJ/F60 9.9626 Tf 140.147 -24.611 Td [(y)]TJ/F91 10.3811 Tf 7.998 0 Td [(\040)]TJ/F68 9.9626 Tf 13.397 0 Td [(a)]TJ/F60 9.9626 Tf 6.008 0 Td [(A)-42(x)]TJ/F93 10.3811 Tf 14.878 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.649 0 Td [(y)]TJ 0 g 0 G -/F59 9.9626 Tf -47.313 -19.66 Td [(extv)-525(=)-525(v%get_vect\050[n]\051)]TJ +/F62 9.9626 Tf 133.513 0 Td [(\0501\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.994 Td [(T)90(ype:)]TJ +/F60 9.9626 Tf -194.745 -20.13 Td [(y)]TJ/F91 10.3811 Tf 7.998 0 Td [(\040)]TJ/F68 9.9626 Tf 13.398 0 Td [(a)]TJ/F60 9.9626 Tf 6.007 0 Td [(A)]TJ/F60 7.5716 Tf 7.511 4.115 Td [(T)]TJ/F60 9.9626 Tf 5.694 -4.115 Td [(x)]TJ/F93 10.3811 Tf 7.267 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.649 0 Td [(y)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 130.715 0 Td [(\0502\051)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -21.362 Td [(On)-250(Entry)]TJ +/F60 9.9626 Tf -195.482 -20.129 Td [(y)]TJ/F91 10.3811 Tf 7.998 0 Td [(\040)]TJ/F68 9.9626 Tf 13.397 0 Td [(a)]TJ/F60 9.9626 Tf 6.008 0 Td [(A)]TJ/F60 7.5716 Tf 7.7 4.114 Td [(H)]TJ/F60 9.9626 Tf 6.981 -4.114 Td [(x)]TJ/F93 10.3811 Tf 7.267 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.649 0 Td [(y)]TJ 0 g 0 G +/F62 9.9626 Tf 129.977 0 Td [(\0503\051)]TJ 0 g 0 G - 0 -21.361 Td [(v)]TJ + -317.15 -18.633 Td [(wher)18(e:)]TJ 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(dense)-250(vector)]TJ 14.386 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ +/F60 9.9626 Tf -14.65 -20.451 Td [(x)]TJ 0 g 0 G - -56.339 -33.316 Td [(n)]TJ +/F62 9.9626 Tf 10.186 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 115.61 0 Td [(x)]TJ/F62 7.5716 Tf 5.201 -1.494 Td [(:)-12(,)-13(:)]TJ 0 g 0 G -/F54 9.9626 Tf 11.069 0 Td [(Size)-250(to)-250(be)-250(r)18(eturned)]TJ 13.838 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(;)-250(default:)-310(entir)18(e)-250(vector)74(.)]TJ +/F60 9.9626 Tf -131.167 -19.132 Td [(y)]TJ 0 g 0 G -/F51 9.9626 Tf -90.182 -34.95 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 10.087 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 115.441 0 Td [(y)]TJ/F62 7.5716 Tf 5.2 -1.494 Td [(:)-13(,)-12(:)]TJ 0 g 0 G +/F60 9.9626 Tf -130.23 -19.131 Td [(A)]TJ 0 g 0 G - 0 -21.361 Td [(Function)-250(value)]TJ +/F62 9.9626 Tf 12.299 0 Td [(is)-250(the)-250(global)-250(sparse)-250(matrix)]TJ/F60 9.9626 Tf 118.41 0 Td [(A)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(An)-316(allocatable)-316(array)-316(holding)-316(a)-317(copy)-316(of)-316(the)-316(dense)-316(vector)-316(con-)]TJ -47.87 -11.955 Td [(tents.)-321(If)-254(the)-254(ar)18(gument)]TJ/F52 9.9626 Tf 98.086 0 Td [(n)]TJ/F54 9.9626 Tf 8.192 0 Td [(is)-254(speci\002ed,)-255(the)-253(size)-254(of)-254(the)-254(r)18(eturned)-254(array)-253(equals)]TJ -106.278 -11.955 Td [(the)-339(minimum)-339(between)]TJ/F52 9.9626 Tf 105.247 0 Td [(n)]TJ/F54 9.9626 Tf 9.041 0 Td [(and)-339(the)-339(internal)-339(size)-339(of)-339(the)-339(vector)74(,)-361(or)-339(0)-339(if)]TJ/F52 9.9626 Tf 188.353 0 Td [(n)]TJ/F54 9.9626 Tf 9.04 0 Td [(is)]TJ -311.681 -11.956 Td [(negative;)-314(otherwise,)-303(the)-292(size)-293(of)-292(the)-293(array)-292(is)-293(the)-292(same)-293(as)-292(the)-293(internal)-292(size)]TJ 0 -11.955 Td [(of)-250(the)-250(vector)74(.)]TJ/F51 9.9626 Tf -24.907 -29.183 Td [(3.3.6)-1000(clone)-250(\227)-250(Clone)-250(current)-250(object)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf 0 -19.659 Td [(call)-1050(x%clone\050y,info\051)]TJ +ET +q +1 0 0 1 230.392 517.986 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +Q +BT +/F60 9.9626 Tf 236.992 509.418 Td [(A)]TJ/F62 9.9626 Tf 7.318 0 Td [(,)]TJ/F60 9.9626 Tf 5.275 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(,)]TJ/F60 9.9626 Tf 5.106 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(,)]TJ/F68 9.9626 Tf 5.106 0 Td [(a)]TJ/F62 9.9626 Tf 5.385 0 Td [(,)]TJ/F68 9.9626 Tf 5.355 0 Td [(b)]TJ/F59 9.9626 Tf 76.437 0 Td [(Subroutine)]TJ +ET +q +1 0 0 1 230.392 505.633 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +Q +BT +/F62 9.9626 Tf 236.369 497.065 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 373.603 497.264 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 376.592 497.065 Td [(spmm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +ET +q +1 0 0 1 373.603 485.309 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 376.592 485.11 Td [(spmm)]TJ -140.223 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +ET +q +1 0 0 1 373.603 473.354 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 376.592 473.154 Td [(spmm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +ET +q +1 0 0 1 373.603 461.398 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 376.592 461.199 Td [(spmm)]TJ +ET +q +1 0 0 1 230.392 457.413 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +Q 0 g 0 G -/F51 9.9626 Tf 0 -22.995 Td [(T)90(ype:)]TJ +BT +/F62 9.9626 Tf 278.277 429.035 Td [(T)92(able)-250(12:)-310(Data)-250(types)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -21.361 Td [(On)-250(Entry)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -107.398 -24.261 Td [(call)]TJ 0 g 0 G - 0 -21.362 Td [(x)]TJ + [-525(psb_spmm\050alpha,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(dense)-250(vector)74(.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ + [-525(a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -77.918 -34.95 Td [(On)-250(Return)]TJ + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(beta,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -21.361 Td [(y)]TJ + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -10.521 -21.361 Td [(info)]TJ + [-525(info\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Return)-250(code.)]TJ/F51 11.9552 Tf -23.801 -31.176 Td [(3.4)-1000(Preconditioner)-250(data)-250(structure)]TJ/F54 9.9626 Tf 0 -19.659 Td [(Our)-396(base)-397(l)1(ibrary)-397(of)18(fers)-396(support)-396(for)-396(simple)-397(well)-396(known)-396(pr)18(econditioners)-396(like)]TJ 0 -11.956 Td [(Diagonal)-250(Scaling)-250(or)-250(Block)-250(Jacobi)-250(with)-250(incomplete)-250(factorization)-250(ILU\0500\051.)]TJ 14.944 -12.314 Td [(A)-361(pr)18(econditioner)-361(is)-361(held)-361(in)-361(the)]TJ/F59 9.9626 Tf 143.781 0 Td [(psb)]TJ -ET -q -1 0 0 1 274.939 168.346 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 278.077 168.146 Td [(prec)]TJ -ET -q -1 0 0 1 299.626 168.346 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 302.764 168.146 Td [(type)]TJ/F54 9.9626 Tf 24.519 0 Td [(data)-361(str)8(uctur)18(e)-361(r)18(eported)-361(in)]TJ -227.388 -11.955 Td [(\002gur)18(e)]TJ -0 0 1 rg 0 0 1 RG - [-282(4)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -14.944 -11.955 Td [(call)]TJ 0 g 0 G - [(.)-407(The)]TJ/F59 9.9626 Tf 59.933 0 Td [(psb_prec_type)]TJ/F54 9.9626 Tf 70.808 0 Td [(data)-282(type)-283(may)-282(contain)-282(a)-283(simple)-282(pr)18(econditioning)]TJ -130.741 -11.955 Td [(matrix)-376(with)-376(the)-376(associated)-376(communication)-375(descriptor)74(.The)-376(internal)-376(pr)18(econdi-)]TJ 0 -11.955 Td [(tioner)-317(is)-317(allocated)-318(appr)18(opriately)-317(with)-317(the)-317(dynamic)-318(type)-317(corr)18(esponding)-317(to)-317(the)]TJ 0 -11.955 Td [(desir)18(ed)-250(pr)18(econditioner)74(.)]TJ + [-525(psb_spmm\050alpha,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 166.875 -29.888 Td [(27)]TJ + [-525(a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1009 0 obj -<< -/Length 3969 ->> -stream + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(beta,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(y,desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -q -1 0 0 1 150.705 671.26 cm -0 0 343.711 38.854 re f -Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -BT -/F94 8.9664 Tf 163.108 699.454 Td [(type)]TJ + [-525(trans,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_Tprec_type)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 9.415 -10.959 Td [(class)]TJ + [-525(work\051)]TJ 0 g 0 G - [(\050psb_T_base_prec_type\051,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(allocatable)]TJ +/F59 9.9626 Tf -5.23 -22.618 Td [(T)90(ype:)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G - [-525(prec)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -9.415 -10.959 Td [(end)-525(type)]TJ +/F59 9.9626 Tf -29.828 -20.626 Td [(On)-250(Entry)]TJ 0 g 0 G - [-525(psb_Tprec_type)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G + 0 -20.626 Td [(alpha)]TJ 0 g 0 G -/F54 9.9626 Tf 1.845 -41.429 Td [(Listing)-250(4:)-310(The)-250(PSBLAS)-250(de\002ned)-250(data)-250(type)-250(that)-250(contains)-250(a)-250(pr)18(econditioner)74(.)]TJ/F51 11.9552 Tf -14.248 -32.698 Td [(3.5)-1000(Heap)-250(data)-250(structure)]TJ/F54 9.9626 Tf 0 -18.964 Td [(Among)-310(the)-311(tools)-310(r)18(outines)-310(of)-310(sec.)]TJ +/F62 9.9626 Tf 30.436 0 Td [(the)-250(scalar)]TJ/F68 9.9626 Tf 44.368 0 Td [(a)]TJ/F62 9.9626 Tf 5.385 0 Td [(.)]TJ -55.282 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-311(6)]TJ + [-250(12)]TJ 0 g 0 G - [(,)-325(we)-310(have)-311(a)-310(number)-310(of)-311(so)1(rting)-311(utilities;)-340(the)]TJ 0 -11.955 Td [(heap)-250(sort)-250(is)-250(implemented)-250(in)-250(terms)-250(of)-250(heaps)-250(having)-250(the)-250(following)-250(signatur)18(es:)]TJ + [(.)]TJ 0 g 0 G -/F59 9.9626 Tf 0 -19.925 Td [(psb)]TJ +/F59 9.9626 Tf -24.907 -20.626 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(sparse)-250(matrix)]TJ/F60 9.9626 Tf 164.964 0 Td [(A)]TJ/F62 9.9626 Tf 7.317 0 Td [(.)]TJ -157.337 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.242 0 Td [(psb)]TJ ET q -1 0 0 1 167.023 552.764 cm +1 0 0 1 324.173 212.882 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 170.162 552.565 Td [(T)]TJ +/F67 9.9626 Tf 327.311 212.682 Td [(Tspmat)]TJ ET q -1 0 0 1 176.02 552.764 cm +1 0 0 1 359.321 212.882 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 179.158 552.565 Td [(heap)]TJ +/F67 9.9626 Tf 362.459 212.682 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 25.903 0 Td [(:)-333(a)-262(heap)-262(containing)-262(e)1(lements)-262(of)-262(type)-262(T)74(,)-261(wher)18(e)-262(T)-262(can)-261(be)]TJ/F59 9.9626 Tf 242.282 0 Td [(i,s,c,d,z)]TJ/F54 9.9626 Tf -271.731 -11.955 Td [(for)-250(integer)74(,)-250(r)18(eal)-250(and)-250(complex)-250(data;)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F59 9.9626 Tf -24.907 -19.925 Td [(psb)]TJ +/F59 9.9626 Tf -232.675 -20.625 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.614 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.742 0 Td [(psb)]TJ ET q -1 0 0 1 167.023 520.884 cm +1 0 0 1 436.673 144.435 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 170.162 520.685 Td [(T)]TJ +/F67 9.9626 Tf 439.811 144.236 Td [(T)]TJ ET q -1 0 0 1 176.02 520.884 cm +1 0 0 1 445.669 144.435 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 179.158 520.685 Td [(idx)]TJ +/F67 9.9626 Tf 448.807 144.236 Td [(vect)]TJ ET q -1 0 0 1 195.476 520.884 cm +1 0 0 1 470.356 144.435 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 198.615 520.685 Td [(heap)]TJ -0 g 0 G -/F54 9.9626 Tf 25.902 0 Td [(:)-289(a)-207(heap)-207(containing)-207(elements)-207(of)-207(type)-207(T)74(,)-207(as)-207(above,)-215(together)-207(with)]TJ -48.906 -11.956 Td [(an)-250(integer)-250(index.)]TJ -24.906 -19.925 Td [(Given)-250(a)-250(heap)-250(object,)-250(the)-250(following)-250(methods)-250(ar)18(e)-250(de\002ned)-250(on)-250(it:)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -19.925 Td [(init)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(Initialize)-250(memory;)-250(also)-250(choose)-250(ascending)-250(or)-250(descending)-250(or)18(der;)]TJ -0 g 0 G -/F51 9.9626 Tf -21.021 -19.925 Td [(howmany)]TJ -0 g 0 G -/F54 9.9626 Tf 50.371 0 Td [(Curr)18(ent)-250(heap)-250(occupancy;)]TJ -0 g 0 G -/F51 9.9626 Tf -50.371 -19.926 Td [(insert)]TJ -0 g 0 G -/F54 9.9626 Tf 30.983 0 Td [(Add)-250(an)-250(item)-250(\050or)-250(an)-250(item)-250(and)-250(its)-250(index\051;)]TJ -0 g 0 G -/F51 9.9626 Tf -30.983 -19.925 Td [(get)]TJ -ET -q -1 0 0 1 165.141 409.302 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 168.129 409.103 Td [(\002rst)]TJ -0 g 0 G -/F54 9.9626 Tf 22.685 0 Td [(Remove)-250(and)-250(r)18(eturn)-250(the)-250(\002rst)-250(element;)]TJ -0 g 0 G -/F51 9.9626 Tf -40.109 -19.925 Td [(dump)]TJ +/F67 9.9626 Tf 473.495 144.236 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 32.099 0 Td [(Print)-250(on)-250(\002le;)]TJ -0 g 0 G -/F51 9.9626 Tf -32.099 -19.926 Td [(free)]TJ +/F62 9.9626 Tf -297.884 -11.955 Td [(containing)-278(numbers)-278(of)-279(type)-278(speci\002ed)-278(in)-278(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-278(12)]TJ 0 g 0 G -/F54 9.9626 Tf 22.695 0 Td [(Release)-250(memory)111(.)]TJ -22.695 -19.925 Td [(These)-305(objects)-305(ar)18(e)-305(used)-305(in)-305(AMG4PSBLAS)-305(to)-305(implement)-305(the)-305(factorization)-305(algo-)]TJ 0 -11.955 Td [(rithms.)]TJ + [(.)-395(The)-278(rank)-279(of)]TJ/F60 9.9626 Tf 275.498 0 Td [(x)]TJ/F62 9.9626 Tf 7.978 0 Td [(must)-278(be)]TJ -283.476 -11.955 Td [(the)-250(same)-250(of)]TJ/F60 9.9626 Tf 52.946 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ 0 g 0 G - 166.874 -246.934 Td [(28)]TJ + 83.916 -29.888 Td [(48)]TJ 0 g 0 G ET endstream endobj -928 0 obj +1162 0 obj +<< +/Type /ObjStm +/N 100 +/First 984 +/Length 11501 +>> +stream +1152 0 1153 146 1154 290 1155 435 1156 582 1160 726 232 785 1161 843 1157 901 1164 1034 +1166 1152 1163 1210 1171 1291 1167 1448 1168 1592 1169 1738 1173 1885 236 1944 1174 2002 1170 2061 +1176 2194 1178 2312 1179 2370 1175 2427 1185 2521 1181 2678 1182 2822 1183 2968 1187 3115 240 3174 +1188 3232 1184 3291 1193 3424 1189 3581 1190 3725 1191 3868 1195 4015 244 4073 1196 4130 1192 4187 +1198 4320 1200 4438 1201 4497 1197 4555 1206 4649 1202 4806 1203 4950 1204 5096 1208 5243 248 5301 +1209 5358 1205 5416 1211 5549 1213 5667 1210 5726 1218 5807 1214 5964 1215 6107 1216 6253 1220 6400 +252 6458 1221 6515 1217 6572 1224 6692 1226 6810 1227 6869 1228 6928 1229 6987 1230 7046 1231 7105 +1232 7164 1223 7223 1237 7330 1233 7487 1234 7631 1235 7777 1239 7924 256 7982 1240 8039 1236 8097 +1244 8230 1241 8378 1242 8523 1246 8670 260 8729 1247 8787 1243 8845 1251 8965 1248 9113 1249 9258 +1253 9405 264 9463 1255 9520 1250 9577 1262 9712 1256 9878 1257 10025 1258 10170 1259 10312 1264 10458 +% 1152 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [381.755 372.729 388.729 384.789] +/A << /S /GoTo /D (table.3) >> +>> +% 1153 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [419.358 293.733 495.412 305.793] +/A << /S /GoTo /D (vdata) >> +>> +% 1154 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [381.88 281.778 388.854 293.837] +/A << /S /GoTo /D (table.3) >> +>> +% 1155 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [306.858 202.781 373.916 214.841] +/A << /S /GoTo /D (descdata) >> +>> +% 1156 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [200.458 119.17 207.432 128.58] +/A << /S /GoTo /D (table.2) >> +>> +% 1160 0 obj +<< +/D [1158 0 R /XYZ 149.705 753.953 null] +>> +% 232 0 obj +<< +/D [1158 0 R /XYZ 150.705 716.092 null] +>> +% 1161 0 obj +<< +/D [1158 0 R /XYZ 150.705 524.97 null] +>> +% 1157 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R /F91 914 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1164 0 obj << -/Type /ObjStm -/N 100 -/First 881 -/Length 8939 ->> -stream -67 0 923 55 930 148 932 262 71 319 75 375 933 431 79 488 83 542 929 598 -935 691 937 805 87 861 91 916 95 971 934 1026 942 1119 938 1269 939 1426 940 1577 -944 1724 99 1781 103 1837 945 1894 941 1951 949 2044 951 2158 947 2214 952 2270 107 2327 -111 2383 948 2439 954 2545 956 2659 115 2716 119 2773 957 2830 123 2887 953 2943 959 3036 -961 3150 127 3206 131 3262 135 3318 958 3374 964 3467 966 3581 139 3638 143 3695 963 3752 -968 3845 970 3959 147 4015 151 4071 967 4127 972 4220 974 4334 155 4391 159 4448 163 4505 -971 4562 976 4655 978 4769 167 4825 975 4880 982 4973 979 5115 980 5260 984 5407 171 5464 -175 5521 179 5577 183 5633 985 5690 981 5747 988 5840 990 5954 986 6010 187 6066 191 6122 -195 6178 987 6234 995 6340 992 6482 993 6627 997 6771 199 6828 994 6885 1002 6978 999 7115 -1004 7262 204 7320 208 7377 212 7433 1005 7490 1001 7548 1008 7655 1000 7793 1010 7940 1006 7999 -% 67 0 obj +/Type /Page +/Contents 1165 0 R +/Resources 1163 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1134 0 R +>> +% 1166 0 obj << -/D [924 0 R /XYZ 99.895 276.666 null] +/D [1164 0 R /XYZ 98.895 753.953 null] >> -% 923 0 obj +% 1163 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 930 0 obj +% 1171 0 obj << /Type /Page -/Contents 931 0 R -/Resources 929 0 R +/Contents 1172 0 R +/Resources 1170 0 R /MediaBox [0 0 595.276 841.89] -/Parent 927 0 R +/Parent 1134 0 R +/Annots [ 1167 0 R 1168 0 R 1169 0 R ] >> -% 932 0 obj +% 1167 0 obj << -/D [930 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [419.358 358.503 495.412 370.562] +/A << /S /GoTo /D (vdata) >> >> -% 71 0 obj +% 1168 0 obj << -/D [930 0 R /XYZ 150.705 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [377.462 346.547 384.436 358.607] +/A << /S /GoTo /D (table.4) >> >> -% 75 0 obj +% 1169 0 obj << -/D [930 0 R /XYZ 150.705 519.544 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [306.858 280.074 373.916 292.134] +/A << /S /GoTo /D (descdata) >> >> -% 933 0 obj +% 1173 0 obj << -/D [930 0 R /XYZ 397.537 356.277 null] +/D [1171 0 R /XYZ 149.705 753.953 null] >> -% 79 0 obj +% 236 0 obj << -/D [930 0 R /XYZ 150.705 305.6 null] +/D [1171 0 R /XYZ 150.705 716.092 null] >> -% 83 0 obj +% 1174 0 obj << -/D [930 0 R /XYZ 150.705 194.578 null] +/D [1171 0 R /XYZ 150.705 495.665 null] >> -% 929 0 obj +% 1170 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F93 915 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 935 0 obj +% 1176 0 obj << /Type /Page -/Contents 936 0 R -/Resources 934 0 R +/Contents 1177 0 R +/Resources 1175 0 R /MediaBox [0 0 595.276 841.89] -/Parent 927 0 R ->> -% 937 0 obj -<< -/D [935 0 R /XYZ 98.895 753.953 null] ->> -% 87 0 obj -<< -/D [935 0 R /XYZ 99.895 583.842 null] +/Parent 1180 0 R >> -% 91 0 obj +% 1178 0 obj << -/D [935 0 R /XYZ 99.895 466.211 null] +/D [1176 0 R /XYZ 98.895 753.953 null] >> -% 95 0 obj +% 1179 0 obj << -/D [935 0 R /XYZ 99.895 256.035 null] +/D [1176 0 R /XYZ 99.895 632.19 null] >> -% 934 0 obj +% 1175 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 942 0 obj +% 1185 0 obj << /Type /Page -/Contents 943 0 R -/Resources 941 0 R +/Contents 1186 0 R +/Resources 1184 0 R /MediaBox [0 0 595.276 841.89] -/Parent 927 0 R -/Annots [ 938 0 R 939 0 R 940 0 R ] +/Parent 1180 0 R +/Annots [ 1181 0 R 1182 0 R 1183 0 R ] >> -% 938 0 obj +% 1181 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[0 1 0] -/Rect [187.544 240.293 199.499 249.399] -/A << /S /GoTo /D (cite.DesignPatterns) >> +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [419.358 411.694 495.412 423.754] +/A << /S /GoTo /D (vdata) >> >> -% 939 0 obj +% 1182 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[0 1 0] -/Rect [267.981 240.393 279.936 249.399] -/A << /S /GoTo /D (cite.Sparse03) >> +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [377.462 399.739 384.436 411.798] +/A << /S /GoTo /D (table.5) >> >> -% 940 0 obj +% 1183 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [458.483 237.643 465.457 249.703] -/A << /S /GoTo /D (listing.2) >> ->> -% 944 0 obj -<< -/D [942 0 R /XYZ 149.705 753.953 null] +/Rect [306.858 331.993 373.916 344.052] +/A << /S /GoTo /D (descdata) >> >> -% 99 0 obj +% 1187 0 obj << -/D [942 0 R /XYZ 150.705 544.277 null] +/D [1185 0 R /XYZ 149.705 753.953 null] >> -% 103 0 obj +% 240 0 obj << -/D [942 0 R /XYZ 150.705 296.936 null] +/D [1185 0 R /XYZ 150.705 716.092 null] >> -% 945 0 obj +% 1188 0 obj << -/D [942 0 R /XYZ 170.328 265.359 null] +/D [1185 0 R /XYZ 150.705 555.856 null] >> -% 941 0 obj +% 1184 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R /F91 914 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 949 0 obj +% 1193 0 obj << /Type /Page -/Contents 950 0 R -/Resources 948 0 R +/Contents 1194 0 R +/Resources 1192 0 R /MediaBox [0 0 595.276 841.89] -/Parent 927 0 R +/Parent 1180 0 R +/Annots [ 1189 0 R 1190 0 R 1191 0 R ] >> -% 951 0 obj +% 1189 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [368.549 362.555 444.603 374.615] +/A << /S /GoTo /D (vdata) >> +>> +% 1190 0 obj << -/D [949 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [326.652 350.6 333.626 362.66] +/A << /S /GoTo /D (table.6) >> >> -% 947 0 obj +% 1191 0 obj << -/D [949 0 R /XYZ 99.895 665.282 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [256.048 283.114 323.106 295.173] +/A << /S /GoTo /D (descdata) >> >> -% 952 0 obj +% 1195 0 obj << -/D [949 0 R /XYZ 409.052 603.446 null] +/D [1193 0 R /XYZ 98.895 753.953 null] >> -% 107 0 obj +% 244 0 obj << -/D [949 0 R /XYZ 99.895 294.773 null] +/D [1193 0 R /XYZ 99.895 716.092 null] >> -% 111 0 obj +% 1196 0 obj << -/D [949 0 R /XYZ 99.895 276.048 null] +/D [1193 0 R /XYZ 99.895 505.29 null] >> -% 948 0 obj +% 1192 0 obj << -/Font << /F94 915 0 R /F54 586 0 R /F59 812 0 R /F51 584 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F93 915 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 954 0 obj +% 1198 0 obj << /Type /Page -/Contents 955 0 R -/Resources 953 0 R +/Contents 1199 0 R +/Resources 1197 0 R /MediaBox [0 0 595.276 841.89] -/Parent 927 0 R ->> -% 956 0 obj -<< -/D [954 0 R /XYZ 149.705 753.953 null] ->> -% 115 0 obj -<< -/D [954 0 R /XYZ 150.705 716.092 null] ->> -% 119 0 obj -<< -/D [954 0 R /XYZ 150.705 540.892 null] +/Parent 1180 0 R >> -% 957 0 obj +% 1200 0 obj << -/D [954 0 R /XYZ 150.705 358.382 null] +/D [1198 0 R /XYZ 149.705 753.953 null] >> -% 123 0 obj +% 1201 0 obj << -/D [954 0 R /XYZ 150.705 300.51 null] +/D [1198 0 R /XYZ 150.705 632.19 null] >> -% 953 0 obj +% 1197 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 959 0 obj +% 1206 0 obj << /Type /Page -/Contents 960 0 R -/Resources 958 0 R +/Contents 1207 0 R +/Resources 1205 0 R /MediaBox [0 0 595.276 841.89] -/Parent 962 0 R ->> -% 961 0 obj -<< -/D [959 0 R /XYZ 98.895 753.953 null] ->> -% 127 0 obj -<< -/D [959 0 R /XYZ 99.895 716.092 null] ->> -% 131 0 obj -<< -/D [959 0 R /XYZ 99.895 526.761 null] +/Parent 1180 0 R +/Annots [ 1202 0 R 1203 0 R 1204 0 R ] >> -% 135 0 obj +% 1202 0 obj << -/D [959 0 R /XYZ 99.895 326.359 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [368.549 291.495 444.603 303.554] +/A << /S /GoTo /D (vdata) >> >> -% 958 0 obj +% 1203 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [326.652 279.539 333.626 291.599] +/A << /S /GoTo /D (table.7) >> >> -% 964 0 obj +% 1204 0 obj << -/Type /Page -/Contents 965 0 R -/Resources 963 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 962 0 R +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [256.048 209.259 323.106 221.319] +/A << /S /GoTo /D (descdata) >> >> -% 966 0 obj +% 1208 0 obj << -/D [964 0 R /XYZ 149.705 753.953 null] +/D [1206 0 R /XYZ 98.895 753.953 null] >> -% 139 0 obj +% 248 0 obj << -/D [964 0 R /XYZ 150.705 716.092 null] +/D [1206 0 R /XYZ 99.895 716.092 null] >> -% 143 0 obj +% 1209 0 obj << -/D [964 0 R /XYZ 150.705 474.131 null] +/D [1206 0 R /XYZ 99.895 443.893 null] >> -% 963 0 obj +% 1205 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R /F91 914 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 968 0 obj +% 1211 0 obj << /Type /Page -/Contents 969 0 R -/Resources 967 0 R +/Contents 1212 0 R +/Resources 1210 0 R /MediaBox [0 0 595.276 841.89] -/Parent 962 0 R ->> -% 970 0 obj -<< -/D [968 0 R /XYZ 98.895 753.953 null] ->> -% 147 0 obj -<< -/D [968 0 R /XYZ 99.895 716.092 null] +/Parent 1180 0 R >> -% 151 0 obj +% 1213 0 obj << -/D [968 0 R /XYZ 99.895 376.562 null] +/D [1211 0 R /XYZ 149.705 753.953 null] >> -% 967 0 obj +% 1210 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 972 0 obj +% 1218 0 obj << /Type /Page -/Contents 973 0 R -/Resources 971 0 R +/Contents 1219 0 R +/Resources 1217 0 R /MediaBox [0 0 595.276 841.89] -/Parent 962 0 R +/Parent 1222 0 R +/Annots [ 1214 0 R 1215 0 R 1216 0 R ] >> -% 974 0 obj -<< -/D [972 0 R /XYZ 149.705 753.953 null] ->> -% 155 0 obj -<< -/D [972 0 R /XYZ 150.705 716.092 null] ->> -% 159 0 obj +% 1214 0 obj << -/D [972 0 R /XYZ 150.705 484.709 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [368.549 314.69 444.603 326.749] +/A << /S /GoTo /D (vdata) >> >> -% 163 0 obj +% 1215 0 obj << -/D [972 0 R /XYZ 150.705 251.325 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [326.652 302.734 333.626 314.794] +/A << /S /GoTo /D (table.8) >> >> -% 971 0 obj +% 1216 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [256.048 232.693 323.106 244.753] +/A << /S /GoTo /D (descdata) >> >> -% 976 0 obj +% 1220 0 obj << -/Type /Page -/Contents 977 0 R -/Resources 975 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 962 0 R +/D [1218 0 R /XYZ 98.895 753.953 null] >> -% 978 0 obj +% 252 0 obj << -/D [976 0 R /XYZ 98.895 753.953 null] +/D [1218 0 R /XYZ 99.895 716.092 null] >> -% 167 0 obj +% 1221 0 obj << -/D [976 0 R /XYZ 99.895 476.15 null] +/D [1218 0 R /XYZ 99.895 504.73 null] >> -% 975 0 obj +% 1217 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 982 0 obj +% 1224 0 obj << /Type /Page -/Contents 983 0 R -/Resources 981 0 R +/Contents 1225 0 R +/Resources 1223 0 R /MediaBox [0 0 595.276 841.89] -/Parent 962 0 R -/Annots [ 979 0 R 980 0 R ] ->> -% 979 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [248.894 164.341 255.868 176.4] -/A << /S /GoTo /D (section.6) >> +/Parent 1222 0 R >> -% 980 0 obj +% 1226 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [343.512 128.475 350.485 140.535] -/A << /S /GoTo /D (listing.3) >> +/D [1224 0 R /XYZ 149.705 753.953 null] >> -% 984 0 obj +% 1227 0 obj << -/D [982 0 R /XYZ 149.705 753.953 null] +/D [1224 0 R /XYZ 150.705 564.444 null] >> -% 171 0 obj +% 1228 0 obj << -/D [982 0 R /XYZ 150.705 716.092 null] +/D [1224 0 R /XYZ 150.705 504.067 null] >> -% 175 0 obj +% 1229 0 obj << -/D [982 0 R /XYZ 150.705 586.94 null] +/D [1224 0 R /XYZ 175.611 506.876 null] >> -% 179 0 obj +% 1230 0 obj << -/D [982 0 R /XYZ 150.705 402.59 null] +/D [1224 0 R /XYZ 175.611 494.921 null] >> -% 183 0 obj +% 1231 0 obj << -/D [982 0 R /XYZ 150.705 234.114 null] +/D [1224 0 R /XYZ 175.611 482.966 null] >> -% 985 0 obj +% 1232 0 obj << -/D [982 0 R /XYZ 170.799 204.012 null] +/D [1224 0 R /XYZ 175.611 471.011 null] >> -% 981 0 obj +% 1223 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 988 0 obj +% 1237 0 obj << /Type /Page -/Contents 989 0 R -/Resources 987 0 R +/Contents 1238 0 R +/Resources 1236 0 R /MediaBox [0 0 595.276 841.89] -/Parent 991 0 R +/Parent 1222 0 R +/Annots [ 1233 0 R 1234 0 R 1235 0 R ] >> -% 990 0 obj +% 1233 0 obj << -/D [988 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [368.549 416.057 444.603 428.117] +/A << /S /GoTo /D (vdata) >> >> -% 986 0 obj +% 1234 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [326.652 404.102 333.626 416.161] +/A << /S /GoTo /D (table.9) >> +>> +% 1235 0 obj << -/D [988 0 R /XYZ 99.895 446.997 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [256.048 336.356 323.106 348.415] +/A << /S /GoTo /D (descdata) >> >> -% 187 0 obj +% 1239 0 obj << -/D [988 0 R /XYZ 99.895 387.147 null] +/D [1237 0 R /XYZ 98.895 753.953 null] >> -% 191 0 obj +% 256 0 obj << -/D [988 0 R /XYZ 99.895 370.604 null] +/D [1237 0 R /XYZ 99.895 716.092 null] >> -% 195 0 obj +% 1240 0 obj << -/D [988 0 R /XYZ 99.895 194.093 null] +/D [1237 0 R /XYZ 99.895 560.219 null] >> -% 987 0 obj +% 1236 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F94 915 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R /F91 914 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 995 0 obj +% 1244 0 obj << /Type /Page -/Contents 996 0 R -/Resources 994 0 R +/Contents 1245 0 R +/Resources 1243 0 R /MediaBox [0 0 595.276 841.89] -/Parent 991 0 R -/Annots [ 992 0 R 993 0 R ] +/Parent 1222 0 R +/Annots [ 1241 0 R 1242 0 R ] >> -% 992 0 obj +% 1241 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [428.968 383.557 435.942 395.616] -/A << /S /GoTo /D (table.1) >> +/Rect [306.858 340.341 384.376 352.401] +/A << /S /GoTo /D (spdata) >> >> -% 993 0 obj +% 1242 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [428.968 240.08 435.942 252.139] -/A << /S /GoTo /D (table.1) >> +/Rect [306.858 272.595 373.916 284.655] +/A << /S /GoTo /D (descdata) >> >> -% 997 0 obj +% 1246 0 obj << -/D [995 0 R /XYZ 149.705 753.953 null] +/D [1244 0 R /XYZ 149.705 753.953 null] >> -% 199 0 obj +% 260 0 obj << -/D [995 0 R /XYZ 150.705 610.712 null] +/D [1244 0 R /XYZ 150.705 716.092 null] >> -% 994 0 obj +% 1247 0 obj +<< +/D [1244 0 R /XYZ 150.705 517.78 null] +>> +% 1243 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1002 0 obj +% 1251 0 obj << /Type /Page -/Contents 1003 0 R -/Resources 1001 0 R +/Contents 1252 0 R +/Resources 1250 0 R /MediaBox [0 0 595.276 841.89] -/Parent 991 0 R -/Annots [ 999 0 R ] +/Parent 1222 0 R +/Annots [ 1248 0 R 1249 0 R ] >> -% 999 0 obj +% 1248 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [127.814 152.385 134.788 164.445] -/A << /S /GoTo /D (listing.4) >> ->> -% 1004 0 obj -<< -/D [1002 0 R /XYZ 98.895 753.953 null] +/Rect [256.048 340.341 333.567 352.401] +/A << /S /GoTo /D (spdata) >> >> -% 204 0 obj +% 1249 0 obj << -/D [1002 0 R /XYZ 99.895 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [256.048 272.595 323.106 284.655] +/A << /S /GoTo /D (descdata) >> >> -% 208 0 obj +% 1253 0 obj << -/D [1002 0 R /XYZ 99.895 430.41 null] +/D [1251 0 R /XYZ 98.895 753.953 null] >> -% 212 0 obj +% 264 0 obj << -/D [1002 0 R /XYZ 99.895 226.203 null] +/D [1251 0 R /XYZ 99.895 716.092 null] >> -% 1005 0 obj +% 1255 0 obj << -/D [1002 0 R /XYZ 258.62 168.146 null] +/D [1251 0 R /XYZ 99.895 517.78 null] >> -% 1001 0 obj +% 1250 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F104 1254 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1008 0 obj +% 1262 0 obj << /Type /Page -/Contents 1009 0 R -/Resources 1007 0 R +/Contents 1263 0 R +/Resources 1261 0 R /MediaBox [0 0 595.276 841.89] -/Parent 991 0 R -/Annots [ 1000 0 R ] +/Parent 1222 0 R +/Annots [ 1256 0 R 1257 0 R 1258 0 R 1259 0 R ] >> -% 1000 0 obj +% 1256 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [297.461 580.64 304.435 592.699] -/A << /S /GoTo /D (section.6) >> ->> -% 1010 0 obj -<< -/D [1008 0 R /XYZ 149.705 753.953 null] ->> -% 1006 0 obj -<< -/D [1008 0 R /XYZ 150.705 665.282 null] +/Rect [428.968 277.323 440.924 289.383] +/A << /S /GoTo /D (table.12) >> >> - -endstream -endobj -1014 0 obj +% 1257 0 obj << -/Length 158 +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [306.858 208.877 384.376 220.936] +/A << /S /GoTo /D (spdata) >> >> -stream -0 g 0 G -0 g 0 G -BT -/F51 14.3462 Tf 99.895 705.784 Td [(4)-1000(Computational)-250(routines)]TJ -0 g 0 G -/F54 9.9626 Tf 166.875 -615.346 Td [(29)]TJ -0 g 0 G -ET - -endstream -endobj -1025 0 obj +% 1258 0 obj << -/Length 7171 ->> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.1)-1000(psb)]TJ -ET -q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 201.825 706.129 Td [(geaxpby)-250(\227)-250(General)-250(Dense)-250(Matrix)-250(Sum)]TJ/F54 9.9626 Tf -51.12 -19.189 Td [(This)-358(subr)18(outine)-358(is)-359(an)-358(interface)-358(to)-358(the)-358(computational)-359(kernel)-358(for)-358(dense)-358(matrix)]TJ 0 -11.955 Td [(sum:)]TJ/F52 9.9626 Tf 143.149 -12.304 Td [(y)]TJ/F83 10.3811 Tf 7.998 0 Td [(\040)]TJ/F60 9.9626 Tf 13.397 0 Td [(a)]TJ/F52 9.9626 Tf 7.616 0 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -175.407 -18.398 Td [(call)]TJ -0 g 0 G - [-525(psb_geaxpby\050alpha,)-525(x,)-525(beta,)-525(y,)-525(desc_a,)-525(info\051)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -ET -q -1 0 0 1 227.737 629.682 cm -[]0 d 0 J 0.398 w 0 0 m 189.647 0 l S -Q -BT -/F52 9.9626 Tf 234.009 621.114 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(,)]TJ/F52 9.9626 Tf 5.106 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(,)]TJ/F60 9.9626 Tf 5.105 0 Td [(a)]TJ/F54 9.9626 Tf 5.385 0 Td [(,)]TJ/F60 9.9626 Tf 5.355 0 Td [(b)]TJ/F51 9.9626 Tf 89.359 0 Td [(Subroutine)]TJ -ET -q -1 0 0 1 227.737 617.328 cm -[]0 d 0 J 0.398 w 0 0 m 189.647 0 l S -Q -BT -/F54 9.9626 Tf 233.715 608.761 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 370.948 608.96 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 373.937 608.761 Td [(geaxpby)]TJ -140.222 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 370.948 597.005 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 373.937 596.805 Td [(geaxpby)]TJ -140.222 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 370.948 585.05 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 373.937 584.85 Td [(geaxpby)]TJ -140.222 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ -ET -q -1 0 0 1 370.948 573.094 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 373.937 572.895 Td [(geaxpby)]TJ -ET -q -1 0 0 1 227.737 569.109 cm -[]0 d 0 J 0.398 w 0 0 m 189.647 0 l S -Q -0 g 0 G -BT -/F54 9.9626 Tf 280.768 540.731 Td [(T)92(able)-250(1:)-310(Data)-250(types)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -/F51 9.9626 Tf -130.063 -35.05 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.39 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -20.391 Td [(alpha)]TJ -0 g 0 G -/F54 9.9626 Tf 30.436 0 Td [(the)-250(scalar)]TJ/F60 9.9626 Tf 44.368 0 Td [(a)]TJ/F54 9.9626 Tf 5.385 0 Td [(.)]TJ -55.282 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(1)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -20.391 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(o)1(r)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ -ET -q -1 0 0 1 436.673 349.068 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 439.811 348.869 Td [(T)]TJ -ET -q -1 0 0 1 445.669 349.068 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 448.807 348.869 Td [(vect)]TJ -ET -q -1 0 0 1 470.356 349.068 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 473.495 348.869 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf -297.884 -11.955 Td [(containing)-312(numbers)-311(of)-312(type)-311(speci\002ed)-312(in)-311(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-312(1)]TJ -0 g 0 G - [(.)-494(The)-312(rank)-312(of)]TJ/F52 9.9626 Tf 274.834 0 Td [(x)]TJ/F54 9.9626 Tf 8.31 0 Td [(must)-311(be)]TJ -283.144 -11.955 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 52.946 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -82.958 -20.391 Td [(beta)]TJ -0 g 0 G -/F54 9.9626 Tf 24.348 0 Td [(the)-250(scalar)]TJ/F60 9.9626 Tf 44.618 0 Td [(b)]TJ/F54 9.9626 Tf 5.524 0 Td [(.)]TJ -49.584 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(1)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -20.391 Td [(y)]TJ -0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 191.754 0 Td [(y)]TJ/F54 9.9626 Tf 5.105 0 Td [(.)]TJ -182.473 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ -ET -q -1 0 0 1 436.673 188.736 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 439.811 188.537 Td [(T)]TJ -ET -q -1 0 0 1 445.669 188.736 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 448.807 188.537 Td [(vect)]TJ -ET -q -1 0 0 1 470.356 188.736 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 473.495 188.537 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf -297.884 -11.956 Td [(containing)-276(numbers)-277(of)-276(the)-276(type)-276(indicated)-277(in)-276(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-276(1)]TJ -0 g 0 G - [(.)-389(The)-276(rank)-277(of)]TJ/F52 9.9626 Tf 288.67 0 Td [(y)]TJ/F54 9.9626 Tf 7.859 0 Td [(must)]TJ -296.529 -11.955 Td [(be)-250(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 65.888 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -95.999 -20.39 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 144.435 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 144.236 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ -0 g 0 G -/F54 9.9626 Tf 114.879 -29.888 Td [(30)]TJ -0 g 0 G -ET +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [419.358 140.43 495.412 152.49] +/A << /S /GoTo /D (vdata) >> +>> +% 1259 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [379.43 128.475 391.385 140.535] +/A << /S /GoTo /D (table.12) >> +>> +% 1264 0 obj +<< +/D [1262 0 R /XYZ 149.705 753.953 null] +>> endstream endobj -1032 0 obj +1279 0 obj << -/Length 2404 +/Length 6532 >> stream 0 g 0 G 0 g 0 G -BT -/F54 9.9626 Tf 124.802 706.129 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ -ET -q -1 0 0 1 273.363 694.373 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 276.501 694.174 Td [(desc)]TJ -ET -q -1 0 0 1 298.05 694.373 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 301.189 694.174 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +BT +/F59 9.9626 Tf 99.895 706.129 Td [(beta)]TJ 0 g 0 G -/F51 9.9626 Tf -222.215 -21.918 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 24.349 0 Td [(the)-250(scalar)]TJ/F68 9.9626 Tf 44.617 0 Td [(b)]TJ/F62 9.9626 Tf 5.524 0 Td [(.)]TJ -49.583 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(12)]TJ 0 g 0 G + [(.)]TJ 0 g 0 G - 0 -19.925 Td [(y)]TJ +/F59 9.9626 Tf -24.907 -18.597 Td [(y)]TJ 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F52 9.9626 Tf 160.68 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -151.4 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +/F62 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.445 0 Td [(y)]TJ/F62 9.9626 Tf 5.105 0 Td [(.)]TJ -166.164 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ ET q -1 0 0 1 385.864 604.709 cm +1 0 0 1 385.864 592.09 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 389.002 604.51 Td [(T)]TJ +/F67 9.9626 Tf 389.002 591.891 Td [(T)]TJ ET q -1 0 0 1 394.86 604.709 cm +1 0 0 1 394.86 592.09 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 397.998 604.51 Td [(vect)]TJ +/F67 9.9626 Tf 397.998 591.891 Td [(vect)]TJ ET q -1 0 0 1 419.547 604.709 cm +1 0 0 1 419.547 592.09 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 422.685 604.51 Td [(type)]TJ +/F67 9.9626 Tf 422.685 591.891 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +/F62 9.9626 Tf -297.883 -11.956 Td [(containing)-280(numbers)-280(of)-280(type)-280(speci\002ed)-280(in)-280(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-250(1)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -0 g 0 G - 141.968 -434.371 Td [(31)]TJ -0 g 0 G -ET - -endstream -endobj -1042 0 obj -<< -/Length 7447 ->> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.2)-1000(psb)]TJ -ET -q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 201.825 706.129 Td [(gedot)-250(\227)-250(Dot)-250(Product)]TJ/F54 9.9626 Tf -51.12 -18.976 Td [(This)-250(function)-250(computes)-250(dot)-250(pr)18(oduct)-250(between)-250(two)-250(vectors)]TJ/F52 9.9626 Tf 254.647 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)]TJ/F52 9.9626 Tf 19.481 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -286.93 -11.955 Td [(If)]TJ/F52 9.9626 Tf 9.459 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)]TJ/F52 9.9626 Tf 19.482 0 Td [(y)]TJ/F54 9.9626 Tf 7.597 0 Td [(ar)18(e)-250(r)18(eal)-250(vectors)-250(it)-250(computes)-250(dot-pr)18(oduct)-250(as:)]TJ/F52 9.9626 Tf 104.717 -23.132 Td [(d)-25(o)-35(t)]TJ/F83 10.3811 Tf 16.337 0 Td [(\040)]TJ/F52 9.9626 Tf 13.566 0 Td [(x)]TJ/F52 7.5716 Tf 5.399 4.115 Td [(T)]TJ/F52 9.9626 Tf 5.525 -4.115 Td [(y)]TJ/F54 9.9626 Tf -189.778 -21.93 Td [(Else)-250(if)]TJ/F52 9.9626 Tf 29.474 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)]TJ/F52 9.9626 Tf 19.482 0 Td [(y)]TJ/F54 9.9626 Tf 7.596 0 Td [(ar)18(e)-250(complex)-250(vectors)-250(then)-250(it)-250(computes)-250(dot-pr)18(oduct)-250(as:)]TJ/F52 9.9626 Tf 83.965 -23.132 Td [(d)-25(o)-35(t)]TJ/F83 10.3811 Tf 16.336 0 Td [(\040)]TJ/F52 9.9626 Tf 13.567 0 Td [(x)]TJ/F52 7.5716 Tf 5.588 4.115 Td [(H)]TJ/F52 9.9626 Tf 6.812 -4.115 Td [(y)]TJ/F59 9.9626 Tf -175.572 -21.937 Td [(psb_gedot\050x,)-525(y,)-525(desc_a,)-525(info)-525([,global]\051)]TJ -0 g 0 G + [-280(12)]TJ 0 g 0 G + [(.)-400(The)-280(rank)-280(of)]TJ/F60 9.9626 Tf 275.562 0 Td [(y)]TJ/F62 9.9626 Tf 7.895 0 Td [(must)-280(be)]TJ -283.457 -11.955 Td [(the)-250(same)-250(of)]TJ/F60 9.9626 Tf 53.115 0 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(.)]TJ 0 g 0 G +/F59 9.9626 Tf -83.228 -18.597 Td [(desc)]TJ ET q -1 0 0 1 233.929 570.686 cm -[]0 d 0 J 0.398 w 0 0 m 177.263 0 l S -Q -BT -/F52 9.9626 Tf 240.031 562.118 Td [(d)-25(o)-35(t)]TJ/F54 9.9626 Tf 13.444 0 Td [(,)]TJ/F52 9.9626 Tf 5.276 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(,)]TJ/F52 9.9626 Tf 5.106 0 Td [(y)]TJ/F51 9.9626 Tf 91.76 0 Td [(Function)]TJ -ET -q -1 0 0 1 233.929 558.332 cm -[]0 d 0 J 0.398 w 0 0 m 177.263 0 l S -Q -BT -/F54 9.9626 Tf 239.906 549.765 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 377.14 549.964 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 380.129 549.765 Td [(gedot)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 377.14 538.009 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 380.129 537.809 Td [(gedot)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 377.14 526.053 cm +1 0 0 1 120.408 549.583 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 380.129 525.854 Td [(gedot)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F59 9.9626 Tf 123.397 549.383 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 377.14 514.098 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 273.363 501.762 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 380.129 513.899 Td [(gedot)]TJ +/F67 9.9626 Tf 276.501 501.563 Td [(desc)]TJ ET q -1 0 0 1 233.929 510.113 cm -[]0 d 0 J 0.398 w 0 0 m 177.263 0 l S +1 0 0 1 298.05 501.762 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 280.768 481.735 Td [(T)92(able)-250(2:)-310(Data)-250(types)]TJ +/F67 9.9626 Tf 301.189 501.563 Td [(type)]TJ 0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G +/F59 9.9626 Tf -222.215 -18.597 Td [(trans)]TJ 0 g 0 G -/F51 9.9626 Tf -130.063 -34.507 Td [(T)90(ype:)]TJ +/F62 9.9626 Tf 27.666 0 Td [(indicates)-250(what)-250(kind)-250(of)-250(operation)-250(to)-250(perform.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F59 9.9626 Tf -2.759 -18.597 Td [(trans)-250(=)-250(N)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.951 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 46.983 0 Td [(the)-250(operation)-250(is)-250(speci\002ed)-250(by)-250(equation)]TJ +0 0 1 rg 0 0 1 RG + [-250(1)]TJ 0 g 0 G 0 g 0 G - 0 -19.951 Td [(x)]TJ +/F59 9.9626 Tf -46.983 -14.612 Td [(trans)-250(=)-250(T)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +/F62 9.9626 Tf 45.33 0 Td [(the)-250(operation)-250(is)-250(speci\002ed)-250(by)-250(equation)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.742 0 Td [(psb)]TJ -ET -q -1 0 0 1 436.673 359.705 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 439.811 359.506 Td [(T)]TJ -ET -q -1 0 0 1 445.669 359.705 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 448.807 359.506 Td [(vect)]TJ -ET -q -1 0 0 1 470.356 359.705 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 473.495 359.506 Td [(type)]TJ + [-250(2)]TJ +0 g 0 G +0 g 0 G +/F59 9.9626 Tf -45.33 -14.612 Td [(trans)-250(=)-250(C)]TJ 0 g 0 G -/F54 9.9626 Tf -297.884 -11.956 Td [(containing)-312(numbers)-311(of)-312(type)-311(speci\002ed)-312(in)-311(T)92(able)]TJ +/F62 9.9626 Tf 45.878 0 Td [(the)-250(operation)-250(is)-250(speci\002ed)-250(by)-250(equation)]TJ 0 0 1 rg 0 0 1 RG - [-312(2)]TJ + [-250(3)]TJ 0 g 0 G - [(.)-494(The)-312(rank)-312(of)]TJ/F52 9.9626 Tf 274.834 0 Td [(x)]TJ/F54 9.9626 Tf 8.31 0 Td [(must)-311(be)]TJ -283.144 -11.955 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 52.946 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ + -45.878 -18.597 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F60 9.9626 Tf 38.64 0 Td [(t)-15(r)-50(a)-25(n)-25(s)]TJ/F93 10.3811 Tf 25.193 0 Td [(=)]TJ/F60 9.9626 Tf 11.434 0 Td [(N)]TJ/F62 9.9626 Tf -75.267 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(character)-250(variable.)]TJ 0 g 0 G -/F51 9.9626 Tf -82.958 -19.951 Td [(y)]TJ +/F59 9.9626 Tf -24.907 -18.596 Td [(work)]TJ 0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.445 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -166.165 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ -ET -q -1 0 0 1 436.673 268.023 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 439.811 267.824 Td [(T)]TJ -ET -q -1 0 0 1 445.669 268.023 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 448.807 267.824 Td [(vect)]TJ -ET -q -1 0 0 1 470.356 268.023 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 473.495 267.824 Td [(type)]TJ +/F62 9.9626 Tf 28.782 0 Td [(work)-250(array)111(.)]TJ -3.875 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-270(as:)-351(a)-270(rank)-270(one)-270(array)-271(of)-270(the)-270(same)-270(type)-271(of)]TJ/F60 9.9626 Tf 220.875 0 Td [(x)]TJ/F62 9.9626 Tf 7.898 0 Td [(and)]TJ/F60 9.9626 Tf 19.684 0 Td [(y)]TJ/F62 9.9626 Tf 7.798 0 Td [(with)-270(the)-270(T)74(AR-)]TJ -256.255 -11.955 Td [(GET)-250(attribute.)]TJ 0 g 0 G -/F54 9.9626 Tf -297.884 -11.955 Td [(containing)-313(numbers)-314(of)-313(type)-313(speci\002ed)-314(in)-313(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-313(2)]TJ +/F59 9.9626 Tf -24.907 -18.597 Td [(On)-250(Return)]TJ 0 g 0 G - [(.)-500(The)-314(rank)-313(of)]TJ/F52 9.9626 Tf 274.898 0 Td [(y)]TJ/F54 9.9626 Tf 8.228 0 Td [(must)-313(be)]TJ -283.126 -11.956 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 53.116 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -83.227 -19.95 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 224.162 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 223.963 Td [(a)]TJ + 0 -18.597 Td [(y)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F62 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(matrix)]TJ/F60 9.9626 Tf 144.939 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ -135.659 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-379(as:)-568(an)-379(array)-379(of)-379(rank)-379(one)-379(or)-379(two)-379(containing)-379(numbers)-379(of)-379(type)]TJ 0 -11.955 Td [(speci\002ed)-250(in)-250(T)92(able)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ -ET -q -1 0 0 1 324.173 176.341 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 327.311 176.142 Td [(desc)]TJ -ET -q -1 0 0 1 348.86 176.341 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 351.998 176.142 Td [(type)]TJ + [-250(12)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ + [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -222.214 -19.951 Td [(global)]TJ +/F59 9.9626 Tf -24.907 -18.597 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 33.763 0 Td [(Speci\002es)-226(whether)-227(the)-226(computation)-226(should)-226(include)-227(the)-226(global)-226(r)18(eduction)]TJ -8.857 -11.955 Td [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 76.693 -29.888 Td [(32)]TJ + 141.968 -36.529 Td [(49)]TJ 0 g 0 G ET endstream endobj -1048 0 obj +1286 0 obj << -/Length 3827 +/Length 8092 >> stream 0 g 0 G 0 g 0 G BT -/F54 9.9626 Tf 124.802 706.129 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F59 9.9626 Tf 165.318 0 Td [(global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(4.13)-1000(psb)]TJ +ET +q +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 207.803 706.129 Td [(spsm)-250(\227)-250(T)111(riangular)-250(System)-250(Solve)]TJ/F62 9.9626 Tf -57.098 -19.83 Td [(This)-250(subr)18(outine)-250(computes)-250(the)-250(T)90(riangular)-250(System)-250(Solve:)]TJ/F60 9.9626 Tf 122.724 -35.213 Td [(y)]TJ/F91 10.3811 Tf 15.193 0 Td [(\040)]TJ/F68 9.9626 Tf 20.593 0 Td [(a)]TJ/F60 9.9626 Tf 5.639 0 Td [(T)]TJ/F91 7.8896 Tf 6.545 4.115 Td [(\000)]TJ/F62 7.5716 Tf 6.228 0 Td [(1)]TJ/F60 9.9626 Tf 4.577 -4.115 Td [(x)]TJ/F93 10.3811 Tf 7.267 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.649 0 Td [(y)]TJ -82.196 -16.139 Td [(y)]TJ/F91 10.3811 Tf 15.193 0 Td [(\040)]TJ/F68 9.9626 Tf 20.593 0 Td [(a)]TJ/F60 9.9626 Tf 5.708 0 Td [(D)-48(T)]TJ/F91 7.8896 Tf 14.775 4.114 Td [(\000)]TJ/F62 7.5716 Tf 6.227 0 Td [(1)]TJ/F60 9.9626 Tf 4.578 -4.114 Td [(x)]TJ/F93 10.3811 Tf 7.267 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.649 0 Td [(y)]TJ -90.495 -16.139 Td [(y)]TJ/F91 10.3811 Tf 15.193 0 Td [(\040)]TJ/F68 9.9626 Tf 20.593 0 Td [(a)]TJ/F60 9.9626 Tf 5.639 0 Td [(T)]TJ/F91 7.8896 Tf 6.545 4.114 Td [(\000)]TJ/F62 7.5716 Tf 6.228 0 Td [(1)]TJ/F60 9.9626 Tf 4.607 -4.114 Td [(D)-52(x)]TJ/F93 10.3811 Tf 15.536 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.649 0 Td [(y)]TJ -90.495 -16.09 Td [(y)]TJ/F91 10.3811 Tf 15.193 0 Td [(\040)]TJ/F68 9.9626 Tf 20.593 0 Td [(a)]TJ/F60 9.9626 Tf 5.639 0 Td [(T)]TJ/F91 7.8896 Tf 6.545 4.114 Td [(\000)]TJ/F60 7.5716 Tf 6.421 0 Td [(T)]TJ/F60 9.9626 Tf 5.694 -4.114 Td [(x)]TJ/F93 10.3811 Tf 7.267 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.649 0 Td [(y)]TJ -83.506 -16.09 Td [(y)]TJ/F91 10.3811 Tf 15.193 0 Td [(\040)]TJ/F68 9.9626 Tf 20.593 0 Td [(a)]TJ/F60 9.9626 Tf 5.708 0 Td [(D)-48(T)]TJ/F91 7.8896 Tf 14.775 4.114 Td [(\000)]TJ/F60 7.5716 Tf 6.42 0 Td [(T)]TJ/F60 9.9626 Tf 5.695 -4.114 Td [(x)]TJ/F93 10.3811 Tf 7.267 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.649 0 Td [(y)]TJ -91.805 -16.09 Td [(y)]TJ/F91 10.3811 Tf 15.193 0 Td [(\040)]TJ/F68 9.9626 Tf 20.593 0 Td [(a)]TJ/F60 9.9626 Tf 5.639 0 Td [(T)]TJ/F91 7.8896 Tf 6.545 4.114 Td [(\000)]TJ/F60 7.5716 Tf 6.421 0 Td [(T)]TJ/F60 9.9626 Tf 5.724 -4.114 Td [(D)-52(x)]TJ/F93 10.3811 Tf 15.536 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.649 0 Td [(y)]TJ -91.805 -16.091 Td [(y)]TJ/F91 10.3811 Tf 15.193 0 Td [(\040)]TJ/F68 9.9626 Tf 20.593 0 Td [(a)]TJ/F60 9.9626 Tf 5.639 0 Td [(T)]TJ/F91 7.8896 Tf 6.545 4.115 Td [(\000)]TJ/F60 7.5716 Tf 6.61 0 Td [(H)]TJ/F60 9.9626 Tf 6.982 -4.115 Td [(x)]TJ/F93 10.3811 Tf 7.267 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.648 0 Td [(y)]TJ -84.982 -16.09 Td [(y)]TJ/F91 10.3811 Tf 15.193 0 Td [(\040)]TJ/F68 9.9626 Tf 20.593 0 Td [(a)]TJ/F60 9.9626 Tf 5.708 0 Td [(D)-48(T)]TJ/F91 7.8896 Tf 14.775 4.115 Td [(\000)]TJ/F60 7.5716 Tf 6.61 0 Td [(H)]TJ/F60 9.9626 Tf 6.982 -4.115 Td [(x)]TJ/F93 10.3811 Tf 7.267 0 Td [(+)]TJ/F68 9.9626 Tf 10.504 0 Td [(b)]TJ/F60 9.9626 Tf 5.649 0 Td [(y)]TJ -93.281 -16.09 Td [(y)]TJ/F91 10.3811 Tf 15.193 0 Td [(\040)]TJ/F68 9.9626 Tf 20.593 0 Td [(a)]TJ/F60 9.9626 Tf 5.639 0 Td [(T)]TJ/F91 7.8896 Tf 6.545 4.115 Td [(\000)]TJ/F60 7.5716 Tf 6.61 0 Td [(H)]TJ/F60 9.9626 Tf 7.012 -4.115 Td [(D)-52(x)]TJ/F93 10.3811 Tf 15.536 0 Td [(+)]TJ/F68 9.9626 Tf 10.505 0 Td [(b)]TJ/F60 9.9626 Tf 5.648 0 Td [(y)]TJ/F62 9.9626 Tf -201.061 -38.202 Td [(wher)18(e:)]TJ 0 g 0 G - [(.true.)]TJ +/F60 9.9626 Tf -14.65 -21.265 Td [(x)]TJ 0 g 0 G -/F51 9.9626 Tf -190.225 -31.881 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 10.186 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 115.61 0 Td [(x)]TJ/F62 7.5716 Tf 5.201 -1.495 Td [(:)-12(,)-13(:)]TJ 0 g 0 G +/F60 9.9626 Tf -131.167 -20.218 Td [(y)]TJ 0 g 0 G - 0 -19.925 Td [(Function)-250(value)]TJ +/F62 9.9626 Tf 10.087 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 115.441 0 Td [(y)]TJ/F62 7.5716 Tf 5.201 -1.494 Td [(:)-12(,)-13(:)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(is)-250(the)-250(dot)-250(pr)18(oduct)-250(of)-250(vectors)]TJ/F52 9.9626 Tf 126.33 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)]TJ/F52 9.9626 Tf 19.482 0 Td [(y)]TJ/F54 9.9626 Tf 5.105 0 Td [(.)]TJ -206.483 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.133 0 Td [(global)]TJ/F54 9.9626 Tf 30.675 0 Td [(unless)-190(the)-190(optional)-190(variable)]TJ/F59 9.9626 Tf 121.612 0 Td [(global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F60 9.9626 Tf -130.599 -20.218 Td [(T)]TJ 0 g 0 G - [(.false.)]TJ/F54 9.9626 Tf 75.118 0 Td [(has)-190(been)-190(spec-)]TJ -258.538 -11.955 Td [(i\002ed)]TJ 0 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(2)]TJ +/F62 9.9626 Tf 11.432 0 Td [(is)-250(the)-250(global)-250(sparse)-250(block)-250(triangular)-250(submatrix)]TJ/F60 9.9626 Tf 206.797 0 Td [(T)]TJ 0 g 0 G - [(.)]TJ + -218.159 -21.712 Td [(D)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(info)]TJ +/F62 9.9626 Tf 12.956 0 Td [(is)-250(the)-250(scaling)-250(diagonal)-250(matrix.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.917 Td [(Notes)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 6.894 -21.266 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.926 Td [(1.)]TJ + [-525(psb_spsm\050alpha,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(The)-190(computation)-190(of)-190(a)-190(global)-190(r)18(esult)-190(r)18(equir)18(es)-190(a)-190(global)-190(communication,)-202(which)]TJ 12.453 -11.955 Td [(entails)-318(a)-318(signi\002cant)-318(ove)1(r)18(head.)-514(It)-318(may)-318(be)-318(necessary)-317(and/or)-318(advisable)-318(to)]TJ 0 -11.955 Td [(compute)-204(multiple)-204(dot)-204(pr)18(oducts)-204(at)-204(the)-204(same)-204(time;)-219(in)-204(this)-204(case,)-213(it)-204(is)-204(possible)]TJ 0 -11.955 Td [(to)-250(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ/F59 9.9626 Tf 52.303 -19.925 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ + [-525(t,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_gedot\050x1,y1,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [-525(beta,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(.false.\051)]TJ 0 -11.956 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(2)]TJ + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_gedot\050x2,y2,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [-525(info\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(.false.\051)]TJ 0 -11.955 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(3)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -14.944 -11.955 Td [(call)]TJ 0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ + [-525(psb_spsm\050alpha,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_gedot\050x3,y3,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [-525(t,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(.false.\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_sum\050ctxt,vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ + [-525(beta,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(:)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(3)]TJ + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(\051\051)]TJ/F54 9.9626 Tf -52.303 -19.925 Td [(In)-253(this)-252(way)-253(the)-253(global)-253(communicati)1(on,)-254(which)-253(for)-252(small)-253(sizes)-253(is)-252(a)-253(latency-)]TJ 0 -11.955 Td [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 141.968 -282.939 Td [(33)]TJ + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1059 0 obj -<< -/Length 8275 ->> -stream + [-525(trans,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(unit,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.3)-1000(psb)]TJ -ET -q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 201.825 706.129 Td [(gedots)-250(\227)-250(Generalized)-250(Dot)-250(Product)]TJ/F54 9.9626 Tf -51.12 -18.964 Td [(This)-283(subr)18(outine)-284(computes)-283(a)-284(series)-284(of)-283(dot)-284(pr)18(oducts)-283(among)-284(the)-283(columns)-284(of)-283(two)]TJ 0 -11.955 Td [(dense)-250(matrices)]TJ/F52 9.9626 Tf 68.208 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)]TJ/F52 9.9626 Tf 19.482 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(:)]TJ/F52 9.9626 Tf 24.807 -13.101 Td [(r)-17(e)-25(s)]TJ/F85 10.3811 Tf 12.293 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 10.3811 Tf 7.041 0 Td [(\040)]TJ/F52 9.9626 Tf 13.567 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\050)]TJ/F54 9.9626 Tf 4.274 0 Td [(:)-12(,)]TJ/F52 9.9626 Tf 6.821 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F52 7.5716 Tf 4.343 4.114 Td [(T)]TJ/F52 9.9626 Tf 5.525 -4.114 Td [(y)]TJ/F85 10.3811 Tf 5.23 0 Td [(\050)]TJ/F54 9.9626 Tf 4.274 0 Td [(:)-13(,)]TJ/F52 9.9626 Tf 6.821 0 Td [(i)]TJ/F85 10.3811 Tf 3.089 0 Td [(\051)]TJ/F54 9.9626 Tf -214.288 -16.876 Td [(If)-300(the)-299(matrices)-300(ar)18(e)-299(complex,)-312(then)-300(the)-300(usual)-299(convention)-300(applies,)-312(i.e.)-459(the)-299(conju-)]TJ 0 -11.955 Td [(gate)-239(transpose)-239(of)]TJ/F52 9.9626 Tf 77.351 0 Td [(x)]TJ/F54 9.9626 Tf 7.589 0 Td [(is)-239(used.)-307(If)]TJ/F52 9.9626 Tf 45.493 0 Td [(x)]TJ/F54 9.9626 Tf 7.589 0 Td [(and)]TJ/F52 9.9626 Tf 19.375 0 Td [(y)]TJ/F54 9.9626 Tf 7.489 0 Td [(ar)18(e)-239(of)-239(rank)-240(one,)-241(then)]TJ/F52 9.9626 Tf 92.601 0 Td [(r)-17(e)-25(s)]TJ/F54 9.9626 Tf 14.552 0 Td [(is)-239(a)-240(scalar)75(,)-242(else)-239(it)]TJ -272.039 -11.955 Td [(is)-250(a)-250(rank)-250(one)-250(array)111(.)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 20.174 -11.955 Td [(call)]TJ + [-525(choice,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_gedots\050res,)-525(x,)-525(y,)-525(desc_a,)-525(info\051)]TJ + [-525(diag,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(work\051)]TJ 0 g 0 G 0 g 0 G 0 g 0 G ET q -1 0 0 1 230.392 595.704 cm +1 0 0 1 230.392 339.439 cm []0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F52 9.9626 Tf 236.394 587.136 Td [(r)-17(e)-25(s)]TJ/F54 9.9626 Tf 12.17 0 Td [(,)]TJ/F52 9.9626 Tf 5.275 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(,)]TJ/F52 9.9626 Tf 5.106 0 Td [(y)]TJ/F51 9.9626 Tf 93.135 0 Td [(Subroutine)]TJ +/F60 9.9626 Tf 236.623 330.871 Td [(T)]TJ/F62 9.9626 Tf 6.451 0 Td [(,)]TJ/F60 9.9626 Tf 5.275 0 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(,)]TJ/F60 9.9626 Tf 5.106 0 Td [(y)]TJ/F62 9.9626 Tf 5.105 0 Td [(,)]TJ/F60 9.9626 Tf 5.306 0 Td [(D)]TJ/F62 9.9626 Tf 7.975 0 Td [(,)]TJ/F68 9.9626 Tf 5.105 0 Td [(a)]TJ/F62 9.9626 Tf 5.385 0 Td [(,)]TJ/F68 9.9626 Tf 5.355 0 Td [(b)]TJ/F59 9.9626 Tf 64.393 0 Td [(Subroutine)]TJ ET q -1 0 0 1 230.392 583.351 cm +1 0 0 1 230.392 327.085 cm []0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F54 9.9626 Tf 236.369 574.783 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F62 9.9626 Tf 236.369 318.517 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 373.603 574.982 cm +1 0 0 1 373.603 318.716 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 574.783 Td [(gedots)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F62 9.9626 Tf 376.592 318.517 Td [(spsm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 373.603 563.027 cm +1 0 0 1 373.603 306.761 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 562.828 Td [(gedots)]TJ -140.223 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F62 9.9626 Tf 376.592 306.562 Td [(spsm)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 373.603 551.072 cm +1 0 0 1 373.603 294.806 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 550.872 Td [(gedots)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F62 9.9626 Tf 376.592 294.607 Td [(spsm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 373.603 539.116 cm +1 0 0 1 373.603 282.851 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 538.917 Td [(gedots)]TJ +/F62 9.9626 Tf 376.592 282.652 Td [(spsm)]TJ ET q -1 0 0 1 230.392 535.131 cm +1 0 0 1 230.392 278.866 cm []0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 280.768 506.753 Td [(T)92(able)-250(3:)-310(Data)-250(types)]TJ +/F62 9.9626 Tf 278.277 250.487 Td [(T)92(able)-250(13:)-310(Data)-250(types)]TJ 0 g 0 G 0 g 0 G 0 g 0 G -/F51 9.9626 Tf -130.063 -32.002 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf -127.572 -38.916 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.22 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -21.712 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.22 Td [(x)]TJ + 0 -21.713 Td [(alpha)]TJ +0 g 0 G +/F62 9.9626 Tf 30.436 0 Td [(the)-250(scalar)]TJ/F68 9.9626 Tf 44.368 0 Td [(a)]TJ/F62 9.9626 Tf 5.385 0 Td [(.)]TJ -55.282 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(13)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G + 141.967 -29.888 Td [(50)]TJ +0 g 0 G +ET + +endstream +endobj +1297 0 obj +<< +/Length 7295 +>> +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 99.895 706.129 Td [(t)]TJ +0 g 0 G +/F62 9.9626 Tf 8.299 0 Td [(the)-250(global)-250(portion)-250(of)-250(the)-250(sparse)-250(matrix)]TJ/F60 9.9626 Tf 171.221 0 Td [(T)]TJ/F62 9.9626 Tf 6.451 0 Td [(.)]TJ -161.064 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(type)-250(speci\002ed)-250(in)-250(\247)]TJ +0 0 1 rg 0 0 1 RG + [-250(3)]TJ +0 g 0 G + [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +/F59 9.9626 Tf -24.907 -20.65 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.614 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.742 0 Td [(psb)]TJ +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ ET q -1 0 0 1 436.673 388.689 cm +1 0 0 1 385.864 590.037 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 439.811 388.49 Td [(T)]TJ +/F67 9.9626 Tf 389.002 589.838 Td [(T)]TJ ET q -1 0 0 1 445.669 388.689 cm +1 0 0 1 394.86 590.037 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 448.807 388.49 Td [(vect)]TJ +/F67 9.9626 Tf 397.998 589.838 Td [(vect)]TJ ET q -1 0 0 1 470.356 388.689 cm +1 0 0 1 419.547 590.037 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 473.495 388.49 Td [(type)]TJ +/F67 9.9626 Tf 422.685 589.838 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.884 -11.955 Td [(containing)-312(numbers)-311(of)-312(type)-311(speci\002ed)-312(in)-311(T)92(able)]TJ +/F62 9.9626 Tf -297.883 -11.955 Td [(containing)-278(numbers)-278(of)-279(type)-278(speci\002ed)-278(in)-278(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-312(3)]TJ + [-278(13)]TJ +0 g 0 G + [(.)-395(The)-278(rank)-278(of)]TJ/F60 9.9626 Tf 275.498 0 Td [(x)]TJ/F62 9.9626 Tf 7.977 0 Td [(must)-278(be)]TJ -283.475 -11.956 Td [(the)-250(same)-250(of)]TJ/F60 9.9626 Tf 52.946 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -82.959 -20.649 Td [(beta)]TJ +0 g 0 G +/F62 9.9626 Tf 24.349 0 Td [(the)-250(scalar)]TJ/F68 9.9626 Tf 44.617 0 Td [(b)]TJ/F62 9.9626 Tf 5.524 0 Td [(.)]TJ -49.583 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(13)]TJ 0 g 0 G - [(.)-494(The)-312(rank)-312(of)]TJ/F52 9.9626 Tf 274.834 0 Td [(x)]TJ/F54 9.9626 Tf 8.31 0 Td [(must)-311(be)]TJ -283.144 -11.955 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 52.946 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ + [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -82.958 -19.221 Td [(y)]TJ +/F59 9.9626 Tf -24.907 -20.65 Td [(y)]TJ 0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.445 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -166.165 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +/F62 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.445 0 Td [(y)]TJ/F62 9.9626 Tf 5.105 0 Td [(.)]TJ -166.164 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ ET q -1 0 0 1 436.673 297.738 cm +1 0 0 1 385.864 429.186 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 439.811 297.539 Td [(T)]TJ +/F67 9.9626 Tf 389.002 428.986 Td [(T)]TJ ET q -1 0 0 1 445.669 297.738 cm +1 0 0 1 394.86 429.186 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 448.807 297.539 Td [(vect)]TJ +/F67 9.9626 Tf 397.998 428.986 Td [(vect)]TJ ET q -1 0 0 1 470.356 297.738 cm +1 0 0 1 419.547 429.186 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 473.495 297.539 Td [(type)]TJ +/F67 9.9626 Tf 422.685 428.986 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.884 -11.956 Td [(containing)-313(numbers)-314(of)-313(type)-313(speci\002ed)-314(in)-313(T)92(able)]TJ +/F62 9.9626 Tf -297.883 -11.955 Td [(containing)-280(numbers)-280(of)-280(type)-280(speci\002ed)-280(in)-280(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-313(3)]TJ + [-280(13)]TJ 0 g 0 G - [(.)-500(The)-314(rank)-313(of)]TJ/F52 9.9626 Tf 274.898 0 Td [(y)]TJ/F54 9.9626 Tf 8.228 0 Td [(must)-313(be)]TJ -283.126 -11.955 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 53.116 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ + [(.)-400(The)-280(rank)-280(of)]TJ/F60 9.9626 Tf 275.562 0 Td [(y)]TJ/F62 9.9626 Tf 7.895 0 Td [(must)-280(be)]TJ -283.457 -11.955 Td [(the)-250(same)-250(of)]TJ/F60 9.9626 Tf 53.115 0 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -83.227 -19.22 Td [(desc)]TJ +/F59 9.9626 Tf -83.228 -20.65 Td [(desc)]TJ ET q -1 0 0 1 171.218 254.607 cm +1 0 0 1 120.408 384.625 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 254.408 Td [(a)]TJ +/F59 9.9626 Tf 123.397 384.426 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 324.173 206.786 cm +1 0 0 1 273.363 336.805 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 327.311 206.587 Td [(desc)]TJ +/F67 9.9626 Tf 276.501 336.605 Td [(desc)]TJ ET q -1 0 0 1 348.86 206.786 cm +1 0 0 1 298.05 336.805 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 351.998 206.587 Td [(type)]TJ +/F67 9.9626 Tf 301.189 336.605 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -222.214 -19.22 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -222.215 -20.649 Td [(trans)]TJ 0 g 0 G +/F62 9.9626 Tf 27.666 0 Td [(specify)-250(with)]TJ/F60 9.9626 Tf 56.398 0 Td [(unitd)]TJ/F62 9.9626 Tf 24.637 0 Td [(the)-250(operation)-250(to)-250(perform.)]TJ 0 g 0 G - 0 -19.221 Td [(res)]TJ +/F59 9.9626 Tf -83.794 -20.65 Td [(trans)-250(=)-250('N')]TJ 0 g 0 G -/F54 9.9626 Tf 18.261 0 Td [(is)-250(the)-250(dot)-250(pr)18(oduct)-250(of)-250(vectors)]TJ/F52 9.9626 Tf 126.33 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)]TJ/F52 9.9626 Tf 19.482 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -151.968 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-289(as:)-389(a)-290(number)-290(or)-289(a)-290(rank-one)-289(array)-290(of)-289(the)-290(data)-289(type)-290(indicated)-289(in)]TJ 0 -11.955 Td [(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(2)]TJ +/F62 9.9626 Tf 52.522 0 Td [(the)-250(operation)-250(is)-250(with)-250(no)-250(transposed)-250(matrix)]TJ 0 g 0 G - [(.)]TJ +/F59 9.9626 Tf -52.522 -16.303 Td [(trans)-250(=)-250('T')]TJ 0 g 0 G - 141.967 -29.888 Td [(34)]TJ +/F62 9.9626 Tf 50.869 0 Td [(the)-250(operation)-250(is)-250(with)-250(transposed)-250(matrix.)]TJ +0 g 0 G +/F59 9.9626 Tf -50.869 -16.302 Td [(trans)-250(=)-250('C')]TJ +0 g 0 G +/F62 9.9626 Tf 51.417 0 Td [(the)-250(operation)-250(is)-250(with)-250(conjugate)-250(transposed)-250(matrix.)]TJ -51.417 -20.65 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Default:)]TJ/F60 9.9626 Tf 38.64 0 Td [(t)-15(r)-50(a)-25(n)-25(s)]TJ/F93 10.3811 Tf 25.193 0 Td [(=)]TJ/F60 9.9626 Tf 11.434 0 Td [(N)]TJ/F62 9.9626 Tf -75.267 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(character)-250(variable.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -20.65 Td [(unitd)]TJ +0 g 0 G +/F62 9.9626 Tf 29.878 0 Td [(specify)-250(with)]TJ/F60 9.9626 Tf 56.398 0 Td [(trans)]TJ/F62 9.9626 Tf 23.521 0 Td [(the)-250(operation)-250(to)-250(perform.)]TJ +0 g 0 G +/F59 9.9626 Tf -84.89 -20.649 Td [(unitd)-250(=)-250('U')]TJ +0 g 0 G +/F62 9.9626 Tf 54.186 0 Td [(the)-250(operation)-250(is)-250(with)-250(no)-250(scaling)]TJ +0 g 0 G +/F59 9.9626 Tf -54.186 -16.303 Td [(unitd)-250(=)-250('L)74(')]TJ +0 g 0 G +/F62 9.9626 Tf 51.785 0 Td [(the)-250(operation)-250(is)-250(with)-250(left)-250(scaling)]TJ +0 g 0 G +/F59 9.9626 Tf -51.785 -16.302 Td [(unitd)-250(=)-250('R')]TJ +0 g 0 G +/F62 9.9626 Tf 53.628 0 Td [(the)-250(operation)-250(is)-250(with)-250(right)-250(scaling.)]TJ +0 g 0 G + 88.34 -29.888 Td [(51)]TJ 0 g 0 G ET endstream endobj -1064 0 obj +1303 0 obj << -/Length 582 +/Length 4541 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(info)]TJ +/F62 9.9626 Tf 175.611 706.129 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Default:)]TJ/F60 9.9626 Tf 38.64 0 Td [(u)-25(n)-18(i)-32(t)-25(d)]TJ/F93 10.3811 Tf 26.159 0 Td [(=)]TJ/F60 9.9626 Tf 10.927 0 Td [(U)]TJ/F62 9.9626 Tf -75.726 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(character)-250(variable.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F59 9.9626 Tf -24.906 -19.925 Td [(choice)]TJ 0 g 0 G - 141.968 -567.87 Td [(35)]TJ +/F62 9.9626 Tf 33.753 0 Td [(speci\002es)-250(the)-250(update)-250(of)-250(overlap)-250(elements)-250(to)-250(be)-250(performed)-250(on)-250(exit:)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -3.865 -19.925 Td [(psb_none_)]TJ +0 g 0 G +0 g 0 G + 0 -15.941 Td [(psb_sum_)]TJ +0 g 0 G +0 g 0 G + 0 -15.94 Td [(psb_avg_)]TJ +0 g 0 G +0 g 0 G + 0 -15.94 Td [(psb_square_root_)]TJ/F62 9.9626 Tf -4.982 -19.925 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F67 9.9626 Tf 38.515 0 Td [(psb_avg_)]TJ/F62 9.9626 Tf -38.515 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.925 Td [(diag)]TJ +0 g 0 G +/F62 9.9626 Tf 24.906 0 Td [(the)-250(diagonal)-250(scaling)-250(matrix.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F60 9.9626 Tf 38.64 0 Td [(d)-18(i)-47(a)-47(g)]TJ/F93 10.3811 Tf 18.52 0 Td [(\050)]TJ/F62 9.9626 Tf 4.15 0 Td [(1)]TJ/F93 10.3811 Tf 5.106 0 Td [(\051)-289(=)]TJ/F62 9.9626 Tf 18.002 0 Td [(1)]TJ/F93 10.3811 Tf 5.106 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(n)-25(o)-35(s)-25(c)-40(a)-25(l)-48(i)-32(n)-47(g)]TJ/F93 10.3811 Tf 41.384 0 Td [(\051)]TJ/F62 9.9626 Tf -135.182 -11.955 Td [(Speci\002ed)-293(as:)-395(a)-293(rank)-293(one)-293(array)-292(containing)-293(numbers)-293(of)-293(the)-292(type)-293(indicated)]TJ 0 -11.955 Td [(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(13)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.926 Td [(work)]TJ +0 g 0 G +/F62 9.9626 Tf 28.782 0 Td [(a)-250(work)-250(array)111(.)]TJ -3.876 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-344(as:)-498(a)-344(rank)-343(one)-344(array)-344(of)-344(the)-344(same)-344(type)-344(of)]TJ/F60 9.9626 Tf 229.679 0 Td [(x)]TJ/F62 9.9626 Tf 8.631 0 Td [(with)-344(the)-344(T)74(ARGET)]TJ -238.31 -11.955 Td [(attribute.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.926 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(y)]TJ +0 g 0 G +/F62 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.445 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ -166.165 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-379(as:)-568(an)-379(array)-379(of)-379(rank)-379(one)-379(or)-379(two)-379(containing)-379(numbers)-379(of)-379(type)]TJ 0 -11.955 Td [(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(13)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.925 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.943 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 141.968 -73.723 Td [(52)]TJ 0 g 0 G ET endstream endobj -1071 0 obj +1314 0 obj << -/Length 7477 +/Length 7550 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.4)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(4.14)-1000(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm +1 0 0 1 153.407 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(normi)-250(\227)-250(In\002nity-Norm)-250(of)-250(V)111(ector)]TJ/F54 9.9626 Tf -51.12 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(in\002nity-norm)-250(of)-250(a)-250(vector)]TJ/F52 9.9626 Tf 233.576 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -238.781 -11.955 Td [(If)]TJ/F52 9.9626 Tf 9.459 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(in\002nity)-250(norm)-250(as:)]TJ/F52 9.9626 Tf 117.807 -18.736 Td [(a)-25(m)-40(a)-42(x)]TJ/F83 10.3811 Tf 25.761 0 Td [(\040)]TJ/F54 9.9626 Tf 13.272 0 Td [(max)]TJ/F52 7.5716 Tf 8.355 -7.21 Td [(i)]TJ/F83 10.3811 Tf 12.349 7.21 Td [(j)]TJ/F52 9.9626 Tf 3.298 0 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(j)]TJ/F54 9.9626 Tf -206.019 -23.313 Td [(else)-250(if)]TJ/F52 9.9626 Tf 28.159 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(the)-250(in\002nity-norm)-250(as:)]TJ/F52 9.9626 Tf 63.42 -18.737 Td [(a)-25(m)-40(a)-42(x)]TJ/F83 10.3811 Tf 25.761 0 Td [(\040)]TJ/F54 9.9626 Tf 13.273 0 Td [(max)]TJ/F52 7.5716 Tf 8.354 -7.21 Td [(i)]TJ/F85 10.3811 Tf 12.35 7.21 Td [(\050)]TJ/F83 10.3811 Tf 4.274 0 Td [(j)]TJ/F52 9.9626 Tf 3.028 0 Td [(r)-17(e)]TJ/F85 10.3811 Tf 8.17 0 Td [(\050)]TJ/F52 9.9626 Tf 4.443 0 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F85 10.3811 Tf 2.875 1.96 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(j)]TJ/F85 10.3811 Tf 5.066 0 Td [(+)]TJ/F83 10.3811 Tf 10.256 0 Td [(j)]TJ/F52 9.9626 Tf 3.058 0 Td [(i)-32(m)]TJ/F85 10.3811 Tf 11.088 0 Td [(\050)]TJ/F52 9.9626 Tf 4.444 0 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F85 10.3811 Tf 2.875 1.96 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(j)]TJ/F85 10.3811 Tf 3.128 0 Td [(\051)]TJ/F59 9.9626 Tf -225.616 -22.974 Td [(psb_geamax\050x,)-525(desc_a,)-525(info)-525([,global]\051)]TJ -14.944 -11.955 Td [(psb_normi\050x,)-525(desc_a,)-525(info)-525([,global]\051)]TJ +/F59 11.9552 Tf 156.993 706.129 Td [(gemlt)-250(\227)-250(Entrywise)-250(Product)]TJ/F62 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(entrywise)-250(pr)18(oduct)-250(between)-250(two)-250(vectors)]TJ/F60 9.9626 Tf 299.677 0 Td [(x)]TJ/F62 9.9626 Tf 7.697 0 Td [(and)]TJ/F60 9.9626 Tf 19.481 0 Td [(y)]TJ -187.918 -21.112 Td [(d)-25(o)-35(t)]TJ/F91 10.3811 Tf 16.336 0 Td [(\040)]TJ/F60 9.9626 Tf 13.567 0 Td [(x)]TJ/F93 10.3811 Tf 5.33 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F60 9.9626 Tf 4.274 0 Td [(y)]TJ/F93 10.3811 Tf 5.231 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(.)]TJ/F67 9.9626 Tf -187.465 -21.111 Td [(psb_gemlt\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info\051)]TJ 0 g 0 G 0 g 0 G 0 g 0 G ET q -1 0 0 1 179.304 566.399 cm -[]0 d 0 J 0.398 w 0 0 m 286.513 0 l S +1 0 0 1 183.035 630.896 cm +[]0 d 0 J 0.398 w 0 0 m 177.433 0 l S Q BT -/F52 9.9626 Tf 185.556 557.832 Td [(a)-25(m)-40(a)-42(x)-7779(x)]TJ/F51 9.9626 Tf 220.764 0 Td [(Function)]TJ +/F60 9.9626 Tf 189.137 622.328 Td [(d)-25(o)-35(t)]TJ/F62 9.9626 Tf 13.444 0 Td [(,)]TJ/F60 9.9626 Tf 5.275 0 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(,)]TJ/F60 9.9626 Tf 5.106 0 Td [(y)]TJ/F59 9.9626 Tf 91.759 0 Td [(Function)]TJ ET q -1 0 0 1 179.304 554.046 cm -[]0 d 0 J 0.398 w 0 0 m 286.513 0 l S +1 0 0 1 183.035 618.542 cm +[]0 d 0 J 0.398 w 0 0 m 177.433 0 l S Q BT -/F54 9.9626 Tf 185.282 545.478 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F62 9.9626 Tf 189.012 609.974 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 422.639 545.677 cm +1 0 0 1 326.246 610.173 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 425.628 545.478 Td [(geamax)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F62 9.9626 Tf 329.235 609.974 Td [(gemlt)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 422.639 533.722 cm +1 0 0 1 326.246 598.218 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 425.628 533.523 Td [(geamax)]TJ -240.346 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F62 9.9626 Tf 329.235 598.019 Td [(gemlt)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 422.639 521.767 cm +1 0 0 1 326.246 586.263 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 425.628 521.568 Td [(geamax)]TJ -240.346 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F62 9.9626 Tf 329.235 586.064 Td [(gemlt)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 422.639 509.812 cm +1 0 0 1 326.246 574.308 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 425.628 509.612 Td [(geamax)]TJ +/F62 9.9626 Tf 329.235 574.109 Td [(gemlt)]TJ ET q -1 0 0 1 179.304 505.827 cm -[]0 d 0 J 0.398 w 0 0 m 286.513 0 l S +1 0 0 1 183.035 570.323 cm +[]0 d 0 J 0.398 w 0 0 m 177.433 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 280.768 477.448 Td [(T)92(able)-250(4:)-310(Data)-250(types)]TJ +/F62 9.9626 Tf 227.467 541.944 Td [(T)92(able)-250(14:)-310(Data)-250(types)]TJ 0 g 0 G 0 g 0 G 0 g 0 G -/F51 9.9626 Tf -130.063 -30.014 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf -127.572 -33.34 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -18.652 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.603 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -18.653 Td [(x)]TJ + 0 -19.603 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F60 9.9626 Tf 174.06 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -164.321 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.742 0 Td [(psb)]TJ +/F67 9.9626 Tf 139.526 0 Td [(psb)]TJ ET q -1 0 0 1 436.673 362.508 cm +1 0 0 1 280.646 421.777 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 439.811 362.308 Td [(T)]TJ +/F67 9.9626 Tf 283.785 421.578 Td [(T)]TJ ET q -1 0 0 1 445.669 362.508 cm +1 0 0 1 289.642 421.777 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 448.807 362.308 Td [(vect)]TJ +/F67 9.9626 Tf 292.781 421.578 Td [(vect)]TJ ET q -1 0 0 1 470.356 362.508 cm +1 0 0 1 314.33 421.777 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 473.495 362.308 Td [(type)]TJ +/F67 9.9626 Tf 317.468 421.578 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.884 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +/F62 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-250(4)]TJ + [-250(2)]TJ 0 g 0 G [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -18.652 Td [(desc)]TJ +/F59 9.9626 Tf -24.907 -19.603 Td [(y)]TJ +0 g 0 G +/F62 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F60 9.9626 Tf 173.89 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ -164.61 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 139.526 0 Td [(psb)]TJ ET q -1 0 0 1 171.218 331.9 cm +1 0 0 1 280.646 342.398 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 283.785 342.199 Td [(T)]TJ +ET +q +1 0 0 1 289.642 342.398 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 292.781 342.199 Td [(vect)]TJ +ET +q +1 0 0 1 314.33 342.398 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 317.468 342.199 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(2)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.603 Td [(desc)]TJ +ET +q +1 0 0 1 120.408 310.84 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 331.701 Td [(a)]TJ +/F59 9.9626 Tf 123.397 310.641 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 324.173 284.079 cm +1 0 0 1 273.363 263.02 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 327.311 283.88 Td [(desc)]TJ +/F67 9.9626 Tf 276.501 262.82 Td [(desc)]TJ ET q -1 0 0 1 348.86 284.079 cm +1 0 0 1 298.05 263.02 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 351.998 283.88 Td [(type)]TJ +/F67 9.9626 Tf 301.189 262.82 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -222.214 -18.653 Td [(global)]TJ +/F59 9.9626 Tf -222.215 -19.602 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 33.763 0 Td [(Speci\002es)-226(whether)-227(the)-226(computation)-226(should)-226(include)-227(the)-226(global)-226(r)18(eduction)]TJ -8.857 -11.955 Td [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F59 9.9626 Tf 165.319 0 Td [(global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ 0 g 0 G - [(.true.)]TJ + 0 -19.603 Td [(y)]TJ 0 g 0 G -/F51 9.9626 Tf -190.225 -30.607 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F60 9.9626 Tf 160.68 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ -151.4 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 139.526 0 Td [(psb)]TJ +ET +q +1 0 0 1 280.646 175.993 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 283.785 175.794 Td [(T)]TJ +ET +q +1 0 0 1 289.642 175.993 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 292.781 175.794 Td [(vect)]TJ +ET +q +1 0 0 1 314.33 175.993 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 317.468 175.794 Td [(type)]TJ 0 g 0 G +/F62 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(14)]TJ 0 g 0 G - 0 -18.653 Td [(Function)-250(value)]TJ + [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 72.776 0 Td [(is)-250(the)-250(in\002nity)-250(norm)-250(of)-250(vector)]TJ/F52 9.9626 Tf 128.562 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -181.637 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.133 0 Td [(global)]TJ/F54 9.9626 Tf 30.675 0 Td [(unless)-190(the)-190(optional)-190(variable)]TJ/F59 9.9626 Tf 121.612 0 Td [(global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F59 9.9626 Tf -24.907 -19.603 Td [(info)]TJ 0 g 0 G - [(.false.)]TJ/F54 9.9626 Tf 75.118 0 Td [(has)-190(been)-190(spec-)]TJ -258.538 -11.955 Td [(i\002ed)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ 0 g 0 G - 141.968 -29.888 Td [(36)]TJ +/F62 9.9626 Tf 114.88 -29.888 Td [(53)]TJ 0 g 0 G ET endstream endobj -1076 0 obj -<< -/Length 2600 ->> -stream -0 g 0 G -0 g 0 G -0 g 0 G -BT -/F51 9.9626 Tf 99.895 706.129 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(The)-190(computation)-190(of)-190(a)-190(global)-190(r)18(esult)-190(r)18(equir)18(es)-190(a)-190(global)-190(communication,)-202(which)]TJ 12.453 -11.955 Td [(entails)-318(a)-318(signi\002cant)-318(ove)1(r)18(head.)-514(It)-318(may)-318(be)-318(necessary)-317(and/or)-318(advisable)-318(to)]TJ 0 -11.955 Td [(compute)-333(multiple)-333(norms)-332(at)-333(the)-333(same)-333(time;)-374(in)-333(this)-333(case,)-354(it)-332(is)-333(possible)-333(to)]TJ 0 -11.955 Td [(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ/F59 9.9626 Tf 52.303 -19.926 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ -0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ -0 g 0 G - [-525(psb_geamax\050x1,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.false.\051)]TJ 0 -11.955 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(2)]TJ -0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ -0 g 0 G - [-525(psb_geamax\050x2,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.false.\051)]TJ 0 -11.955 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(3)]TJ -0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ -0 g 0 G - [-525(psb_geamax\050x3,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.false.\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ -0 g 0 G - [-525(psb_amx\050ctxt,vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ +1319 0 obj +<< +/Length 314 +>> +stream 0 g 0 G - [(:)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(3)]TJ 0 g 0 G - [(\051\051)]TJ/F54 9.9626 Tf -52.303 -19.926 Td [(In)-253(this)-252(way)-253(the)-253(global)-253(communicati)1(on,)-254(which)-253(for)-252(small)-253(sizes)-253(is)-252(a)-253(latency-)]TJ 0 -11.955 Td [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ +BT +/F62 9.9626 Tf 175.611 706.129 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 141.968 -402.49 Td [(37)]TJ + 141.968 -603.736 Td [(54)]TJ 0 g 0 G ET endstream endobj -1085 0 obj +1330 0 obj << -/Length 6238 +/Length 7518 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.5)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(4.15)-1000(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm +1 0 0 1 153.407 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(geamaxs)-250(\227)-250(Generalized)-250(In\002nity)-250(Norm)]TJ/F54 9.9626 Tf -51.12 -18.964 Td [(This)-256(subr)18(outine)-255(computes)-256(a)-256(series)-255(of)-256(in\002nity)-256(norms)-256(on)-255(the)-256(columns)-256(of)-255(a)-256(dense)]TJ 0 -11.955 Td [(matrix)]TJ/F52 9.9626 Tf 31.785 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(:)]TJ/F52 9.9626 Tf 88.539 -11.955 Td [(r)-17(e)-25(s)]TJ/F85 10.3811 Tf 12.294 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 10.3811 Tf 7.042 0 Td [(\040)]TJ/F54 9.9626 Tf 13.273 0 Td [(max)]TJ/F52 7.5716 Tf 7.759 -7.336 Td [(k)]TJ/F83 10.3811 Tf 12.944 7.336 Td [(j)]TJ/F52 9.9626 Tf 3.298 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(k)]TJ/F54 9.9626 Tf 4.598 0 Td [(,)]TJ/F52 9.9626 Tf 4.206 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(j)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -195.028 -22.296 Td [(call)]TJ +/F59 11.9552 Tf 156.993 706.129 Td [(gediv)-250(\227)-250(Entrywise)-250(Division)]TJ/F62 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(entrywise)-250(division)-250(between)-250(two)-250(vectors)]TJ/F60 9.9626 Tf 300.604 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(and)]TJ/F60 9.9626 Tf 19.482 0 Td [(y)]TJ/F62 9.9626 Tf -188.347 -21.112 Td [(/)]TJ/F91 10.3811 Tf 9.054 0 Td [(\040)]TJ/F60 9.9626 Tf 13.567 0 Td [(x)]TJ/F93 10.3811 Tf 5.329 0 Td [(\050)]TJ/F60 9.9626 Tf 4.205 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 4.274 0 Td [(/)]TJ/F60 9.9626 Tf 6.286 0 Td [(y)]TJ/F93 10.3811 Tf 5.231 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(.)]TJ/F67 9.9626 Tf -186.967 -21.111 Td [(psb_gediv\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_geamaxs\050res,)-525(x,)-525(desc_a,)-525(info\051)]TJ + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525([flag\051)]TJ 0 g 0 G 0 g 0 G 0 g 0 G ET q -1 0 0 1 177.192 626.591 cm -[]0 d 0 J 0.398 w 0 0 m 290.737 0 l S +1 0 0 1 183.199 630.896 cm +[]0 d 0 J 0.398 w 0 0 m 177.104 0 l S Q BT -/F52 9.9626 Tf 183.195 618.023 Td [(r)-17(e)-25(s)-8868(x)]TJ/F51 9.9626 Tf 221.013 0 Td [(Subroutine)]TJ +/F62 9.9626 Tf 189.301 622.328 Td [(/)-13(,)]TJ/F60 9.9626 Tf 11.437 0 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(,)]TJ/F60 9.9626 Tf 5.105 0 Td [(y)]TJ/F59 9.9626 Tf 99.043 0 Td [(Function)]TJ ET q -1 0 0 1 177.192 614.237 cm -[]0 d 0 J 0.398 w 0 0 m 290.737 0 l S +1 0 0 1 183.199 618.542 cm +[]0 d 0 J 0.398 w 0 0 m 177.104 0 l S Q BT -/F54 9.9626 Tf 183.17 605.669 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F62 9.9626 Tf 189.177 609.974 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 420.527 605.868 cm +1 0 0 1 326.41 610.173 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 423.516 605.669 Td [(geamaxs)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F62 9.9626 Tf 329.399 609.974 Td [(gediv)]TJ -140.222 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 420.527 593.913 cm +1 0 0 1 326.41 598.218 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 423.516 593.714 Td [(geamaxs)]TJ -240.346 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F62 9.9626 Tf 329.399 598.019 Td [(gediv)]TJ -140.222 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 420.527 581.958 cm +1 0 0 1 326.41 586.263 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 423.516 581.759 Td [(geamaxs)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F62 9.9626 Tf 329.399 586.064 Td [(gediv)]TJ -140.222 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 420.527 570.003 cm +1 0 0 1 326.41 574.308 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 423.516 569.804 Td [(geamaxs)]TJ +/F62 9.9626 Tf 329.399 574.109 Td [(gediv)]TJ ET q -1 0 0 1 177.192 566.018 cm -[]0 d 0 J 0.398 w 0 0 m 290.737 0 l S +1 0 0 1 183.199 570.323 cm +[]0 d 0 J 0.398 w 0 0 m 177.104 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 280.768 537.639 Td [(T)92(able)-250(5:)-310(Data)-250(types)]TJ +/F62 9.9626 Tf 227.467 541.944 Td [(T)92(able)-250(15:)-310(Data)-250(types)]TJ 0 g 0 G 0 g 0 G 0 g 0 G -/F51 9.9626 Tf -130.063 -34.468 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf -127.572 -33.34 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.603 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.926 Td [(x)]TJ + 0 -19.603 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F60 9.9626 Tf 174.06 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -164.321 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.742 0 Td [(psb)]TJ +/F67 9.9626 Tf 139.526 0 Td [(psb)]TJ ET q -1 0 0 1 436.673 415.699 cm +1 0 0 1 280.646 421.777 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 439.811 415.5 Td [(T)]TJ +/F67 9.9626 Tf 283.785 421.578 Td [(T)]TJ ET q -1 0 0 1 445.669 415.699 cm +1 0 0 1 289.642 421.777 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 448.807 415.5 Td [(vect)]TJ +/F67 9.9626 Tf 292.781 421.578 Td [(vect)]TJ ET q -1 0 0 1 470.356 415.699 cm +1 0 0 1 314.33 421.777 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 473.495 415.5 Td [(type)]TJ +/F67 9.9626 Tf 317.468 421.578 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.884 -11.956 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +/F62 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-250(5)]TJ + [-250(2)]TJ 0 g 0 G [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(desc)]TJ +/F59 9.9626 Tf -24.907 -19.603 Td [(y)]TJ +0 g 0 G +/F62 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F60 9.9626 Tf 173.89 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ -164.61 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 139.526 0 Td [(psb)]TJ ET q -1 0 0 1 171.218 383.818 cm +1 0 0 1 280.646 342.398 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 283.785 342.199 Td [(T)]TJ +ET +q +1 0 0 1 289.642 342.398 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 292.781 342.199 Td [(vect)]TJ +ET +q +1 0 0 1 314.33 342.398 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 317.468 342.199 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(2)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.603 Td [(desc)]TJ +ET +q +1 0 0 1 120.408 310.84 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 383.619 Td [(a)]TJ +/F59 9.9626 Tf 123.397 310.641 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 324.173 335.998 cm +1 0 0 1 273.363 263.02 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 327.311 335.798 Td [(desc)]TJ +/F67 9.9626 Tf 276.501 262.82 Td [(desc)]TJ ET q -1 0 0 1 348.86 335.998 cm +1 0 0 1 298.05 263.02 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 351.998 335.798 Td [(type)]TJ +/F67 9.9626 Tf 301.189 262.82 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F59 9.9626 Tf -222.215 -19.602 Td [(\003ag)]TJ 0 g 0 G -/F51 9.9626 Tf -222.214 -19.925 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 21.589 0 Td [(check)-280(if)-280(any)-280(of)-280(the)]TJ/F60 9.9626 Tf 84.137 0 Td [(y)]TJ/F93 10.3811 Tf 5.23 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)-343(=)]TJ/F62 9.9626 Tf 19.108 0 Td [(0,)-287(and)-280(in)-280(case)-280(r)18(eturns)-280(err)18(or)-280(halting)-280(the)-280(compu-)]TJ -112.449 -11.956 Td [(tation.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 40.677 0 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -108.97 -11.955 Td [(Speci\002ed)-250(as:)-310(the)-250(logical)-250(value)]TJ/F67 9.9626 Tf 132.133 0 Td [(flag)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G + [(.true.)]TJ 0 g 0 G - 0 -19.925 Td [(res)]TJ +/F59 9.9626 Tf -157.04 -19.603 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 18.261 0 Td [(is)-250(the)-250(in\002nity)-250(norm)-250(of)-250(the)-250(columns)-250(of)]TJ/F52 9.9626 Tf 166.26 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -164.82 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.956 Td [(Speci\002ed)-330(as:)-470(a)-330(number)-330(or)-330(a)-330(rank-one)-330(array)-329(of)-330(long)-330(pr)18(ecision)-330(r)18(eal)-330(num-)]TJ 0 -11.955 Td [(bers.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(info)]TJ + 0 -19.603 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F60 9.9626 Tf 160.849 0 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(.)]TJ -151.111 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ 0 g 0 G - 141.968 -89.943 Td [(38)]TJ + 85.819 -29.888 Td [(55)]TJ 0 g 0 G ET endstream endobj -1093 0 obj +1336 0 obj << -/Length 7104 +/Length 1288 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.6)-1000(psb)]TJ +/F62 9.9626 Tf 175.611 706.129 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 139.526 0 Td [(psb)]TJ ET q -1 0 0 1 147.429 706.328 cm +1 0 0 1 331.456 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 334.594 706.129 Td [(T)]TJ +ET +q +1 0 0 1 340.452 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 343.59 706.129 Td [(vect)]TJ +ET +q +1 0 0 1 365.139 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 368.277 706.129 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 24.453 0 Td [(containing)-354(numbers)-355(of)]TJ -217.119 -11.955 Td [(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(14)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.926 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 141.968 -535.99 Td [(56)]TJ +0 g 0 G +ET + +endstream +endobj +1345 0 obj +<< +/Length 7434 +>> +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 99.895 706.129 Td [(4.16)-1000(psb)]TJ +ET +q +1 0 0 1 153.407 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(V)111(ector)]TJ/F54 9.9626 Tf -51.121 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(1-norm)-250(of)-250(a)-250(vector)]TJ/F52 9.9626 Tf 206.349 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -211.554 -11.955 Td [(If)]TJ/F52 9.9626 Tf 9.46 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F52 9.9626 Tf 125.989 -21.269 Td [(a)-25(s)-25(u)-25(m)]TJ/F83 10.3811 Tf 25.352 0 Td [(\040)-291(k)]TJ/F52 9.9626 Tf 19.007 0 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(k)]TJ/F54 9.9626 Tf -195.526 -21.269 Td [(else)-250(if)]TJ/F52 9.9626 Tf 28.159 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F52 9.9626 Tf 71.974 -21.269 Td [(a)-25(s)-25(u)-25(m)]TJ/F83 10.3811 Tf 25.353 0 Td [(\040)-291(k)]TJ/F52 9.9626 Tf 18.737 0 Td [(r)-17(e)]TJ/F85 10.3811 Tf 8.17 0 Td [(\050)]TJ/F52 9.9626 Tf 4.443 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.858 Td [(1)]TJ/F85 10.3811 Tf 6.345 1.858 Td [(+)]TJ/F83 10.3811 Tf 10.256 0 Td [(k)]TJ/F52 9.9626 Tf 5.37 0 Td [(i)-32(m)]TJ/F85 10.3811 Tf 11.088 0 Td [(\050)]TJ/F52 9.9626 Tf 4.444 0 Td [(x)]TJ/F85 10.3811 Tf 5.329 0 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.858 Td [(1)]TJ/F59 9.9626 Tf -216.928 -19.411 Td [(psb_geasum\050x,)-525(desc_a,)-525(info)-525([,global]\051)-190(psb_norm1\050x,)-525(desc_a,)-525(info)-525([,global]\051)]TJ +/F59 11.9552 Tf 156.993 706.129 Td [(geinv)-250(\227)-250(Entrywise)-250(Inversion)]TJ/F62 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(entrywise)-250(inverse)-250(of)-250(a)-250(vector)]TJ/F60 9.9626 Tf 252.097 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(and)-250(puts)-250(it)-250(into)]TJ/F60 9.9626 Tf 69.951 0 Td [(y)]TJ/F62 9.9626 Tf -184.401 -18.334 Td [(/)]TJ/F91 10.3811 Tf 9.054 0 Td [(\040)]TJ/F62 9.9626 Tf 13.272 0 Td [(1)-13(/)]TJ/F60 9.9626 Tf 11.562 0 Td [(x)]TJ/F93 10.3811 Tf 5.33 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.089 0 Td [(\051)]TJ/F62 9.9626 Tf 4.149 0 Td [(.)]TJ/F67 9.9626 Tf -181.059 -18.334 Td [(psb_geinv\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(y,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525([flag\051)]TJ 0 g 0 G 0 g 0 G 0 g 0 G ET q -1 0 0 1 128.44 576.025 cm -[]0 d 0 J 0.398 w 0 0 m 286.622 0 l S +1 0 0 1 183.343 637.562 cm +[]0 d 0 J 0.398 w 0 0 m 176.815 0 l S Q BT -/F52 9.9626 Tf 134.691 567.457 Td [(a)-25(s)-25(u)-25(m)-7810(x)]TJ/F51 9.9626 Tf 220.765 0 Td [(Function)]TJ +/F62 9.9626 Tf 189.446 628.995 Td [(/)-12(,)]TJ/F60 9.9626 Tf 11.437 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(,)]TJ/F60 9.9626 Tf 5.106 0 Td [(y)]TJ/F59 9.9626 Tf 99.042 0 Td [(Function)]TJ ET q -1 0 0 1 128.44 563.671 cm -[]0 d 0 J 0.398 w 0 0 m 286.622 0 l S +1 0 0 1 183.343 625.209 cm +[]0 d 0 J 0.398 w 0 0 m 176.815 0 l S Q BT -/F54 9.9626 Tf 134.417 555.103 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F62 9.9626 Tf 189.321 616.641 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 371.775 555.303 cm +1 0 0 1 326.555 616.84 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 374.763 555.103 Td [(geasum)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F62 9.9626 Tf 329.544 616.641 Td [(geinv)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 371.775 543.347 cm +1 0 0 1 326.555 604.885 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 374.763 543.148 Td [(geasum)]TJ -240.346 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F62 9.9626 Tf 329.544 604.686 Td [(geinv)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 371.775 531.392 cm +1 0 0 1 326.555 592.93 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 374.763 531.193 Td [(geasum)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F62 9.9626 Tf 329.544 592.731 Td [(geinv)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 371.775 519.437 cm +1 0 0 1 326.555 580.975 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 374.763 519.238 Td [(geasum)]TJ +/F62 9.9626 Tf 329.544 580.775 Td [(geinv)]TJ ET q -1 0 0 1 128.44 515.452 cm -[]0 d 0 J 0.398 w 0 0 m 286.622 0 l S +1 0 0 1 183.343 576.99 cm +[]0 d 0 J 0.398 w 0 0 m 176.815 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 229.958 487.074 Td [(T)92(able)-250(6:)-310(Data)-250(types)]TJ +/F62 9.9626 Tf 227.467 548.611 Td [(T)92(able)-250(16:)-310(Data)-250(types)]TJ 0 g 0 G 0 g 0 G 0 g 0 G -/F51 9.9626 Tf -130.063 -33.561 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf -127.572 -29.451 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.665 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -18.492 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.666 Td [(x)]TJ + 0 -18.491 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F60 9.9626 Tf 174.06 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -164.321 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ +/F67 9.9626 Tf 139.526 0 Td [(psb)]TJ ET q -1 0 0 1 385.864 366.56 cm +1 0 0 1 280.646 434.555 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 389.002 366.361 Td [(T)]TJ +/F67 9.9626 Tf 283.785 434.356 Td [(T)]TJ ET q -1 0 0 1 394.86 366.56 cm +1 0 0 1 289.642 434.555 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 397.998 366.361 Td [(vect)]TJ +/F67 9.9626 Tf 292.781 434.356 Td [(vect)]TJ ET q -1 0 0 1 419.547 366.56 cm +1 0 0 1 314.33 434.555 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 422.685 366.361 Td [(type)]TJ +/F67 9.9626 Tf 317.468 434.356 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +/F62 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-250(6)]TJ + [-250(2)]TJ 0 g 0 G [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.666 Td [(desc)]TJ +/F59 9.9626 Tf -24.907 -18.492 Td [(desc)]TJ ET q -1 0 0 1 120.408 334.939 cm +1 0 0 1 120.408 404.108 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 334.74 Td [(a)]TJ +/F59 9.9626 Tf 123.397 403.909 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F67 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 273.363 287.119 cm +1 0 0 1 273.363 356.288 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 276.501 286.919 Td [(desc)]TJ +/F67 9.9626 Tf 276.501 356.088 Td [(desc)]TJ ET q -1 0 0 1 298.05 287.119 cm +1 0 0 1 298.05 356.288 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 301.189 286.919 Td [(type)]TJ +/F67 9.9626 Tf 301.189 356.088 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -222.215 -19.665 Td [(global)]TJ +/F59 9.9626 Tf -222.215 -18.491 Td [(\003ag)]TJ 0 g 0 G -/F54 9.9626 Tf 33.764 0 Td [(Speci\002es)-226(whether)-226(the)-227(computation)-226(should)-226(include)-227(the)-226(global)-226(r)18(eduction)]TJ -8.857 -11.955 Td [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F59 9.9626 Tf 165.318 0 Td [(global)]TJ +/F62 9.9626 Tf 21.589 0 Td [(check)-278(if)-279(any)-278(of)-278(the)]TJ/F60 9.9626 Tf 84.227 0 Td [(x)]TJ/F93 10.3811 Tf 5.329 0 Td [(\050)]TJ/F60 9.9626 Tf 4.205 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)-340(=)]TJ/F62 9.9626 Tf 19.049 0 Td [(0,)-285(and)-279(in)-278(case)-279(r)18(eturns)-278(err)18(or)-278(halting)-279(the)-278(compu-)]TJ -112.58 -11.955 Td [(tation.)]TJ 0 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 40.677 0 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -108.97 -11.955 Td [(Speci\002ed)-250(as:)-310(the)-250(logical)-250(value)]TJ/F67 9.9626 Tf 132.133 0 Td [(flag)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG [(=)]TJ 0 g 0 G [(.true.)]TJ 0 g 0 G -/F51 9.9626 Tf -190.225 -31.621 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -157.04 -18.492 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G - 0 -19.666 Td [(Function)-250(value)]TJ + 0 -18.491 Td [(y)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(is)-250(the)-250(1-norm)-250(of)-250(vector)]TJ/F52 9.9626 Tf 102.161 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -155.236 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.133 0 Td [(global)]TJ/F54 9.9626 Tf 30.675 0 Td [(unless)-190(the)-190(optional)-190(variable)]TJ/F59 9.9626 Tf 121.612 0 Td [(global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F62 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F60 9.9626 Tf 160.849 0 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(.)]TJ -151.669 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 139.526 0 Td [(psb)]TJ +ET +q +1 0 0 1 280.646 205.171 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 283.785 204.972 Td [(T)]TJ +ET +q +1 0 0 1 289.642 205.171 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 292.781 204.972 Td [(vect)]TJ +ET +q +1 0 0 1 314.33 205.171 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 317.468 204.972 Td [(type)]TJ 0 g 0 G - [(.false.)]TJ/F54 9.9626 Tf 75.118 0 Td [(has)-190(been)-190(spec-)]TJ -258.538 -11.955 Td [(i\002ed)]TJ 0 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ +/F62 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(16)]TJ 0 g 0 G - 141.968 -29.888 Td [(39)]TJ + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -18.492 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 141.968 -36.266 Td [(57)]TJ 0 g 0 G ET endstream endobj -1098 0 obj +1351 0 obj << -/Length 2603 +/Length 623 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(The)-190(computation)-190(of)-190(a)-190(global)-190(r)18(esult)-190(r)18(equir)18(es)-190(a)-190(global)-190(communication,)-202(which)]TJ 12.453 -11.955 Td [(entails)-318(a)-318(signi\002cant)-318(over)18(head.)-513(It)-318(may)-318(be)-318(necessary)-318(and/or)-317(advisable)-318(to)]TJ 0 -11.955 Td [(compute)-333(multiple)-333(norms)-333(at)-332(the)-333(same)-333(time;)-374(in)-333(this)-333(case,)-354(it)-333(i)1(s)-333(possible)-333(to)]TJ 0 -11.955 Td [(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ/F59 9.9626 Tf 20.922 -19.926 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ -0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ -0 g 0 G - [-525(psb_geasum\050x1,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.false.\051)]TJ 31.382 -11.955 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(2)]TJ -0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ -0 g 0 G - [-525(psb_geasum\050x2,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.false.\051)]TJ 0 -11.955 Td [(vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(3)]TJ -0 g 0 G - [(\051)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(=)]TJ -0 g 0 G - [-525(psb_geasum\050x3,desc_a,info,global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.false.\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ -0 g 0 G - [-525(psb_sum\050ctxt,vres\050)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(1)]TJ -0 g 0 G - [(:)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(3)]TJ +/F59 14.3462 Tf 150.705 706.042 Td [(5)-1000(Communication)-250(routines)]TJ/F62 9.9626 Tf 0 -22.702 Td [(The)-303(r)18(outines)-302(in)-303(this)-303(chapter)-302(implement)-303(various)-303(global)-302(communication)-303(opera-)]TJ 0 -11.955 Td [(tors)-271(on)-271(vectors)-271(associated)-271(with)-271(a)-272(discr)18(etization)-271(mesh.)-373(For)-271(auxiliary)-271(communi-)]TJ 0 -11.955 Td [(cation)-250(r)18(outines)-250(not)-250(tied)-250(to)-250(a)-250(discr)18(etization)-250(space)-250(see)]TJ +0 0 1 rg 0 0 1 RG + [-250(6)]TJ 0 g 0 G - [(\051\051)]TJ/F54 9.9626 Tf -52.304 -19.926 Td [(In)-253(this)-252(way)-253(the)-253(global)-253(communication,)-253(which)-253(for)-252(small)-253(sizes)-253(is)-252(a)-253(latency-)]TJ 0 -11.955 Td [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ + [(.)]TJ 0 g 0 G - 141.968 -402.49 Td [(40)]TJ + 166.874 -568.992 Td [(58)]TJ 0 g 0 G ET endstream endobj -1106 0 obj +1359 0 obj << -/Length 7308 +/Length 6634 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.7)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(5.1)-1000(psb)]TJ ET q 1 0 0 1 147.429 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(geasums)-250(\227)-250(Generalized)-250(1-Norm)-250(of)-250(V)111(ector)]TJ/F54 9.9626 Tf -51.121 -20.192 Td [(This)-216(subr)18(outine)-217(computes)-216(a)-217(series)-216(of)-216(1-norms)-217(on)-216(the)-217(columns)-216(of)-216(a)-217(dense)-216(matrix)]TJ/F52 9.9626 Tf 0.294 -11.955 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(:)]TJ/F52 9.9626 Tf 120.031 -13.856 Td [(r)-17(e)-25(s)]TJ/F85 10.3811 Tf 12.293 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 10.3811 Tf 7.041 0 Td [(\040)]TJ/F54 9.9626 Tf 13.273 0 Td [(max)]TJ/F52 7.5716 Tf 7.76 -7.335 Td [(k)]TJ/F83 10.3811 Tf 12.944 7.335 Td [(j)]TJ/F52 9.9626 Tf 3.298 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\050)]TJ/F52 9.9626 Tf 4.273 0 Td [(k)]TJ/F54 9.9626 Tf 4.598 0 Td [(,)]TJ/F52 9.9626 Tf 4.206 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(j)]TJ/F54 9.9626 Tf -215.202 -24.535 Td [(This)-250(function)-250(computes)-250(the)-250(1-norm)-250(of)-250(a)-250(vector)]TJ/F52 9.9626 Tf 206.349 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -211.554 -11.955 Td [(If)]TJ/F52 9.9626 Tf 9.46 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F52 9.9626 Tf 125.227 -23.185 Td [(r)-17(e)-25(s)]TJ/F85 10.3811 Tf 12.293 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 10.3811 Tf 7.041 0 Td [(\040)-291(k)]TJ/F52 9.9626 Tf 19.007 0 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(k)]TJ/F54 9.9626 Tf -196.039 -23.185 Td [(else)-250(if)]TJ/F52 9.9626 Tf 28.159 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(1-norm)-250(as:)]TJ/F52 9.9626 Tf 71.212 -23.185 Td [(r)-17(e)-25(s)]TJ/F85 10.3811 Tf 12.294 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.089 0 Td [(\051)]TJ/F83 10.3811 Tf 7.041 0 Td [(\040)-291(k)]TJ/F52 9.9626 Tf 18.737 0 Td [(r)-17(e)]TJ/F85 10.3811 Tf 8.169 0 Td [(\050)]TJ/F52 9.9626 Tf 4.444 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.858 Td [(1)]TJ/F85 10.3811 Tf 6.345 1.858 Td [(+)]TJ/F83 10.3811 Tf 10.256 0 Td [(k)]TJ/F52 9.9626 Tf 5.37 0 Td [(i)-32(m)]TJ/F85 10.3811 Tf 11.088 0 Td [(\050)]TJ/F52 9.9626 Tf 4.443 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.858 Td [(1)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -212.21 -21.96 Td [(call)]TJ +/F59 11.9552 Tf 151.016 706.129 Td [(halo)-250(\227)-250(Halo)-250(Data)-250(Communication)]TJ/F62 9.9626 Tf -51.121 -19.15 Td [(These)-250(subr)18(outines)-250(gathers)-250(the)-250(values)-250(of)-250(the)-250(halo)-250(elements:)]TJ/F60 9.9626 Tf 158.568 -25.014 Td [(x)]TJ/F91 10.3811 Tf 8.097 0 Td [(\040)]TJ/F60 9.9626 Tf 13.567 0 Td [(x)]TJ/F62 9.9626 Tf -180.232 -22.11 Td [(wher)18(e:)]TJ +0 g 0 G +/F60 9.9626 Tf 0.294 -20.212 Td [(x)]TJ 0 g 0 G - [-525(psb_geasums\050res,)-525(x,)-525(desc_a,)-525(info\051)]TJ +/F62 9.9626 Tf 10.187 0 Td [(is)-250(a)-250(global)-250(dense)-250(submatrix.)]TJ 0 g 0 G 0 g 0 G 0 g 0 G ET q -1 0 0 1 126.328 514.627 cm -[]0 d 0 J 0.398 w 0 0 m 290.846 0 l S +1 0 0 1 179.582 596.326 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F52 9.9626 Tf 132.33 506.059 Td [(r)-17(e)-25(s)-8868(x)]TJ/F51 9.9626 Tf 221.014 0 Td [(Subroutine)]TJ +/F68 9.9626 Tf 185.685 587.758 Td [(a)]TJ/F62 9.9626 Tf 5.384 0 Td [(,)]TJ/F60 9.9626 Tf 5.276 0 Td [(x)]TJ/F59 9.9626 Tf 110.13 0 Td [(Subroutine)]TJ ET q -1 0 0 1 126.328 502.274 cm -[]0 d 0 J 0.398 w 0 0 m 290.846 0 l S +1 0 0 1 179.582 583.972 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F54 9.9626 Tf 132.305 493.706 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F62 9.9626 Tf 185.56 575.404 Td [(Integer)-8983(psb)]TJ ET q -1 0 0 1 369.663 493.905 cm +1 0 0 1 322.794 575.603 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 372.651 493.706 Td [(geasums)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F62 9.9626 Tf 325.783 575.404 Td [(halo)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 369.663 481.95 cm +1 0 0 1 322.794 563.648 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 372.651 481.751 Td [(geasums)]TJ -240.346 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F62 9.9626 Tf 325.783 563.449 Td [(halo)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 369.663 469.995 cm +1 0 0 1 322.794 551.693 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 372.651 469.795 Td [(geasums)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F62 9.9626 Tf 325.783 551.494 Td [(halo)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 369.663 458.04 cm +1 0 0 1 322.794 539.738 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 372.651 457.84 Td [(geasums)]TJ +/F62 9.9626 Tf 325.783 539.539 Td [(halo)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 126.328 454.054 cm -[]0 d 0 J 0.398 w 0 0 m 290.846 0 l S +1 0 0 1 322.794 527.783 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 325.783 527.583 Td [(halo)]TJ +ET +q +1 0 0 1 179.582 523.798 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 229.958 425.676 Td [(T)92(able)-250(7:)-310(Data)-250(types)]TJ +/F62 9.9626 Tf 227.467 495.419 Td [(T)92(able)-250(17:)-310(Data)-250(types)]TJ +0 g 0 G +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -127.572 -24.102 Td [(call)]TJ 0 g 0 G + [-525(psb_halo\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -130.063 -37.636 Td [(T)90(ype:)]TJ + [-525(info\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ + [-525(psb_halo\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -22.46 Td [(On)-250(Entry)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -22.459 Td [(x)]TJ + [-525(work,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(data)]TJ +0 g 0 G + [(\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -22.301 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20.308 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.309 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 89.688 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -79.949 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.956 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ ET q -1 0 0 1 385.864 295.5 cm +1 0 0 1 385.864 348.823 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 389.002 295.3 Td [(T)]TJ +/F67 9.9626 Tf 389.002 348.623 Td [(T)]TJ ET q -1 0 0 1 394.86 295.5 cm +1 0 0 1 394.86 348.823 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 397.998 295.3 Td [(vect)]TJ +/F67 9.9626 Tf 397.998 348.623 Td [(vect)]TJ ET q -1 0 0 1 419.547 295.5 cm +1 0 0 1 419.547 348.823 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 422.685 295.3 Td [(type)]TJ +/F67 9.9626 Tf 422.685 348.623 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +/F62 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-250(7)]TJ + [-250(17)]TJ 0 g 0 G [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.459 Td [(desc)]TJ +/F59 9.9626 Tf -24.907 -20.308 Td [(desc)]TJ ET q -1 0 0 1 120.408 261.085 cm +1 0 0 1 120.408 316.559 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 260.886 Td [(a)]TJ +/F59 9.9626 Tf 123.397 316.36 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 273.363 213.264 cm +1 0 0 1 309.258 268.738 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 276.501 213.065 Td [(desc)]TJ +/F67 9.9626 Tf 312.397 268.539 Td [(desc)]TJ ET q -1 0 0 1 298.05 213.264 cm +1 0 0 1 333.945 268.738 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 301.189 213.065 Td [(type)]TJ +/F67 9.9626 Tf 337.084 268.539 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -222.215 -22.459 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -258.11 -20.308 Td [(work)]TJ 0 g 0 G +/F62 9.9626 Tf 28.782 0 Td [(the)-250(work)-250(array)111(.)]TJ -3.875 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(of)-250(the)-250(same)-250(type)-250(of)]TJ/F60 9.9626 Tf 218.454 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ 0 g 0 G - 0 -22.46 Td [(res)]TJ +/F59 9.9626 Tf -248.566 -20.309 Td [(data)]TJ 0 g 0 G -/F54 9.9626 Tf 18.262 0 Td [(contains)-250(the)-250(1-norm)-250(of)-250(\050the)-250(columns)-250(of\051)]TJ/F52 9.9626 Tf 176.182 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -174.742 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Short)-255(as:)-320(a)-255(long)-254(pr)18(ecision)-255(r)18(eal)-255(number)74(.)-325(Speci\002ed)-255(as:)-320(a)-254(long)-255(pr)18(ecision)-255(r)18(eal)]TJ 0 -11.955 Td [(number)74(.)]TJ +/F62 9.9626 Tf 24.349 0 Td [(index)-250(list)-250(selector)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Speci\002ed)-190(as:)-280(an)-190(integer)74(.)-290(V)92(alues:)]TJ/F67 9.9626 Tf 136.507 0 Td [(psb_comm_halo_)]TJ/F62 9.9626 Tf 73.224 0 Td [(,)]TJ/F67 9.9626 Tf 2.491 0 Td [(psb_comm_mov_)]TJ/F62 9.9626 Tf 67.995 0 Td [(,)]TJ/F67 9.9626 Tf 4.503 0 Td [(psb_comm_ext_)]TJ/F62 9.9626 Tf 67.994 0 Td [(,)]TJ -352.714 -11.955 Td [(default:)]TJ/F67 9.9626 Tf 39.042 0 Td [(psb_comm_halo_)]TJ/F62 9.9626 Tf 73.225 0 Td [(.)-634(Chooses)-358(the)-358(index)-358(list)-358(on)-357(which)-358(to)-358(base)-358(the)]TJ -112.267 -11.955 Td [(data)-250(exchange.)]TJ 0 g 0 G - 141.968 -29.888 Td [(41)]TJ + 141.968 -29.888 Td [(59)]TJ +0 g 0 G +ET + +endstream +endobj +1366 0 obj +<< +/Length 3039 +>> +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 150.705 706.129 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(global)-250(dense)-250(r)18(esult)-250(matrix)]TJ/F60 9.9626 Tf 117.085 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -107.346 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Returned)-285(as:)-381(a)-285(rank)-285(one)-286(or)-285(two)-285(array)-285(containing)-285(numbers)-286(of)-285(type)-285(speci-)]TJ 0 -11.955 Td [(\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(17)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.926 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F60 9.9626 Tf 160.68 0 Td [(y)]TJ/F62 9.9626 Tf 5.106 0 Td [(.)]TJ -164.68 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value)-250(that)-250(contains)-250(an)-250(err)18(or)-250(code.)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +ET +1 0 0 1 210.511 336.406 cm +q +.45 0 0 .45 0 0 cm +q +1 0 0 1 0 0 cm +/Im4 Do +Q +Q +0 g 0 G +1 0 0 1 -210.511 -336.406 cm +BT +/F62 9.9626 Tf 240.086 304.526 Td [(Figur)18(e)-250(3:)-310(Sample)-250(discr)18(etization)-250(mesh.)]TJ +0 g 0 G +0 g 0 G +/F59 11.9552 Tf -89.381 -23.91 Td [(Usage)-325(Example)]TJ/F62 9.9626 Tf 87.482 0 Td [(Consider)-325(the)-325(discr)18(etization)-324(mesh)-325(depicted)-325(in)-325(\002g.)]TJ +0 0 1 rg 0 0 1 RG + [-325(3)]TJ +0 g 0 G + [(,)-343(parti-)]TJ -87.482 -11.956 Td [(tioned)-219(among)-220(two)-219(pr)18(ocesses)-220(as)-219(shown)-220(b)1(y)-220(the)-219(dashed)-220(line;)-229(the)-220(data)-219(distribution)]TJ 0 -11.955 Td [(is)-343(such)-342(that)-343(each)-343(pr)18(ocess)-343(will)-342(own)-343(32)-343(entries)-343(in)-342(the)-343(index)-343(space,)-366(with)-342(a)-343(halo)]TJ 0 -11.955 Td [(made)-355(of)-355(8)-355(entries)-355(place)1(d)-355(at)-355(local)-355(indices)-355(33)-355(thr)18(ough)-355(40.)-624(If)-355(pr)18(ocess)-355(0)-355(assigns)]TJ 0 -11.955 Td [(an)-280(initial)-280(value)-280(of)-281(1)-280(to)-280(its)-280(entries)-280(in)-280(the)]TJ/F60 9.9626 Tf 173.857 0 Td [(x)]TJ/F62 9.9626 Tf 7.997 0 Td [(vector)74(,)-288(and)-280(pr)18(ocess)-280(1)-280(assigns)-280(a)-280(value)]TJ -181.854 -11.955 Td [(of)-314(2,)-329(then)-313(after)-314(a)-314(c)1(a)-1(l)1(l)-314(to)]TJ/F67 9.9626 Tf 106.994 0 Td [(psb_halo)]TJ/F62 9.9626 Tf 44.966 0 Td [(the)-314(conten)1(ts)-314(of)-314(the)-313(local)-314(vectors)-313(will)-314(be)-313(the)]TJ -151.96 -11.955 Td [(following:)]TJ +0 g 0 G + 166.874 -118.447 Td [(60)]TJ 0 g 0 G ET endstream endobj -1111 0 obj +1362 0 obj +<< +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (./figures/try8x8.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 1369 0 R +/BBox [0 0 498 439] +/Resources << +/ProcSet [ /PDF /Text ] +/ExtGState << +/R7 1370 0 R +>>/Font << /R8 1371 0 R/R10 1372 0 R>> +>> +/Length 3349 +/Filter /FlateDecode +>> +stream +xœ]›½Ž$¹„ý~ŠötkìÿªXtÒj½ÃY}Ð-q’¡×WWfDFr±Æ$ɬªo›1d%ç÷gù¨ÏrÿÃÏ×o¿Ìç¯ÿ}Œãù¿‡=¿üù1Žùœs<³hÔñü÷ã×^Ù½ŽÔ}ì®c©¿õÅþVzê_=ú©þ>⩽¦ÇŽí'ž»!ïˆw È;\)ònÒBBZÃ!-$äݤ…5=ÖOÈztRZHLk€Óâ•/©ÇDµX=&¬·œÖcâZ ¼× b!r+“ÈÙ@¶xå €ì1‘­d‰ì-Gö˜ÈÖ²Ç5ÙGù¬|‡|7ˆ|Ç+_@d‹ùnÙâ@¶-ä»Ed‹k ²¹×И…D¶-^ù {LdkÙc"{Ë‘=&²µ€ìqÍ@ö‘@>Cqò)ÍY¼òD>³ì¬Eä3 Ï[@>³ô¬Eä3‹ÏiŠy´Ÿ…aeMò³xå èf-ËÏZô³–åç-8ZËò³=­eù9M€>åyÝœÅFê@l#mÄH?Ki³h¤œy¤ž1ÒŽ–FêÙb¤®+Ô‘ ¬ü|cîÇÌ3FŒ9FŒ™#7³Fê#Æ#ÆŒcÖH1ÇÈ!€•ŸïÌ¥‰ùŽÅì#ÎÌgƈ1ÇH=cÄ™9âÌ>âÌ1RDàÌ9°òó¹+˜-fŒsŒ3GnfÔ3FŒ9FŒ#Ƭ‘"cŽ‘C+?ߘëÁlq0cĘcĘ9r3k¤ž1bÌ1bÌ1fsŒXùùÎ<¤A‹Å<¤Áqæ! j¤ž1âÌ#k#Î<²922Á!€•ŸoÌ—$xe^à•õwI~WVß%ñ]Y{—¤weå]Þ•tw…ì®MuIt›æ’ä6Å%ÁmzKrÛԖĶi-I-+MBK½ð9 Ë1>úó(çÇ,Ým;V¥¯Ç(ó£ñ7P?#¿Óz_þq"ûýmó»ÜÁëw¿›`ùÁòz|ÛíÌÚÑìwM,¤…fÍ~F~§Ã:Z?kF³ßE±Öš5í©ž, Í]ËÌ`ÍŒf¿ cí/4kÞÈï4R •¹¡U|ö$´»é3Ðü.ßvsò°ohSaAB»›†æù~éhíèÍ~ëÇŽFhÖ´§z~° dáµ2š-Eb‡%4kÞÈï´EG«kmh…Ÿ}éÚÝt€E4ÜåÛn5öM¶|Š=aBëò;Ýhc“­¯bß(4k:@Èwù¶¹‹Eï/õ}3^<1sÁ„˜Ýatε©Û³>øµI`AÌ&GR€0wªÆIØ¿ÿüúOQu}ýçN5h=u÷ |ù§¨z|ù·õœÅîOïžó¶û××cdg¶ÌóôXñ½°ø³y‰ìÛ¶Ö +;ãÍï,_0 Ž 7k‚3ãŠM·à¼¿Dz,íÙ·qž<à¬åõ|Ád8¬Ðlåã…ëͺâM༿Dz¬áçpŸ?,pw SpeŸV.Å<î®cB`jgý%Òc±æp0©È>ðùÃÔxó»åõ|Á$8®¹,vŸÂõf_ñNFpÞ_"=Ve/,ÐÖWøù»­în`\òµmqeqÏ‚0‹÷H ®CHåàÆ&3°x×pÖ€‘œ-¯§,„[áú‰ù˜›&ÔÀd.¹œŒfÅì…e1,jÙ’µ¤07®Æ©Ø•@!Lqu aî\ƒFT7¡ fâJî–÷±ÃÝŽ+ÜM/»Þ·×üÀ"Îw·ÈZh!öùA¾™¿ÂóùÄý­ß˜?´Cœõá¾/¬ƒÏûAúÈ~—ö•ÒïüúÉi™Y“âˆì 1 ÃÄ<qVÖÆ¢4"›,YIsCk1#›0B3¡õ¤‹¹¡ð§ºÙ +U1š\o{¯Wµçlµ…ëé}ùëÑJ“ë­´\ZŽ!¶Ybþín¨جòþÖ_µ«Oâã;¼ª'®7wÓ›þàs×[i¸´"óØ]Œù·»©|Þ_µµOæÃûºªÝ'®7wS"øÜõVZ .-ÊÏÌW9p=òYÕþV<‰ï檶 ä뜸ùÌõVZ .­ËïŸÚÿŠ'óÁõ¦6¨¸Þ]/ÊœÁçý‡Ö†ªp.¾Êy ëÏú§¶ÀâI|t½©=*ù:ç…®>ë?´6T5|t1䜺îoýS»`ñ$>ºÞÔ6×»ëEi9ø¼ÿÐÚP•‹«+󕘸ø¬j#,žÌ7¤ºø:ç…®¾N}0ÿÐ qÖ‡»[”̓Ïû§öÂâI|—ä®ç×ONËÌê˜Gd±J³0|Ìó'aem,J#²gÙ\O…Ÿõ¹¡µ˜‘M¡‹™ÐzÒÅÜÐFøSÝl…ª˜ M®·Õ}›v¬½•p=Uí_^K¸žWƒ‘_´FCl³ÄüÛÝpÁf•÷·þ¦±x+¹M;V\﵇ª¢žçý%%ŠÖh»‹1ÿv7H>ïoÚ‹'ó¡jÛ´cÅõ^€¨ªLàyÞ_Re¢h†xf¾Êy€ë‘Ïú›vÄâI|¬Ð6íXÉ×9/p=òYI剢5šÇîbÌ?8p=Þßú›vÄâI|¬Æ6íXq½—"ªjxž÷—T£(Z£!^™¯Ä<¸ë‘Ïú›vÄâÉ|Cú`á|ó×#_§>˜_´FCœõa7ÁçýM;bñ$¾Kòˆúƒ_?9-3«cR‘]b•f!} ù‹“°²6¥Ùmd×Ëe×–ö¬ñÕ‹oÞöÅã÷n&´žt17´þT7[¡*fBK®—ÎÁx ›ªZèàÏ]-r½SkC”yñÔŒÏòÍÝN.xëŸÚ‹'ñÑõ¦v¬¸WU.ð<ôkm¨S1࣋!ßÜ-5Ÿ÷OíˆÅ“ùàzS;V\²«Šxúµ6Ô òÁÅÀW9t½©ò¦øÊ>¿t½©+ù:ç…®>ï×ÚP§]ÀGCþÁy ëMU0ü¹ÈŸ×îzéÜŠÇp±© +†¸úµ6ÔÉ–O¹¬ÌWbàzSE 0¬àË®—Ψxܳ>P…Uƒ|úˆò¬Öhˆ³>ÜÝâð^ðyÿÔŽX<‰ï’<ÂÇfÔ1â@aÐMŠC5Ú¢"mZ½!qVÖÆ¢4"{^›ëéŠÏúÜÐZÌÈ&ŒÐÅLh=ébnh#ü©n¶BUÌ„&×]Õ ÝÅFS5Cg_QUÍðӂȯZ£!ö²{U5'"ýÈCS5 žßÛæzÖæI¿®+®÷ó'MÕ <Ïû«Ö†:%>A©ªfè\dðy׎X<™§úºv¬¸Þ¡4U3ð<ï¯ZêD ùp¥ªš¡â³þ®±xOðuíXÉ×9/<ŠRUÍÀ‰¿|p+q¥ªš¡³¨Áçý];bñ$>žÖëÚ±âz?ŽÒTÍÀó¼¿jm¨“~/žú[™¯Ä<¸ë‘Ïú»vÄâÉ|Cú€‹‘¯s^x(¥ªšá]ú€[!ÎúðS)MÕ ÞPÌïms=k_’} ×ONËÌê˜Gd×X¥YgSjT3âÔr°-J#²{Ë®ç_ŤŒ¹¡µ˜‘M¡‹™ÐzÒÅÜÐFøSÝl…ª˜ M®÷Ó[ÎÏŸŸåùËãý¤†¿õ8šÇ÷‰<ÿÓ”ÿüúøýQ=Æ×oÏ?~}üøåzÖöüúχÿeK}ŽãýÜò±ž_{üôCýô¹Ï£üÐ>}¥”:~ü<ðóÄωŸ×§Ÿ¿þåíÝíýŸšÇóë/Îñé뿟ߟvñvÿ®Ý¾k×ïÚeoë»öõ]{Þí·¯??·~¢ûCÞŸ#²ÚaYïá÷…–aXŽí)Ãüã—÷'W¯û³êíãýŸ¼žkà®3Ô{οþõÝ*ŸþðøÓ×Çßßÿþ{Ü +endstream +endobj +1375 0 obj +<< +/Filter /FlateDecode +/Subtype /Type1C +/Length 13073 +>> +stream +xœºwxWö?laÏŒ˜ r‘G¶5h†ôB'ZBïL·1`pø˶$K–eK²%«Yr•-˽w Lï%„ ”$$$¤m²›Æî{½ûýÈ–ßû<ïûýãõ<~4£¹ºsçÜs>çs +ÏÇo”dz%!)&cÒ攤}ÉÞ뉚ç7Ê#ñ-VyRŸ/Å<ÌŸâ1¾ÅcüNŽc~G¿8ŽfüÇÑA>~<Þ¬5›íonݼý­ &.II•§'ÄÅKÇOŸ:mÆøhùøÝ¿4&#!.yüëÜIVÌ¡”Ô¤˜déú„¤èÌŒñ/ž<~sL\æ¡}éÿúøïDÿÿ¦æùsïæ³Ý§•À‹åý<*Åw¬ïjßM¾}~´_¤_¬_;ÆÃæbë±d,ëÆð`|<þ +¾—ãýø)üþ þÿ'‘G˜;?€Ì·ò«ø®ÑüÑ£•£¿'7ÇÉÈûà=°,Ù ¨­c|Æ?æ1³ÆèÆÜ;nìkcç]4vÍØð±ÛÆÆ-ûpì+ˆ¤ +ä‚|ApQð‘àgÁßÿôçû ü—û¯ößà¿Í¿Ø¿Ä¿É¿Ó¿×ÿI€0`I@x€2   0 $àTÀÕ€Û¾ ôd_ |#p]`Z`f 5°.ðbàõÀ? šô~Ðò ¸ Â ª Ú æ Ž ž þ ÓA烾 ú6è7¡¿P('|S8[ø¾p±p¥p¿0Gh– +Â&a‡°GxEø¡ð¡ð ! ö ‡³Á¯O^¼*8|=øqð“à?ÿüwʇR¯Ss¨ÔJjµ‹ÚO¤”TUBÕPõT'u’ºI=¦þLý&%"Eþ¢0ÑK¢·DSE³DóEËD«EE[D¢ƒ"¹¨HdÙEu¢ÑIÑ%Ñ ÑmÑ}ÑW¢ŸE#!£CD!lÈÄi!³C„, Y²1$2$>D’b ) q†4„´†t‡9r!äÇ!C~ù-…ú…Ž„ +Sðˆ—¢ÒÑ+F~*þŠÅhÃb8™øÂP©®œÃo€J)®ž«RÏ2ðÑd˜Ž·›¬ÖVÒ8äܪêéü&¼¾‚´Êð»úŠ\óþÈÆåàY“R)Ôz%-ÕV ±p;qFߦldz3÷Ön¤'-ÚµVZ›ÝØXç®·mF;k(5:ìâ–ÎúÃ'[’·2ë 4i½<o ?3[•pP à?žÏ¦P8‡^AQ“:§œÛÌ®½ôôÀ×4Ü ÇAFý%ùÉ–ËìGkgõN ãüŽ—'ö²çvuLB@¼Q½8©êÍt{Ð`‚;èã'Põ‹p¢'œÛÙ,\8.³0O­ 5 +«CÁ +»ªbcé}{ÉÙøCò}ÛÅó†Â¡ßFæÎ3û›bø¥¹¶¬tñª»O[w +v2R¼Åcõ¸pm­©¢ÊI—×jk¹‰”ýƒ4äýè÷Ï·Ü@¡wØ—/î:/>¤ïÚ±#²¸¦#±*­z#§ÈZ<{)2mbиï߀A0èÇgœv…Nù³¹)Ô—' £V.<Ûx8‹M9“sûcñŸŽ_»Ã€%dMù$ p‰`¼ßiÉ©ìÃnâØp}E}²ñÐ*ņÕêœ]E|%<à$vÙò*Œ'ùPNü°ç¢݇kV0­y»,!o»2LAXìp¦¤µT¹‹E&¸1/wg@žïó7j©KvYr7Š†%¡PØtîÔw⣹}imÌ`̬æÉ4j“á÷ +ÊòM‹É|ÍB_Ûk‰ù6u…ñ.î$.˜a>ºŠÁ™øH„ç ÕZî8 /í´3k_œŒÙ¯›`àƒB(Áòð½¾HGë4f[ëJmô{›·/ ïÜÿe{é Ö“]“qH¼'1eÇòøþ_¥LšZ‰—Ù­æ +èår½ÞÄ€¸¶ÊÑif¬eŦr¶uɈaÎh~F/Á´%-&ŒïäçO-äÖy¹–˜nÎ-7>àÃm|f]ýº¹þ´5 tyÆSŽÊs9ÝáP¬aÑ:bòðÖÏÏn>y„9r²öÚc1$bžìLÏÎN—•åÔg3-%%¿"£äÐAñÛ›–ÎKÉ(¯U3y®ÂÆa1Œ!Ú9³®b<ᵈžÿ|yzˆa¸KÇ#_Þ›†¨BþJ|¾¹Àbìå{’óÝÊʇV> p$xý KÀaš‡]ÃhÊc*IŸ—¥fTé©Yt|J÷QÖJ˜® õÃу|p¯z (E ÑgªÎ1ǣݨ.$¾v8Ošä$:@§èó³I˜U“ƒÇ*+ûŠX¸º0W"Ñ’8Ó% ß×£{žM½òNÜÂùì’eû'¼!Füþé0K?yÿ ýëÓ¾»wÙO>éÿî™øYì·‹î2÷Në{‰F?ŽQp œÈ[`8œˆ&¢-h3š„&£ o½ßys³òøÇ߈A¶d“Äl3›îJÀR@yëÈ{ØHðwäEÒ…äĽjU!wßš[a¼Ç÷8ÎœG¨SÔ#¿×ÁùØÈU\‡æcxäÕÕ@ÂÌ?7R„gækféù2àù.¥’Æ'¤"ŸB>*Ã=K=Åœ# D6Z€5ã5¿ô¶A‡iøȪ‘¿bJÏU¢.ÅÄ%ÊZ5 '¿ Š7Kµ 9#k½z·¤iÎ’?ý×JVP@ÂQ*éÎldžΟ:¶…5%P=íIqqÉÉ1ñÉm}½ím½,ðµ‹`®Ï6í³¨Ë}ap-Ç£ V½Eo) +sh,:½8?_§fdJ˜ìÄ÷XÕî;&Ám¡m-0úµTß0„9Ñ^1`,/ÌåÆæªr"˜g6Qh> NÎ߬ K*ÀrSe©©Œî.Í`G²㶢¢¥y|ð¤ì?¯S-vçVpýûv×3†Õ 5ŠRÂU™Ÿ›_˜¯Éc9 ~ K{Š›¾µ•ž²ð]5([áÂOÙòÍSùÓS _ÂòË eâªrG-Sê?fêVôï…£( +Cs±]’e tFÔÈñ<­Ía·Øí¥, €Ó0Á{¸ø©ËyÎʯEÛdD±\—#Ž]„F½ŠøLŒÂoW:/3.$“—+•ËX•E¼ +ù‹à¨>qM¹­Çë3¸ß+¬U^˜W“qzE…²"‡;”Õ™¡h5Ê[>#!-Â&‡%."ÁœWâ;í'®3P7`°$Wµ„@Í"A éYå¦à›«Œ—ÚhDM}Û]±».†­N­Tv§.FãCûð ?–ÓӠЋÙê“}lNcnÅ¡z~âªmkyÔ1½gÅÂiE{:“ÕI¥^R_üHüÉkwN¸yó&P¯áPˆrX_•ï +;žÓ¸‹FÄd4½…‚ïLûí“‹=wŽ°%1Žl§”¯(/Ï·Ó›Ùbg¿Äz ­¢À ¡÷'÷}¢`g¸(Ä ¦wÄ +GCvð‹Û” n¶JêH¯{Ÿ/°pÈ#'nÛ2¿#F‚BgEç*Òiu¾µJÆVeçYtb¦<5®Szúèáê¶6¶¾¾´²ó\í¡B>éï¼næ»P†œ8Y©•j‹‹óÓÙ‚L$B½ˆ ˜ª9¿¥R\ç*ëg*]8(îó|ÙÇûýWÎyjìãܳ® ×Ø.÷=ýIüMòÃðóÌG+¯IhÔ Ãif‰q±Z½Pχ›=7¨Ž2Ç‘¯nä$ 3·ÎD>bDFÏšÉ$ïؤžAïW—v²àeÌÕÒñùåÇYÏ BoŽÌIÈ[¥ãôßa+.¥ûJ5Ñ,šJtƤ–ï!Y ½Œ‚ïÍ€¾úÝ'ºÙ5„µ¾qC®PZ­Z“Çd+Ò4Éô¬­ŸrêwòÞýsÑÛXÐË"ŸXJ²±E<ØÑrübûÁi ŸŠØA[5š™ãÀëœj#[ W¨™ÍsößÀrk5eâ²J{ c¯F‘Üc…&㧈:¶8tÅK[f¨Õ1ú0™Ëskòˆì¼’ +‡Ùæîd?„¼[hV!/Qæ‹só´™L$n•÷óQn?§à\ü’ÑfXÆwA7-ÎJ 7ñÑ\h†“ðßÌéåÓkL¡ø66b”ãÀEfå¼M‚zYŸÝn2•0à0i+.6ß•Õ¼Ò2¥`íО«Cƒug®2 Ø wzh7ÏC<ŸF©q”7rdÿ¤<Í*gIiµj"NZÕ`·7–5±GžaP÷¿U´æ(ŲÜ8&G;\¹xTZÛ×qì0ÔÏ0ÿo(wÉvÎÒ;dÄ]™Æ´ˆþã€v™ +œ ‚‰€„ösÙdÜ´£$ÃÂÔêË:«Þ¦·…™óšR½5,¹ùP»Ñ¯Sd»t´žûËmÚßÇv¨Éj:X¨ Õi±ùoEoÊë,E%6³Éfe._:Ü÷ñU¾nõì¦ô–u1¹á^U±™0³ÓÉ¡eC©b/;âO,(JÏËf4:UŠVjìu +¶A®*Í¡¥rEfâ€ôüÐ᪦¶·³þÖ#1ðúó9®ŒDKJÕBZðݧr +0ñ6š{(Û¨ObáJâvµóÇÄ/Q¶–Z³›þÓ5§¯]?-+ÇRÏÑ$‰0à+¯fªkP–¢¯”ü!0LÖe¶€µš`r'$zqÝY{ÖÌpû'€ob.YÁdNmÞÐNÅä¨ÔRZ•]R*g3*0ḊŒŒ’LZ81E"×Q^íl„8YUØD_€A¸ŽZ³™žÎ!GºpH­ýø-ñæ¾BQè£7ž;Ý2Üͺ+Û\øè £n:ºûOѧÛcVYX #Éò³¸€&l©Äó澞7á= +%kÕ²}FnÄäPæ"ä&lŸY]{] “› (B°kíj¥Xž§KàF÷)Æ^k«)7fÖ§gdffhl +»Œqï°flÇ™”Ö}5|‡LfÓÐR…<-Å©l«(³”6²Îk¤gËÒ3Ü\ çv76f»¥,'®‹›/yBÁθ¸ÕJ¶ ÷"o‡»p-d%ðÊC_xÆRÓ Ç„8Â+WÀlÎMí´èKßñŸ¶Õš†9ò-Z­¸@«W3r˜ü‡·çvã8ÙÓØÐÝ™Y«Õ:Æ 7é]‰£«¥ç|? èrg¡ÖÉ6äa O¢×/Ý>•Ñµa½DÅgHÀšè̃\¤q{óO?ß¾ú»›‘Y°÷Ó#¢WÒ+wœºÕæ¨kè`UmÚº½¶¢Î-­ŒOÏÔçƱ€* ÷ªc8n·Þ8Gº$—蠟-\ŸT#oiuÖtX‹íÆÖXŽ•]憚£tcmÊÎÚsl‡¤bÃÚ™qàØÊd —,$#¬Û$î…Á’Ø­±¬@AÂK25Æ,–î*ñYYk|¼,e[®e¯9‡D?cŽ%TbZZbbsZgGssggZó!?RÝÙö6ºZ[/óÑßÏP¦Ø’Øò8¾‰“ŒQ,_›40|¥¥Žb>?G} òÞS¨æpáþk\¸oj·XZM|8îÿ;ÞXÇ áÛä2 Øÿ²`a„Ïÿ+^íÙå–­¦Õ3½xSDÂÃ.BjÂÀú=Êìýâ §£ïß?5xýXVëò“LêZêpjUö!ñA©4>:µýDWEC ö;vQI8Q”$p8AÀ©$Тä‹ðz<8`k¦…Ÿ'Àóhøµ–GâhªÅvÁ•Ónsž£K)Dã†+°!øµμ|»–.*4 +ÙW·b ]r‚$²Hà,kV:Ó‰£SS£åyÖêL¦J–gË£³•9Y‰íò³Ð÷óKàhGMC«øDdïúuRWÄ3Mrl¨Ã]ÙK÷ÔÉ’bÓ2—IYAÖ{$ðØŸ_šºYgÕš9Ÿ09R+y:bÇ•ú‹¤>³µ½¾¾­MZŸÌ +ŽŸmâ~Þ™>•‰yä‰&n¶¨Ú™el}4½+*}ã&|$™z#Â( +½‹fr1Åø.šwrÇl8 î€;¸«¹h+§¦(ÍE³Ñv´ÎD³¹Øb7à]¸FqCf¡(nH#ùÂ"ÏKn)¢ìƒáÀ¶†¹(¸ͨ5X¿èêøÒÆi{TÔÖð£{€'ŽZž Ñ/:dW…–Y1³½Ää Ûì¹›Yt÷ü‘}íõdžÅO\E~±)ºœh¦@ª)Ê£³4¥µEly¿Ây¸Â!øµ›÷ø¬{æ úȯŀø¸ºj R%ß ³ŸI5è…h2Ujdb@Í—€ç¡T½;UŸ¶ÉÌŽ¼CÌÒ¦eæiJZf(3·<ô†+ ™TÊJJe^äljm¥¦†®sV7ÝÙ2Á¦´]qlRt^¬n<%¯H†<¿æµB.Öžü憓Ȃ‰g?µ"!¯pQ÷†å k·p¶ºø=¯Ãñ°ÙeÕÁ , O)Ì÷æ1ŒÅµûY×e€j¿Ð{üÞÉh4#ÜFíZ¸$rso;#ÈV$drŽ&e;yf(± $A¡¤·JHPÉÙû/`äµ›ž¸‘¸U„ÀE,Êqâ. øJ˜è$¶Z5¥gÅžŸ8v•oЩuaà Ù~¿Žˆ»rk22reà°„ pR|}ÛÑ÷£ÔR)“%ÓÈrv7©B‡ëÁMÞ7¤'&Q#q€bþwâ‘@?’{0@‰=ŸöƒÊÒÊ7HçQgH¸·…Û®ó Ñw–Qk¡ñ:!ym•'η + Fì¬Á)[+nÉcÑ–G¨Ç>?ö)Øõ˜ÅNC–Èi +õ!:+ËVª`OL—Û£èÝß°ÛL@'LGÜ?wze;üHhG‰ˆÐFÜxûÅØpvˆ®¯(Hu±àÊì"œ(rÿÜé†3(OI.I§wïÎ8Â&íÕ(|‡f‘¦øœ|N¹Æ_CZüþ®¼ùO%R¼Ø +¼;ʪaêÖÆÂúá‰Nð{±Ûóí0–<ó…ïŠ8\â|”ÃÏQ_e®§¿¼°iæ»Û¶Ì“ ™X/Ä2£RÉ PQ¡ÆMyTPk?ôÝ9nÐüîWHóBÕ‚—Ƹ°ß®ÊÌ/L1†¡±ø“/±«à)Ïï%9áÎlim¨oÍ#SY çTLZÎ@òbŽ[N—,žÈEøÇ@wQuÑ`_ëvë+èúj¸_1pø}¡çàŽˆ¸ƒ;ØD€N่Ūq×ý–¦|¹|ÉÜ-áÛoßgRiJ2p‰VCq¯³ÝbÀB1|…ÄA9•Š»àÎv8ßͼ»_Àõ_ø‚nˆ{æCÎByCžw†8ÿ}ǃµˆý|ü³+͹yn@Á·Hè(øˆ,ª^Ï?¿'AQ4¦Å}Ö[5¸˜ +Âx±¼â’¹Áêg7Üzòú±÷â®g¹Ç„tƒ“0„Ì¡îÕyº®Úd=ÌáQº"Gª3éÌ:Öª^±ÉœWaºVA€¯AâÒÎë`Éé)h{aÉq±mÉà_鋂İíòO^8Û tf'1Ç’Wþ‰~€Z»ˆÇ=ö¦~öŽaó é|ùˆ/ü€ó¸c®ž\@y5|$ŽÁI5@ÉÝp­ }äÑ|ä Ð<3 Šk<Ó8‰]ò¹®¥¹ÐM?ô p .§<<o„Ç;*£߮×`½-à2v]w¯ø>?H;€üëY5߇„•LØ´Uó3Ù¯ @å1}$$sëŽ*Ž†Ž7êô™,uëõè/|Iô<âyÄ?"pð¾v®î…KZx@QîæT ^¿7Ýßò·x2–]½744´µHSXAC]{{zc+(5•Óàù«ÜdÃôcàCNgÆÌwgÍž3ušÐ'؇òù„ø„ú„ùˆ}}‚|€·ªäç³ÃçSÅ+æyF½2Êéëç»ß·• ´Å÷= ÷xðÛ'¾ž(¡rm±›ò´‘œÚàð=¸{úýÀý‡âŸç>xeËöÌý±Lb‚2Q±´NúçÃ=·9Û<ë½ÝSfMgÑr´ S{Ä„@Ë©Ô·W‚àÇbî?‚“àcêrƒ!¿½.í_.~7|Í¢”l{c,ãJVØ´T©ÊŠ?šyûþ£æÁaöÄ`Ó¥ÅgrŽ§t0Yõ²Š½µ|á—·N4õ¾öüä{‰qLRŠ2=kƒ³0ôÞác×é›C{×Ä)ãÓÓØ$îΊ >÷ZnøÆ Ïûî xìáüÂï=‡`õšgÓ}8² [»ÑE£÷dkcÖ²-pôƒ>pZ3E£ê«“KP u`ý²w¶þ§œv´ºlu]Gi-и=wïñ<[9O1‚ãÑ5ƒÖV^0…Á$íùû‡ñÏ_öJ£õ¹ 5èÂýe÷`ÊG[î À£‚úù\÷à-ñ㥗_çBœ•SW×&?›ÂŸ"Þì¬äåâ·®„8óàæo†ÑØoù«ÔÝíqáâÕQQ«ì¸ô䃺ÁËÃŒðÁÉÔôKqÇ/ˆ/¼~ëèžåëÓö¯ˆbZ¨9êñkåµç{9“‚~•çkoäÿ~ÿ*Òb8g*DLÑHpg.ô9¤zð,f!ßñhb^†Ï©ìÂü¼|Fš£Š£D|ÁÅê+uͬ»¦©¼þêØLô˽`óó f^ã°òG_h>›Ú²/'.–݃4ZŒFu!þ‰pfOßéŒKôé£5=½ì¥óýNJᾩð%Â¥Q_CŠA1{ÆÏœ¹çoðL8G=a½zø<€Ãª„'¾Á6'>_Q>ld9:P`F+§b*Â9Ðh/¥ëJ•‰ìÈN°V¡Z`ä TUžme0ª +ï'ƒ“Âç±Á+ÕdµÚ,k‰Õj3fÀê¶Y­î¢Ì:f,Oà5ó½>Gx8ï,ïþ¨M£®Žºé;Å7Ê7Ú·Ø÷¾ïcßïüÄ~‰~R?›ßE¿ë~ëÂNcÿÄq|4.Ä_Çâ…„¡& +‰ßùß¿0zúè„Ñé£?ýýè¿ÛÈDò6ù|Jþ Ð`&X*Á1pÜCY}#úE„DÿB†CÆ…¼òzÈ„ç^sGJþ@ïÌ6¿ÊG Ä +]Œ"‰‰H]sh!½tWã‰TVÞ©í».†wOJ>õ&šzeÄ£?Mè-úlœyŸôÜ`” †U„ùYyÕg¥|ð|[-eÑ”ÅßCfÏ¡Ù8:0¢^ƒ^WçDÂdpW-‘]ÑVa©²U²Çáh :ñÁ XEª=M%NWl322è Fr9² + ÈzÔŠ8›Z±7y ½yoÛÅdVÞ¯í4\ãëxÁš¶ôN~oÒîºíôÎHyB ºtŽ1wfBâOŸ\†>GôN¤–w÷]¥¯6¦Ì¨dÁƒ–V_ÏAO(50ŸÈRYìÙl¦+KK±¥Ñ“-²¶9æÃlZFV¶:¯¨0,+S—§g–çÉf–lO;+Þö`ÏŸr†éï¨éì_‹\deU‚m5g”V‹Ý]=-¦¨·Ö`¦û;[:Ø®VgÏ)q›¾AÕÉ íˆ˜Òv?Ô¦¿éÊÛ´à¦onj(ufmð{êrkÏñŽfER5S_µ_ž¿{]ô?2% „ó°u8ò¯ºq8ö7(xüäû|'ü…rü¡ÞQ`^ÀGï‰R£Yop ¼þ‡Öo‡N2'^o¹E_8%‹ëgÛ“«ÓšÖóð‡eâÙ:yn­É±Øå¬3*ÒEsaç+è ´÷­–…6²ËoþGÑ_Á€žžÃ,ÜC˜¯–W\´ò'ų È•xgu~N¡QW¤eßFmˆ€%˜®ª°ªLìrÚš'ŠVÍ…5yÄ‘1´"Õd´iø­;«c•âézÅ!&]™%SѺ|oÑÒ.—[sè ™,ý`GúÑ[ÇÏÃ×γp²gbegyY£% dJÞ&ïsÏv*fr\–.GMç+-6%[³k§#’ž3ûЖuì²u1SÐ(1Ú ÃÐ8GI¹ [ºxeÒlzm¸»/»åä¡o Ÿ®úžƒó×.žÉ:p˜éLt¦Ô®æ¢I_eÓ–zK°(´ )[0%á0aæŠ +SÝXš³‡EJ°(_³¢ˆò'¾¨%ŸªÅç˜UU†{PHš?©¬xláŒè”ŒøR_®1KŒ Ôy³¸Á¾ž—áTjYD\ÆfzÊÚO!ñûùÛŸõVjbÊØ’L,ÙáRÔÓÍuµ –˜µkGv\4»sOÊŠybD=š }®îèeZj[Û/óõa×±i$hùw@ò©¾Ray‰¾!F( C døѪ‚ÈÃìÈ:ÂðR®bŽž/ƒêZb¾UYgøïG›jøJsN•á2fGNµ–”ê5åŒÒY¡¯¤5­]Yî„})™Kv²ßqqlxfœ*\–CXªJMtÓ aXÃvnm\½âÍë¿+g%Ï|¹ˆ?RâM‰µH®‹=‘1’zgžw!MÉ­òI*õ„¢0Äà°ÞóæD?EˆÄ\¸ãã*ç5328ªy SzÆ`™£ æ9ñµ–¼2ã¾ç¢Ð¼#'6o©",§„0•W˜tg™*’I#Œ[Uªu…œ¦÷Ãôö>XÙËó¼üßç=. +¾4r5›øÀДŸ)Öåéòd‘\¯ÎÛ«ã+¢ÖH쳦•;Åõ —2­g1¸MÄ Ëµùkó†P™ÇN Á÷° |•*G~þüPšÕVÈê-E5b˜_#>6Ôf;Þç×ÁÜ,\;/[ù²|¡o0•ØÝôW8|­I5µ‚9Ò)ø¼ƒ¡;HJOOLnJoëhnlïHkNbÓé¬vV—•^¹ÙÞyûÓFç‡Þ‚_¡Œ8a,×g‹ãÞæ؇ ˜@|PíºhbÌ¥Å&ëBƒ3$pðÉÅ\U¥o`‹Dð9À_¿Cn¢×$´¨zQ)°gÔBN‡©ŽÔ;ëú®nÓ[ÍF]b±”TF_šÿg¶Èj,Ö•6uÕÔu ðÛz›WŠmz›^—-ÏR0Æ¢CIÉiZ]6¿xr¨6¼lsž7ÙëĽªÚ´T•,.ˤ« +g$$Ï_žNí—ŒÇz×DU®¢ÑVŽÅ½‚ö¬q®iÞɺ⫳ÄLDlèIüäc±×ŽçÞ¢á6.î} ü õ+ïQ¸£ûù{×PÝ«+ç‰Ñæ¹h4 +Ý]{b/“ÞC%ÜPŸÿZ Ã?‡†\”íb@q삵ÿ­ª=¡¼U3ÓB>Ú6{ÏcMü«|&ƒ}ÄÿZnÃáStB¯Ãl¼£Ì1ðÍÿ^eƒø:!ˆ!G"‰eFYɧ|¹­>DYqø.z«ÌµèµbÞÁä+`tÏΨlJcQÜçÀ—;ZZ*Ía5(\QAÔ•iÚB­Zâhögâ¿ÕDZ ÌS¼ "ÔÂF xîõø)2üŒ±¦ C¬)Tk” 'é,ô&LÇòªuMãÄp¥ÈQ˜!Δ&mÙÁ eh&÷¦OÊJ?+ᔬOF<З«Í‹ɼ¼÷½ÒéyÑœRn¼Ç‡ˆæº¿¾Œ=˜vy†X™«K62ú¼b£š•ÁKµÄv“ªlêOH-¡p3ÜxëƒjÇ-[X-rà# ·N"ÿ`’>•ÞÖv“m!¾¶;O™¼Uð”&2•Øg‘—Ë™¤Úœ‹ô@­ò 82íû«Ä“㤶–êíÆù0‚x}nÎÆ}v3ð"¡·Ä«Ó ¶ä„¥¾(â[ÍÅ%ôQGÁ^v$‘0,ÑlÖò•ð“Ød+(7p?×ǴצO6Å-cÑ.°4?o³žOx«xÃC°×Íûۯ𯾵"ØQ‹/,QWïða4¼€¶;HÏFb—mCà6îddY- ¶KRS^7`^5µ&=+5Ö™ì*¶D™kÍ£S¤™éI­™}=í•-Ílcƒ»¿ý!À»¡-wìeö2Sh›$W/WÅ0sQÒ,˜†ÉO­å⺊ò#ã¨A;¸ÓØ­/.>‡$a@FÎjß{ùdã™.&·;.ûÉÖ?‘@wpj«¥~˜½Ñ~øÊ-ña•·áHÚ”YQÓåt–9«ùBOskÍ‘~ñ…¸³;˜Æ½moÓ»÷Q–ºRSýÕíóæn˜¦P›léì?[c™ÉE Ÿýzcë;³Ömž–‘k¶'³‚ÿTf¼€øE9ÎÛ,€·`…Vk•²­&Ý[¼:í"RMGRz‹W+ø*ÔŸzÒÝ~ÖVn¼eñ:¢ÅX¥‘‹Uú\E +G¨P2Á}˜¬½ ¶Z\ï*ëcª\èœo0œÒ6dY^<6™ýNMZj–"á ù"\Ø«7ºžü$†[9·ÿ*ZÃPpÌc4–SíUó@þhôW6Aê˜ÓÜð"ÛÙÖPØI?îÙ’ÄzYÿ6IJœ7J|ûoãOP”ãCzk¾yAKáâ)œ ál)N•pß,ç¾ùÒî²zí+¡Ìßaà+€ÚcV;ŒÍ|ø#¡² r &%J›ª¬VºÄ¬ËdQa”j´û ü NxsÆg ÕÞœqªDJŽý)jˆJ ”âÿñ©Ï,~Vü îìÂn&Ú•mFA&>:(Ç_ýËðÏtåÞØrd?¥ÎJSéMZ¦G“SF§fJS÷ &]‚>WïÀ@ÖƒæOJ½ xÖœÀj< ¸å÷ÊšGvî‹×nÞ¼é ´( &i:=‚+AŸ|3Ÿ+m(‰‚z¾Ð¤.3ÞàÃi„Pv°·As„†à[è_ƒËž¾ ŒS"$_‹ÁŽ"¯A„“èœ7¹>…”‹h9ðL¡ã·:“—–³5xd1ô çfm&Î}ÅÆ©’¯ÄÀâ2WšKùÕ÷K¯´‰ëe.i†L.UTMs½Í€m6Wö Ýý™ƒ=œ°xÏàð3_xÞ¢à0ŠÃAƒ´µ­¡´H€ç\Š¿(6Ψõ6 ÌÄFŠ«IxÙEL“UÝ6²ÀT^ΩSc•*óPZz¢Vo²ªY›2ßœOgæäd¥5(:ï|p÷Ñ™”öZ¥ÑÂTR%ðÀ]‹œ‚ŽƒG»Îö1ÙÕX*0“ph"ÿ(*¥lÛ¿ðŽXG~ÌAyìgÒ³h ±ù@Á gPìçP‘uý=,ÙùåØI yÜ,ÞPäêŸÍ=‘G¢¹Þ´<œûǧÙ{ÍH¸‘ŒÄ!œŽïßA +à= +®B°ÓÛ]9â¥n¯·á6Yâm³1—ü§,PKÊ8ïKüËûþ;oŸõ76þOCáÓ¿ÀJÐ~…ÚÝ‘ÝÔ&nk¨ëê«ÏŒ=¤â8'?“ŽU¶\gA“»º£K|cKßä”\CA"£ËÖ +è\mI¹‘Uáè ViÔ[Œ)HuáÖܪóbøùâ'‹a«‰E5„q9ãÓ½%Ý•‘}£³[!÷ W~` d±Æwó½Ô ÜôêU-þmùÉéOŽïY#d'@5áåý˜£ÉâtŠ;ÔÕÒ,•<-×–Q’ÅO8Õyk½PÙß,nTÔdÄ+¢_Ñ1úyà¸oÕIóï’„À£*‚!žQ0„7À0™´Öÿ 8´JMáv(RÐ÷*÷s†³Á/žÏ¦vã&RpÄÅ!ÈHÜHÜͧ«FâɃ¿]Wž$Ÿ’‚Žòg]Qõïæ¡ yú½%N»©”Ô­®¡ÿ U…··OÀû›ÄëC’¿ð\uƒÏÏnÞ_~å¶ÏCðÀ ›*¤é%™4€Éó¾÷vÜýÉÜ¢%‰كœ‹l0^2”«›ÃFŒ~ 996®=¹¯§­½§/¹ ¼`ðÍ/üvà Ípć0µ´e4:GÕ*ð­Ùîó,ÕÈ-¥%¥|€ 4×TtÐuuúü$5¥”…Û‡+ëJ\Þf§o´Ë[q¼æÁ¯ñ€ˆC +ðÓE€Ï'Ãl>Ž„hà,@Ñ[hZßBÁAñ¾ÿ¸Mrïë òeJ}­×xsi@ŒB¿`Ð_ ðö‡„³À£‚)×y`mq»GÜÚÚúz’Û½bøáÙòMà‚æ| Ët„Zû€®ˆ)Њ ùÙŽ›èuµNÒÁŽc°©pÊM”SWáo3„©°r#€«½å] a×5ÄéÊÍ¿+Wy{ê?q*>;^›Z9ëÚÕ —ºyÞƇŒ¬pºˆ‹€'óÑ‚ó\©h"Õ²¿k¶px”’¦ÑتTŒÛÛRà)ú§°oǤ°„p!ÈUK¹ÍQìÿ,…KŸ +endstream +endobj +1376 0 obj +<< +/Filter /FlateDecode +/Subtype /Type1C +/Length 11578 +>> +stream +xœzwxTÕÚ/CØ…½’I™Ùf³÷F&X*ˆˆ€ô -dÒë¤L’I&½Ìd&½·I2“BHB „Б*Š"¢¢Ç‚¢~õ¨krV<÷® úï»Ï½ß÷™Ì“é{­w½ëWÞwI&M™qxâð«Ã˜t¶t‰t·t¿4Iš*Í‘vJû¤¤Ÿ9Nv”9.s\åxÄÑß1Â1ɱƱÉñ¼ãÇGŽß:N38-rZéôšÓ:§ÍN¾NÉNeN§>§!§»N÷œ8}éôÓÿržê,u~ÆYtžã¼Ðyµó^gogµs–ss›s¯ó€óMçÏsþ§ í¹¼ä²Þ個K˜‹Ê%Î%É%եĥÅeÈå¬Ë—.w\î¹|íò™Df/s–Í’-‘mùËÔ²Y¦¬Tf–õÉÎÊ®ÊnÊÞ—}"ûRöDö£ìWÙ?åùT¹TÎÉÈWÈ7Ê÷È˽åAòpy´<^ž"7È«ä-ò>ù¨ü–üùòoä?Ê“³–fØéìv1»†ÝÄîc°¾lÍjØ46‡5°El9[Ï6³Ýìö2ûûýšý‘ý……5c"{'·>’wKNYŸE«¬ŸR}Ј C7«#Âiš×¹­ •"̦ ¤ßÑb3|E&!Ò/M2Ð0‰ê©,…+ΡéÁ=Æ? µs€õ6¦Têh8Ž<¢©³¹p*NRÁÁêÔpîX’霿A»£©þŒb8•A…Ô’÷G£]5#W„­Ô@µ¹àV1 Æ6ZX¯Ô*è|@DÑÖ&<4¤?­|!%ùHútô²P» +bË›•ÅæÆ BucÉÎ×*+`±ú°ha49 «ÈŒV¤êÒÓUÂ&´=ÜA¨Ïåå•(ª+Š-ùB0£ühª)·(·(=_?´ æ´Gg¦%¸‡U!Ñ\zFI©!ÏhÈ …†ÂE[Œ)P¥<Úr¹³­¼±Ahh2õu= ¬ÏN«=UVÖžGîn»<Mvç”gÄ(´ZeèëBr@F¬6 u »>á3ãD™¢¦¬°'_0#m4Õ›cÐ×o1ä3ò36¡µÓ¢o|KÄdW—+jËŠ[ ð§TÑTsvAVË8œ™f&¡3¼ñ3ºA4‘ð%ëœêî’’–Âé~ ²pN,•”••­å²Ò…±<$¦ †{yËŽ[ú=~ò¨B~aQž=N'D W-dcyQQ)WX¢Ë)SÚFRÛ¸Ç7®ýžr`Þ)qS[„¥SÑÑjéiiNkÔå Õ^4€Ö0ÉÌB3Ðì'ÏÁiÐõ»_àLøìÂ_+BÙ+õûÑJô¢Ú}Ïnõ98®¯½.‚±]c+Y©"ßÉ®ÉW¨µ!QÞ²C;=LÊ-Ôu…Ó“›r**Õ%ý‚Å@XЗ*ªS×o +‚ÛÐçÓжPkKÓÊu÷hx°…‚¾è6}ÈñmÖØ¢‚êü"¡¬¨¦ÐÂ}Óí=k[’_Hœ•”¹VOƒZ"?ÍXù:’-ÝÁ^ñNñ¥9ÊMñÖa¯×w¶œU +‘ÝɉïÒá(¯l.*)©áJÊs3Åijݙƒœÿ÷Ï¡Ë}Ïû[OˆhÞûĪ¾XËÅõS=ïœ>ïRè ¬ 7m¥Á0ßÖu2ìx (5.âÁ¾¤7x`fƒ +´Õ= +«ÚM f”¤|ÒŸÌ Ï4:øøG%ã–îÿÎøåÄÑšêŒÔ"±0=/ÏGqÈPÛÃj6÷^ß|f š1ïYôáÿíÐÿœ  Å›;¯2{ÇñȃsÑsÐÿ8ÉÜsK„2Êø~yÅûx×^®¬ºU*XPˆŠz?Ó·A®Pègë:¨HSmÒÞ3âx8¥ß­ÑnÈ u%\ÊFaé š˜@è´ez:œ„ë<¢yœ¢2Ñ,¢™´|×\gê‘v|þ4•UFe|¶ðÆK8ãà"FQº“5­Ú„üÕT›¾&NçO£áèo¢" šà èc§¡HÊ ‰ÐÓjèx]wMCÛŸßjÕWÇéið +ÓK?ãÒúåkÇÍù?”m¶ÆYg±ç‘+%kÛÌÈz×ó²Ëך\Wô¥hdïù3²E! ˆgà]`U²Æ²¢.¢î3Caƒâ¸º1"2.6L›W'”(‹ƒ ÃiÐØh®hâºj#÷‰(›Ò+µšà\:¦P`yAj¹îm5SÐ1gÔ£ºÔAbü2ymd-£CЩ€7Sºµ©i+0~ƒXE¬6=P'dëµúd=~a Ó_Žs"DcêÓ‹Ö¯ÐÞhªC_oPÒãÅø’ q!z|É +|ŸÈÀÏaû#ÚB”&3ÓMj¤¤†C¦2JÜíõ͆EÓ¾ƒÂ­oÌu׋§›P¬º‚j7«U‰ñšq«á\"©-§¬TQ[[ajMè‚šÉm¸22 h±æ€2Ô—KSÃS<éSm©,ª®n¥«5Hôb@Rž vöÁ“=HÀ¹ðu¸ÔVÀ‹¬ÞlÊ«æ¾8_fÏ·µ_¸©¸|aw»`ò *Þ¡OÕ&r4·:Q·¦ ‹ø5üÔ²„·š§Luø«'š³RkÄ–ø]Šyþǧjò Aº‘ÀöŒà¨cŠ=÷ƒ  \ýÅèö ¼o×ð8¿räl3Õ|âDKK»(ÝÈxk™ ýƒcÈãÙµ9…¹tAn~V¶"--!5U@r$'¬ëÑ®hꄾ:Áà7±¾šøЉ•0SG‹sŠôÃ4$©&ówHAÔ©ŠR2)ÉYz!jÍTjv~EºˆÜ`Ãßm®»Q‚ÿ<Àî†ÏÚÙõÕ +SyE½PjB1j²#£8mIàTÔ=-ü Â?)9!™ËÌ.,‹)Ca^A>¦³Æð UÀ±“ÁÃÃ=7Íä­Äi +‹» xŸÃk\_¥OÌ +O‹äßÞu²í/ì1ðIµ`<¦²O`ÎO(‡'[[Ož ÇïÖߨ­6ôÑ°€ú›ÛÅWwyDìöÀ÷y]ÖtJ~€Ëa|Ó®ÊÞ%a(| +ÐC"tFI"º¢"od×'ŽÒãi”Wdì>ê¬Ù&SEq෺„÷ª7—+^P/ ñ<‚ŽÄlå<µ•§E@é®V×u2¹WjêÏézº«®'Gu5ñº=ôx#Š°6ú1¤t//5I¬³ ”=ê–àÃ)Ãk»Ôb|{fGÎæ‘YŸ$ ‡öÐÝ!ž¦ýrXù<šfü”><ÓþÍM•ÁÕ,dòk{Ïr­uI¡%bA|~”á D×^ãÕ¸€îÌ¡#m¡§ ó EB¯Uù¼á.œ—”îJµmˆÀÈÉY8>gØÁB«‹¦.›‡¸M槼ŀ®ó1§¹ó—›ºÅî~ÓÛwð™eÿHA¯ÄáHŒ,Ë"óƒ¶+Ðê„}Ž% ÿ +·Õ\Á¹ºÛZ:¤€_§RhåV"Ž*;^WXÂÕk#DE6$§nãÔ0‡·%á ¸Ào`îóÀzµ+€(cà‹ŸÝ€S¡üåwÑ«±9y¹‡E¿)ÆšÒ¼rîÑо¹‹}ÜÖû¥Tõ‹`´¼©Ôl¢eÖ‹ià”âbÈÙ=íB“·géznzšIÙ¯O†.Zâí¶A©­ê­mSêI™õog®\~ôÈÊœ´p&/è‰VÀÈSY¼q±ª!‚€N|K±|Ú²G᧊RjRŠLqBXsFAx>™®ÕÆ+‚{ãÞèÉa Ýá/îÕ¤—Vä +„rêxtc2Ful¿ÿµ«Ã]˜$ÃBƒý[Ã{ñy +GŠd€o ÓÛÞÞÓzÂ_Ag!%6ÁbêzöñÄ.ázØΓë87ÿ¤QVÚ >dÆœL. ¯6öÁs]ë›úÁÕzƒì‡D ™¾€Ï1š„25§V©Ëµmñ"œµ„°~þ6HãSüž »{GHôáx:%<8+‚Û²½k4\ ?ŸüÞO +è ~nè¸Ó\“¡ÍÑeåf +Y©ºŒL…¦4©"G^î~ŠàU^Úi“#UuH€ø%<W'1¶Y†4US–œª8]½ŽZuFùsK}~y».*ní8ýr´&7=\ÈÑfè29mVQ…˜ˆq®…´dæØÁ>ØÇ¢DÃRß1¦hÇ/K`v`Α¥}’_ óÏ`A€ß²QŠ¦Žmb½}<;‚;;ƒ:½Ei¿’!` ©ÛKQÆ}I‰ k˜ØmvÔ%zW +å{Xƒ‡!*XžÇ^1ÖÆŽ€É?1àÌØÔLÀÞ&ù8+°ˆÀÎ@Ù#(Ûx¨tý·ð6…U'”M(¬–Vï­â;(„Fæ¼®ú/4Ùi)Z9#€®”¾³¢¾¹6ë2 s¶ûäð°²ÃÌ붸´¦¢Rh45U4r£æ]è08ÝÊá¡žž! ¸pê€Gx¸'½Èr—;±—‚ÕP…ð?¼æŽ/ Q‡éèl5 0‘Q…)&CG³“ÛjÁ{ß›4(Žnñûùz€MQ¨)‡ +Ó4vE`éKå.ö™ú,bms±¹p„–FjMF¼æU]o[óÚ#hÖ—dF)ÐEÊ Ô&Ú€:x(•ž‡OúŸÆãVöàùœo9Û*ÔŸ(鹪îÑ^‘b\`Ʊe +€Ò|—òp ¼?2‰À5,æ)¾´šÍ­Oñ…Œ‰‰ŠhŠiiijji‰iŠAÑBºz ëéïïåqÒètw÷i`â¥?3¥%˜õ ê뛹J@þˆù'ඊ + iŽ:ÑÞÜÜ~BÕ,‚ŒÍè©)8ùÚà'@y‘ïí>ÑÞÓÚî'¶ÁÂ"›*p1Q®“V±ç‰ä¯ãkÙS-µB¤Ò–Ôè+4 °ÕS¹"H0“½ ÕÜ© (/-S3NÀiâáéúDò€ÚtQ]4œI4‹2&Ç®¾Vƒýƒ*/ÛöϱI@¬èúEé÷pë)8딜汣à Êa@ Ä6-ãEl½“%;º€!ARyyZ —_dÌ/‰ +Ç݈‰}˜Ð:±ÖkÀx ÌäÝ¢–γ\_S¨hîǃìŸ$èRÜ>uøp°Ïö¨BÛ|ñŒµÙ +(i|½Õ§zU“ ÀˆìŸIò1’m+(,,,((Î/.üÚÞ~ ®ØRb©(+-(¬°wxÚÙ7É<é;Él‰I2,¹+“gOž79rÅäÎÉßÚM±[n·Ù.ÒNm÷Ñ”W¦lšR5¥nÊ}b1L\ ®ïŸ?“!äWÔkTUHÕQŸÐ›èP:—®¢ÿ˜:yê SÓ§~Â,f¶01Œ†IgŒÌ s H€ LžÀ:À5pÓ^jÏÚO·çíØ/³ßn¿ßþ°½—}®}½}“ýÇö_8Lr f9¸9¨RÚ>vøÌá±Ãw¿;ŒK'IIéé*éZé^©Fš*Í•öJ‡¥W¥w¥Hÿp´wtr|ÅñUÇ×}3›O;žs¼êxÇñsÇ_œœ^rÚáäéádp:îÔã4êtÓé§Oþæô›Ó¿œ§9Ïs^îü†ófg/ç`çXçTçRç&çÎ=ÎÎÃÎç?tþÑ…p™á2Óe¹Ëz—Ý.‡]¼]]’]Š]Z]κÜuùÂåw-“ËfÈž“=/[#Û-ó”EÉ2dY™¬ZÖ ë•–Ý½#»'ûJö›œ”?#Vþ¼ü%ù+òuò=òCò ¹Zž&Ï’åeòZy£¼MÞ%‘ß’$$ÿ^>ÆNb¥ì3ìLv»}™]ƾÉîb²Al›Áæ²¥l-{œígÏ°çÙ«ìmö=öûû»ëdW©«Âu®ë‹®Ë]_wÝàºÕu·«›ëWo×`×XWk¶k‘k•«Åõ¤ë°ëˆëE×kc{þ,I?Ènˆ{s¢j9ß[<çuÎà”ñAUå'FŒ¼UÔݬr­ñM}A-h8ü¶—xæfÂûÜï6vˆ§{š?üVq“Ò\èÌ>–àÍù…×uʼn '2:rîÒ°Ìú,i8¤‡î 9jÚÇ!§óÑ\4÷Ó…Ð铳íOn‰˜ÜײÐ>¿®ï,7\„ìŒb¬ÈkêyH‰uí<4úâÛ<|óƒÞh>Þ€ž¢>˜…“o"䇎íÄ +œFÔ,¨¼`è Hé"°†ÝdÓÒˆÜÜ\}6—œYR+Â*êóµ£HŠˆµßð1G÷÷µš;«²+µ5BzYnQ‘¢¡¥æ¤þd‹ÐVy!·:-T¡ÍMÊIÐô=š¿"R;s+ÊæêÒÁ|Áh«vˆtKEõëLéåÑ] §!½ì­“ˆìZ]q©¢­¢t´@° au*«,½ö0 ÿqZ+ wZ£àÚñ(¢…„G­?Vö56tæOI¼‹c¹[Bò²»³Ù·óÙOË0Á]&‘#Ü@¼Dâl .‘pÊR+iaר«ôçik°Æ²rCWRœ›] –¦åé ÏÓÈ +Ë.ŠöUŸ ºÍAúÁïpÙ½äwbÛDSR|Y4çZÓ‘"¶¥çëJ2èÊäâ(¥bÏ[»½:ð u:¢783G¯ÏÁõÀ*{BF®àÁ#^_ÿ§ÈOnKx*òÇ\ š 'ƒ×¨CChÿcûÖp;m:kèDš~n›êÏ*êvn½ÆÊèÝ“S7dÐ*ë} +lÌ3Y]\>âe­'ÍÛyÙéÅŒìÁ«<ø¬ÂM8ÚŸJPbNÿÏ> Ñ”ùhÊÄ>Úbÿ )½öESƒ™%éï4Æ;#Ý+kÅ,3€­{·š¨üâò²2dðpÖ—è±³f©ÙÙKÞx]\¿Ñwá<¢{^y´VØpÿ«ÀŸ¸Ÿ¿ê~ÿ}ñþýžo~UüêÿxÝû‡o,î~–Cß»°p?\„oûá>¸-BûÑ^ôzíZ°æÄíMÂæ;¾Vxm!šÕ‡MbilA‚J‘Ñ?Œ¡Ò{ìÒÑ‘Ð IòëÜуe/ÐpëxºŽ¼ «NèáäÒ jk+yz*2Ùm؇¦h%附IWÈý2©N‰æÂâZú®œ:õq~qk"ŸaryEÙËðŒ™Ý™ŸZ©¿AÃ=Ôhë@GIi¦¦\ˆ¬¯No溛[{†‚:ŽRFm:*Êî~ŠöDS§²ËS&:6i©Þ™8põ 0`L +³Ø(Ò­Úô:Òê8F5ã T:F´’g xºÎÓU: ݆q—?ˆië +H¬/Á ö,"‰’¸‚”tETBª· R˦t2Ú˜Y¬éBeÖŸ¦EÈgütÜÖ°`?Ýt5Tšb©ÀÈzsi~eI£ØóñLg?CžZL‚»le<”˜p±Þ7©ÈCMa„.« Þ¾ZÑ0(~Ú{ññ +Hnú~f§Z–h\_˜`˜VNéÏÖÔàÌ4¡ûj39¢«MЦS)ŸÜÍ9‘™¾ñÓWø¸-™¯˜ñåOý„ØÓWR¯sðE«†ý9ëâkh¦‰û_›“% Ù®sŠö?„¢μôð§"Aú +?ÑæXË'¦Çƒe¼©ë/ð&/ËÜÀÈ6üUÐÑøÚ“-ÙÜÇ1¼6NglUh88 ÿ]ˆ.‚ÙÇ(…œ(6«à]ê*J£€—)«+"¡Š¬5” +å%õE­Ü{ç‚ßê[ܪÖ/W,R/ûïµi˜Œ´t¬Ìª ©…ÚjtÅ*wC·“–Jð–=¥¢fåF+4Ù¡¡ +Cá ¦ŒïWWß+xÊ#­Ó,ÊJ; t¹‰5^E¾+¶, +¬Á®¶QU¯Ž§_‹ôõ;¨8V—p3F,ú^E~–SfÜGÏ¢†ÇnÞîS?,x]Šç†Û†,BlU¶N¯ÏÈ™žÆ\ØÛ9uÉFwz<œÒ¹'§àU¶¾KõBo"’ Ý¢ŽŒ <Ý4opÛ*°CH£~ïS"²\l†ŸG‘ÉhŠ_À, ã©îŠÒ _žO‰hÚ#«Â*7ÓfxhÛ^ÐCÐÀÿÕçm +êZ;ÌüUÅ‘µ5{ç`A½Óí~Â.ö‹ÐL¤6ê ‹55¥-B RW9qm;áËèâ4ôŠ­ ®†_›Â¶ðù—h¸™‚ +ØÒs¿¼|¨hº íS›¨ •&+QƒÍº+üÂB®)Í(Õ}@Ãä ƒµô'íƒ×>R@zÑm´z¢]ÿNvU’q½­]¯ß‘ë™Cç¦ét©"Ô[?b›j+:ux¡IÛS”Q1‚::8sÆ–²K¶Dïß*®Ûî¿MQ`ƒ¤@3 <ˆüþpÇMáÖ¶5í³¹‚@ö÷kh&“ƒÛÐÔ™G=[‡"ÅÐ Éïþª€N¦†/ -µ}œJ©‹U'ëÊ3S²s³²Ò„ô¤œ´TÌ7kTäÎe¾ž[÷Ó‘£èµuÀ÷î¾ãŸö ªrB©Ö$Çq©µ}"¼`ë_)d\xûöAe¸VUs£{ÍÖ®š†Âbºç|us­¢8§ ###G›$èõ:]®y¡•Ór(ŒJ«°»§ïŒÜþ›âî±á]‡‚CÀÓ½­«VÛööÈðêpÝ–@>þjëYÄh²ótQ"$ +Ê Å±»ò + Œ­šãqä¤ÿ𙞓ÃglÕÀþú¦­¦{SëKÈÂ$»S­'ÄNúÀ#h!| ݧÀ #âÐkh-V`xÝÑóXÍÀNr t„ÄEéƶîÐQî››¿`©ê|ø›ÍÑ1!Áõq]Yb_¦A_©¥«‹#<s=·­÷¬í‰Z®V (ÎÄ´(£CyVh:bÛ7ÖÃG¤tÜàI?\cÿ¬¨š¨¨¹åÒ@§@ç`¤ƒið{ÛÒÐjæÌeqØBæàRï=¿ãH@øÆáG +mÙ_¨&òK + ùœyÂn߇ÎÔÄ’1àò"@ýÕ[Á€§E’bKÑ9Zúú¸{¡Yä±?jþ¦Á†âòÖ&Ü:h²¾ZÀ£¿f·%ñà SÀ5Áøê§Ý¡Ò‰îPéŸÝ¡¬¤t]&•cë}eMg‡ë•/¾˜päHBד'uÝgDàþ˜ÖçØ>&F¨çW0î¿´òQ2õÊzâõÕ=©ÿKWßL=íûK­snß¾mج´Ä­£ŽKåÜTW¡²-W¯t´G+«D`s21;ù¿Úä{¡œx@¡^(‹qƒþ«ˆ?ìHðC_j;‘“’¥Ïà|BºßOÑä:6# r{B˜›’Èy©GqèÅ.ö6û ºC”Æk‹Ë +ó‹‹Ê„²êlŽG™ƒýUª€ÝWŽ¶ZmL¤:`"Ò2 ?¯žì…5½.ð,ô_Ä2®’ý:dïzj²Ì¸<Þ,†‰V²›¿z»;mÇ\†Ò‹² Þôÿ›à+5@;L p6È3IÀ+L»©ÎâãáK| ÐùÃçy`3XôæC[—*Þê?zm¤ãĹ¡°ÞÙYyú,AŸ«ËÓq©¥}ý÷GEðSl(ãþž°ÛÃc¼x[3a_b)ÄÉq“ô!vSÆS’ÞPœP©?GÃD˜…)ÝÁDíÛÙ¥o)pÞØÈC¹Jô­#€•€ Ùõ¥¯ç8x³!¿¦¤AŒ?ÞœÞÊõ¶4o©ŠˆÎJÂÛs[BxLt]U Íùp6¯«|ëɧmyœ&`aÿ”6 t®¡ö¶­pø­Šz'פÁ„‚|Qô¥ €1oÚâ›Ybºž´ ZWD¶jcµŒ¿QÆe©aœ)“ü³r|š?n MÀ”±Ä†žh9Z†Qâ \ŽVÀCø¶®À{?[…ˆàd yµ[µ­K0y²¦­EGV+ß„CÝî×|3Hð4ÚÎî2jêuØ#?K=cÑžæ ñðp>œùÆdb  õHœº½nM«l%:Œ¡H°€Qûz¬æ!F\„D©Bþ,ŸhjÅ”5)Ï £h‡³Ë PvÜ¥>' [ +åìe@<˜åщP.‡î8”+ð͇r9|Lœh02à+Æêcb=È]s‰:€Ö·‘…ßœùãû”ƒ=¢þ{¢\ä{ÀÓÎx}sV='›Ñ„¤€n¶Îa;˜/?¯½Ždñ9q…Œøt]—QfÑ‹ C¨ +Š,x‹C;à N 2¾‡ák¸¾Æçܶj·PÒ£¼ídÖ±Cõ¡~Š M\THrÓÈ`AM}« e­s¶ŒXnS?¼=Ž¿t›G[G¼>¼wîÜ=0aKá6“änogm7³ÐÚ£¸x¤oë[îž»|ð°Þ=ŸÞþÁ㋆:îw ðþý€ûî≯¯Š²ŽñgCXUtrf4§Œ7ÛjèÚH2 )=-ŽKO.(‹Í{Ž”ÛúÂ(€B¯A"¾)³²Jaª©hÀŸýÀæþø ÜÅþ¿èy°†æõ4€/ÃÅp6¾¿Œ×îþë@S½ °;`õ¨«ØÒ#ÞÑ‘jn‹÷Íäˆ:0ª={ºaÓ™x“ P·]ú†¥Q ‡9­:Ó7gwÚü:@î‰×I„Áÿ<ˆ/›(ƒ‰Ë©-J»i€Vgó¶îkn5†'yCô9)"¸*×àû×ìÆüŽ¹â£‰ÖPœó;¾yë‘ð@ni—áºnøj¯D«Ô*›š-øäZ×pKBJ• Á¤ãÖD*Р 1¿Â¨¨b÷)ÓÛï`±¿fÁåX[!þnÿ3 ïwre`¦ +ØÎ}àÜ%àr€u=˜8«’€qtK~‡Î6‰jÆ[È…8Cžu;2vä#$ Ñ³”º ©4gÞ3Œí0#@¿ìÌÊ +ÒOWk'T²_Ž–Pài³ì¤®¦$1R¸(Ïlç?Àÿñå¼£ +endstream +endobj +1269 0 obj << -/Length 583 +/Type /ObjStm +/N 100 +/First 996 +/Length 13380 >> stream -0 g 0 G -0 g 0 G -0 g 0 G -BT -/F51 9.9626 Tf 150.705 706.129 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -0 g 0 G - 141.968 -567.87 Td [(42)]TJ -0 g 0 G -ET - -endstream -endobj -1118 0 obj +268 0 1265 58 1266 116 1267 175 1268 234 1261 293 1278 440 1260 642 1270 789 1271 933 +1272 1079 1273 1226 1274 1377 1275 1528 1276 1679 1280 1825 1277 1883 1285 2017 1282 2156 1287 2301 +272 2360 1288 2418 1284 2477 1296 2624 1283 2817 1289 2965 1290 3109 1291 3256 1292 3403 1293 3546 +1294 3693 1298 3838 1295 3896 1302 4030 1299 4178 1300 4325 1304 4472 1301 4531 1313 4651 1305 4844 +1306 4988 1307 5133 1308 5277 1309 5422 1310 5569 1311 5713 1315 5860 276 5918 1316 5975 1312 6033 +1318 6166 1320 6284 1317 6343 1329 6424 1321 6599 1322 6743 1323 6888 1324 7032 1325 7177 1331 7324 +280 7382 1332 7439 1328 7497 1335 7630 1326 7778 1327 7922 1337 8069 1334 8128 1344 8222 1338 8397 +1339 8539 1340 8684 1341 8831 1342 8975 1346 9122 284 9180 1347 9237 1343 9295 1350 9428 1348 9567 +1352 9715 288 9774 1349 9832 1358 9913 1353 10070 1354 10214 1355 10361 1360 10508 292 10566 1361 10623 +1357 10681 1365 10815 1369 10963 1370 11090 1371 11133 1372 11340 1373 11578 1374 11854 1356 12090 1363 12237 +% 268 0 obj << -/Length 6387 +/D [1262 0 R /XYZ 150.705 716.092 null] >> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.8)-1000(psb)]TJ -ET -q -1 0 0 1 147.429 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 151.016 706.129 Td [(norm2)-250(\227)-250(2-Norm)-250(of)-250(V)111(ector)]TJ/F54 9.9626 Tf -51.121 -20.076 Td [(This)-250(function)-250(computes)-250(the)-250(2-norm)-250(of)-250(a)-250(vector)]TJ/F52 9.9626 Tf 206.349 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -211.554 -11.955 Td [(If)]TJ/F52 9.9626 Tf 9.46 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(r)18(eal)-250(vector)-250(it)-250(computes)-250(2-norm)-250(as:)]TJ/F52 9.9626 Tf 122.551 -25.46 Td [(n)-15(r)-35(m)]TJ/F54 9.9626 Tf 17.788 0 Td [(2)]TJ/F83 10.3811 Tf 7.873 0 Td [(\040)]TJ 13.397 9.727 Td [(p)]TJ -ET -q -1 0 0 1 287.432 658.569 cm -[]0 d 0 J 0.408 w 0 0 m 16.592 0 l S -Q -BT -/F52 9.9626 Tf 287.726 648.638 Td [(x)]TJ/F52 7.5716 Tf 5.399 2.88 Td [(T)]TJ/F52 9.9626 Tf 5.694 -2.88 Td [(x)]TJ/F54 9.9626 Tf -198.924 -23.065 Td [(else)-250(if)]TJ/F52 9.9626 Tf 28.159 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(a)-250(complex)-250(vector)-250(then)-250(it)-250(computes)-250(2-norm)-250(as:)]TJ/F52 9.9626 Tf 103.113 -25.46 Td [(n)-15(r)-35(m)]TJ/F54 9.9626 Tf 17.789 0 Td [(2)]TJ/F83 10.3811 Tf 7.873 0 Td [(\040)]TJ 13.397 9.727 Td [(p)]TJ -ET -q -1 0 0 1 286.694 610.044 cm -[]0 d 0 J 0.408 w 0 0 m 18.069 0 l S -Q -BT -/F52 9.9626 Tf 286.988 600.113 Td [(x)]TJ/F52 7.5716 Tf 5.588 2.88 Td [(H)]TJ/F52 9.9626 Tf 6.982 -2.88 Td [(x)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -ET -q -1 0 0 1 128.689 575.464 cm -[]0 d 0 J 0.398 w 0 0 m 286.124 0 l S -Q -BT -/F52 9.9626 Tf 134.791 566.896 Td [(n)-15(r)-35(m)]TJ/F54 9.9626 Tf 17.788 0 Td [(2)]TJ/F52 9.9626 Tf 82.505 0 Td [(x)]TJ/F51 9.9626 Tf 120.621 0 Td [(Function)]TJ -ET -q -1 0 0 1 128.689 563.111 cm -[]0 d 0 J 0.398 w 0 0 m 286.124 0 l S -Q -BT -/F54 9.9626 Tf 134.667 554.543 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 372.024 554.742 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 375.012 554.543 Td [(genrm2)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 372.024 542.787 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 375.012 542.588 Td [(genrm2)]TJ -240.346 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 372.024 530.832 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 375.012 530.632 Td [(genrm2)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ -ET -q -1 0 0 1 372.024 518.876 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 375.012 518.677 Td [(genrm2)]TJ -ET -q -1 0 0 1 128.689 514.891 cm -[]0 d 0 J 0.398 w 0 0 m 286.124 0 l S -Q -0 g 0 G -BT -/F54 9.9626 Tf 229.958 486.513 Td [(T)92(able)-250(8:)-310(Data)-250(types)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -115.119 -27.631 Td [(psb_genrm2\050x,)-525(desc_a,)-525(info)-525([,global]\051)]TJ -14.944 -11.955 Td [(psb_norm2\050x,)-525(desc_a,)-525(info)-525([,global]\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -36.169 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -22.221 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -22.221 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ -ET -q -1 0 0 1 385.864 318.695 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 389.002 318.495 Td [(T)]TJ -ET -q -1 0 0 1 394.86 318.695 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 397.998 318.495 Td [(vect)]TJ -ET -q -1 0 0 1 419.547 318.695 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 422.685 318.495 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(8)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -22.221 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 284.518 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 284.319 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ -ET -q -1 0 0 1 273.363 236.698 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 276.501 236.499 Td [(desc)]TJ -ET -q -1 0 0 1 298.05 236.698 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 301.189 236.499 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -222.215 -22.221 Td [(global)]TJ -0 g 0 G -/F54 9.9626 Tf 33.764 0 Td [(Speci\002es)-226(whether)-226(the)-227(computation)-226(should)-226(include)-227(the)-226(global)-226(r)18(eduction)]TJ -8.857 -11.956 Td [(acr)18(oss)-250(all)-250(pr)18(ocesses.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(scalar)74(.)-310(Default:)]TJ/F59 9.9626 Tf 165.318 0 Td [(global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.true.)]TJ -0 g 0 G -/F51 9.9626 Tf -190.225 -34.176 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G -/F54 9.9626 Tf 166.875 -29.888 Td [(43)]TJ -0 g 0 G -ET - -endstream -endobj -1011 0 obj +% 1265 0 obj << -/Type /ObjStm -/N 100 -/First 987 -/Length 12089 ->> -stream -216 0 1007 58 1013 165 1015 282 220 340 1012 397 1024 478 1016 661 1017 807 1018 951 -1019 1097 1020 1243 1021 1387 1026 1533 224 1592 998 1650 1023 1708 1031 1855 1022 2012 1028 2159 -1029 2303 1033 2449 1030 2507 1041 2614 1035 2789 1036 2930 1037 3076 1038 3220 1039 3365 1043 3512 -228 3571 1044 3629 1040 3688 1047 3808 1045 3947 1049 4093 1050 4151 1046 4209 1058 4316 1051 4500 -1052 4644 1053 4790 1054 4934 1055 5079 1056 5226 1060 5370 232 5429 1061 5487 1057 5545 1063 5678 -1065 5796 1062 5854 1070 5935 1066 6092 1067 6236 1068 6382 1072 6529 236 6588 1073 6646 1069 6705 -1075 6838 1077 6956 1078 7014 1074 7071 1084 7165 1080 7322 1081 7466 1082 7612 1086 7759 240 7818 -1087 7876 1083 7935 1092 8068 1088 8225 1089 8369 1090 8512 1094 8659 244 8717 1095 8774 1091 8831 -1097 8964 1099 9082 1100 9141 1096 9199 1105 9293 1101 9450 1102 9594 1103 9740 1107 9887 248 9945 -1108 10002 1104 10060 1110 10193 1112 10311 1109 10370 1117 10451 1113 10608 1114 10751 1115 10897 1119 11044 -% 216 0 obj +/D [1262 0 R /XYZ 290.728 674.17 null] +>> +% 1266 0 obj << -/D [1008 0 R /XYZ 150.705 613.372 null] +/D [1262 0 R /XYZ 287.931 654.041 null] >> -% 1007 0 obj +% 1267 0 obj +<< +/D [1262 0 R /XYZ 287.193 633.911 null] +>> +% 1268 0 obj +<< +/D [1262 0 R /XYZ 150.705 447.252 null] +>> +% 1261 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F68 1127 0 R /F93 915 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1278 0 obj +<< +/Type /Page +/Contents 1279 0 R +/Resources 1277 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1281 0 R +/Annots [ 1260 0 R 1270 0 R 1271 0 R 1272 0 R 1273 0 R 1274 0 R 1275 0 R 1276 0 R ] +>> +% 1260 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [378.159 654.503 390.114 666.562] +/A << /S /GoTo /D (table.12) >> +>> +% 1270 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [368.549 588.085 444.603 600.145] +/A << /S /GoTo /D (vdata) >> +>> +% 1271 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [328.746 576.13 340.701 588.189] +/A << /S /GoTo /D (table.12) >> +>> +% 1272 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [256.048 497.757 323.106 509.817] +/A << /S /GoTo /D (descdata) >> +>> +% 1273 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [338.139 460.563 345.113 472.623] +/A << /S /GoTo /D (equation.4.1) >> +>> +% 1274 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [336.486 445.951 343.459 458.011] +/A << /S /GoTo /D (equation.4.2) >> +>> +% 1275 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [337.034 431.339 344.007 443.399] +/A << /S /GoTo /D (equation.4.3) >> +>> +% 1276 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [202.52 189.579 214.475 201.639] +/A << /S /GoTo /D (table.12) >> +>> +% 1280 0 obj +<< +/D [1278 0 R /XYZ 98.895 753.953 null] +>> +% 1277 0 obj << -/Font << /F94 915 0 R /F54 586 0 R /F51 584 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F68 1127 0 R /F60 666 0 R /F67 913 0 R /F93 915 0 R >> /ProcSet [ /PDF /Text ] >> -% 1013 0 obj +% 1285 0 obj +<< +/Type /Page +/Contents 1286 0 R +/Resources 1284 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1281 0 R +/Annots [ 1282 0 R ] +>> +% 1282 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [428.968 116.52 440.924 128.58] +/A << /S /GoTo /D (table.13) >> +>> +% 1287 0 obj << -/Type /Page -/Contents 1014 0 R -/Resources 1012 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 991 0 R +/D [1285 0 R /XYZ 149.705 753.953 null] >> -% 1015 0 obj +% 272 0 obj << -/D [1013 0 R /XYZ 98.895 753.953 null] +/D [1285 0 R /XYZ 150.705 716.092 null] >> -% 220 0 obj +% 1288 0 obj << -/D [1013 0 R /XYZ 99.895 716.092 null] +/D [1285 0 R /XYZ 150.705 268.704 null] >> -% 1012 0 obj +% 1284 0 obj << -/Font << /F51 584 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F68 1127 0 R /F93 915 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1024 0 obj +% 1296 0 obj << /Type /Page -/Contents 1025 0 R -/Resources 1023 0 R +/Contents 1297 0 R +/Resources 1295 0 R /MediaBox [0 0 595.276 841.89] -/Parent 991 0 R -/Annots [ 1016 0 R 1017 0 R 1018 0 R 1019 0 R 1020 0 R 1021 0 R ] +/Parent 1281 0 R +/Annots [ 1283 0 R 1289 0 R 1290 0 R 1291 0 R 1292 0 R 1293 0 R 1294 0 R ] >> -% 1016 0 obj +% 1283 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [428.968 413.274 435.942 425.334] -/A << /S /GoTo /D (table.1) >> +/Rect [305.144 654.503 312.117 666.562] +/A << /S /GoTo /D (section.3) >> >> -% 1017 0 obj +% 1289 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 345.063 495.412 357.123] +/Rect [368.549 586.032 444.603 598.092] /A << /S /GoTo /D (vdata) >> >> -% 1018 0 obj +% 1290 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [381.755 333.108 388.729 345.168] -/A << /S /GoTo /D (table.1) >> +/Rect [328.621 574.077 340.576 586.136] +/A << /S /GoTo /D (table.13) >> >> -% 1019 0 obj +% 1291 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [428.968 252.942 435.942 265.002] -/A << /S /GoTo /D (table.1) >> +/Rect [378.159 493.651 390.114 505.711] +/A << /S /GoTo /D (table.13) >> >> -% 1020 0 obj +% 1292 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 184.731 495.412 196.791] +/Rect [368.549 425.181 444.603 437.24] /A << /S /GoTo /D (vdata) >> >> -% 1021 0 obj +% 1293 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [397.868 172.776 404.842 184.835] -/A << /S /GoTo /D (table.1) >> ->> -% 1026 0 obj -<< -/D [1024 0 R /XYZ 149.705 753.953 null] +/Rect [328.746 413.225 340.701 425.285] +/A << /S /GoTo /D (table.13) >> >> -% 224 0 obj +% 1294 0 obj << -/D [1024 0 R /XYZ 150.705 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [256.048 332.8 323.106 344.859] +/A << /S /GoTo /D (descdata) >> >> -% 998 0 obj +% 1298 0 obj << -/D [1024 0 R /XYZ 150.705 558.947 null] +/D [1296 0 R /XYZ 98.895 753.953 null] >> -% 1023 0 obj +% 1295 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F60 1027 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F67 913 0 R /F68 1127 0 R /F93 915 0 R >> /ProcSet [ /PDF /Text ] >> -% 1031 0 obj +% 1302 0 obj << /Type /Page -/Contents 1032 0 R -/Resources 1030 0 R +/Contents 1303 0 R +/Resources 1301 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1034 0 R -/Annots [ 1022 0 R 1028 0 R 1029 0 R ] ->> -% 1022 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 690.368 323.106 702.428] -/A << /S /GoTo /D (descdata) >> +/Parent 1281 0 R +/Annots [ 1299 0 R 1300 0 R ] >> -% 1028 0 obj +% 1299 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 600.704 444.603 612.764] -/A << /S /GoTo /D (vdata) >> +/Rect [211.646 410.079 223.601 419.489] +/A << /S /GoTo /D (table.13) >> >> -% 1029 0 obj +% 1300 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [344.963 588.749 351.937 600.809] -/A << /S /GoTo /D (table.1) >> +/Rect [253.329 228.102 265.284 240.161] +/A << /S /GoTo /D (table.13) >> >> -% 1033 0 obj +% 1304 0 obj << -/D [1031 0 R /XYZ 98.895 753.953 null] +/D [1302 0 R /XYZ 149.705 753.953 null] >> -% 1030 0 obj +% 1301 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F59 812 0 R /F52 585 0 R >> +/Font << /F62 667 0 R /F59 665 0 R /F60 666 0 R /F93 915 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1041 0 obj +% 1313 0 obj << /Type /Page -/Contents 1042 0 R -/Resources 1040 0 R +/Contents 1314 0 R +/Resources 1312 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1034 0 R -/Annots [ 1035 0 R 1036 0 R 1037 0 R 1038 0 R 1039 0 R ] +/Parent 1281 0 R +/Annots [ 1305 0 R 1306 0 R 1307 0 R 1308 0 R 1309 0 R 1310 0 R 1311 0 R ] >> -% 1035 0 obj +% 1305 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 355.7 495.412 367.76] +/Rect [263.331 417.772 339.385 429.832] /A << /S /GoTo /D (vdata) >> >> -% 1036 0 obj +% 1306 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [381.755 343.745 388.729 355.804] +/Rect [224.557 405.817 231.53 417.877] /A << /S /GoTo /D (table.2) >> >> -% 1037 0 obj +% 1307 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 264.018 495.412 276.078] +/Rect [263.331 338.393 339.385 350.453] /A << /S /GoTo /D (vdata) >> >> -% 1038 0 obj +% 1308 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [381.88 252.063 388.854 264.123] +/Rect [224.557 326.438 231.53 338.498] /A << /S /GoTo /D (table.2) >> >> -% 1039 0 obj +% 1309 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [306.858 172.336 373.916 184.396] +/Rect [256.048 259.015 323.106 271.074] /A << /S /GoTo /D (descdata) >> >> -% 1043 0 obj +% 1310 0 obj << -/D [1041 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [263.331 171.988 339.385 184.048] +/A << /S /GoTo /D (vdata) >> >> -% 228 0 obj +% 1311 0 obj << -/D [1041 0 R /XYZ 150.705 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [242.868 160.033 254.823 172.093] +/A << /S /GoTo /D (table.14) >> >> -% 1044 0 obj +% 1315 0 obj << -/D [1041 0 R /XYZ 150.705 499.951 null] +/D [1313 0 R /XYZ 98.895 753.953 null] >> -% 1040 0 obj +% 276 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/D [1313 0 R /XYZ 99.895 716.092 null] >> -% 1047 0 obj +% 1316 0 obj << -/Type /Page -/Contents 1048 0 R -/Resources 1046 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1034 0 R -/Annots [ 1045 0 R ] +/D [1313 0 R /XYZ 99.895 560.161 null] >> -% 1045 0 obj +% 1312 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [378.159 602.697 385.133 614.756] -/A << /S /GoTo /D (table.2) >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F93 915 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1049 0 obj +% 1318 0 obj << -/D [1047 0 R /XYZ 98.895 753.953 null] +/Type /Page +/Contents 1319 0 R +/Resources 1317 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1281 0 R >> -% 1050 0 obj +% 1320 0 obj << -/D [1047 0 R /XYZ 99.895 512.639 null] +/D [1318 0 R /XYZ 149.705 753.953 null] >> -% 1046 0 obj +% 1317 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F59 812 0 R /F52 585 0 R >> +/Font << /F62 667 0 R /F59 665 0 R >> /ProcSet [ /PDF /Text ] >> -% 1058 0 obj +% 1329 0 obj << /Type /Page -/Contents 1059 0 R -/Resources 1057 0 R +/Contents 1330 0 R +/Resources 1328 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1034 0 R -/Annots [ 1051 0 R 1052 0 R 1053 0 R 1054 0 R 1055 0 R 1056 0 R ] +/Parent 1333 0 R +/Annots [ 1321 0 R 1322 0 R 1323 0 R 1324 0 R 1325 0 R ] >> -% 1051 0 obj +% 1321 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 384.684 495.412 396.744] +/Rect [263.331 417.772 339.385 429.832] /A << /S /GoTo /D (vdata) >> >> -% 1052 0 obj +% 1322 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [381.755 372.729 388.729 384.789] -/A << /S /GoTo /D (table.3) >> +/Rect [224.557 405.817 231.53 417.877] +/A << /S /GoTo /D (table.2) >> >> -% 1053 0 obj +% 1323 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 293.733 495.412 305.793] +/Rect [263.331 338.393 339.385 350.453] /A << /S /GoTo /D (vdata) >> >> -% 1054 0 obj +% 1324 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [381.88 281.778 388.854 293.837] -/A << /S /GoTo /D (table.3) >> +/Rect [224.557 326.438 231.53 338.498] +/A << /S /GoTo /D (table.2) >> >> -% 1055 0 obj +% 1325 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [306.858 202.781 373.916 214.841] +/Rect [256.048 259.015 323.106 271.074] /A << /S /GoTo /D (descdata) >> >> -% 1056 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [200.458 119.17 207.432 128.58] -/A << /S /GoTo /D (table.2) >> ->> -% 1060 0 obj +% 1331 0 obj << -/D [1058 0 R /XYZ 149.705 753.953 null] +/D [1329 0 R /XYZ 98.895 753.953 null] >> -% 232 0 obj +% 280 0 obj << -/D [1058 0 R /XYZ 150.705 716.092 null] +/D [1329 0 R /XYZ 99.895 716.092 null] >> -% 1061 0 obj +% 1332 0 obj << -/D [1058 0 R /XYZ 150.705 524.97 null] +/D [1329 0 R /XYZ 99.895 560.161 null] >> -% 1057 0 obj +% 1328 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F93 915 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1063 0 obj +% 1335 0 obj << /Type /Page -/Contents 1064 0 R -/Resources 1062 0 R +/Contents 1336 0 R +/Resources 1334 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1034 0 R +/Parent 1333 0 R +/Annots [ 1326 0 R 1327 0 R ] >> -% 1065 0 obj +% 1326 0 obj << -/D [1063 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [314.141 702.323 390.195 714.383] +/A << /S /GoTo /D (vdata) >> >> -% 1062 0 obj +% 1327 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [293.677 690.368 305.632 702.428] +/A << /S /GoTo /D (table.14) >> +>> +% 1337 0 obj +<< +/D [1335 0 R /XYZ 149.705 753.953 null] +>> +% 1334 0 obj << -/Font << /F51 584 0 R /F54 586 0 R >> +/Font << /F62 667 0 R /F67 913 0 R /F59 665 0 R >> /ProcSet [ /PDF /Text ] >> -% 1070 0 obj +% 1344 0 obj << /Type /Page -/Contents 1071 0 R -/Resources 1069 0 R +/Contents 1345 0 R +/Resources 1343 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1034 0 R -/Annots [ 1066 0 R 1067 0 R 1068 0 R ] +/Parent 1333 0 R +/Annots [ 1338 0 R 1339 0 R 1340 0 R 1341 0 R 1342 0 R ] >> -% 1066 0 obj +% 1338 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 358.503 495.412 370.562] +/Rect [263.331 430.55 339.385 442.61] /A << /S /GoTo /D (vdata) >> >> -% 1067 0 obj +% 1339 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [377.462 346.547 384.436 358.607] -/A << /S /GoTo /D (table.4) >> +/Rect [224.557 418.595 231.53 430.655] +/A << /S /GoTo /D (table.2) >> >> -% 1068 0 obj +% 1340 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [306.858 280.074 373.916 292.134] +/Rect [256.048 352.283 323.106 364.342] /A << /S /GoTo /D (descdata) >> >> -% 1072 0 obj -<< -/D [1070 0 R /XYZ 149.705 753.953 null] ->> -% 236 0 obj -<< -/D [1070 0 R /XYZ 150.705 716.092 null] ->> -% 1073 0 obj +% 1341 0 obj << -/D [1070 0 R /XYZ 150.705 495.665 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [263.331 201.166 339.385 213.226] +/A << /S /GoTo /D (vdata) >> >> -% 1069 0 obj +% 1342 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [242.868 189.211 254.823 201.271] +/A << /S /GoTo /D (table.16) >> >> -% 1075 0 obj +% 1346 0 obj << -/Type /Page -/Contents 1076 0 R -/Resources 1074 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1079 0 R +/D [1344 0 R /XYZ 98.895 753.953 null] >> -% 1077 0 obj +% 284 0 obj << -/D [1075 0 R /XYZ 98.895 753.953 null] +/D [1344 0 R /XYZ 99.895 716.092 null] >> -% 1078 0 obj +% 1347 0 obj << -/D [1075 0 R /XYZ 99.895 632.19 null] +/D [1344 0 R /XYZ 99.895 566.828 null] >> -% 1074 0 obj +% 1343 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F93 915 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1084 0 obj +% 1350 0 obj << /Type /Page -/Contents 1085 0 R -/Resources 1083 0 R +/Contents 1351 0 R +/Resources 1349 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1079 0 R -/Annots [ 1080 0 R 1081 0 R 1082 0 R ] ->> -% 1080 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 411.694 495.412 423.754] -/A << /S /GoTo /D (vdata) >> ->> -% 1081 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [377.462 399.739 384.436 411.798] -/A << /S /GoTo /D (table.5) >> +/Parent 1333 0 R +/Annots [ 1348 0 R ] >> -% 1082 0 obj +% 1348 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [306.858 331.993 373.916 344.052] -/A << /S /GoTo /D (descdata) >> ->> -% 1086 0 obj -<< -/D [1084 0 R /XYZ 149.705 753.953 null] +/Rect [378.029 655.624 385.003 667.684] +/A << /S /GoTo /D (section.6) >> >> -% 240 0 obj +% 1352 0 obj << -/D [1084 0 R /XYZ 150.705 716.092 null] +/D [1350 0 R /XYZ 149.705 753.953 null] >> -% 1087 0 obj +% 288 0 obj << -/D [1084 0 R /XYZ 150.705 555.856 null] +/D [1350 0 R /XYZ 150.705 716.092 null] >> -% 1083 0 obj +% 1349 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1092 0 obj +% 1358 0 obj << /Type /Page -/Contents 1093 0 R -/Resources 1091 0 R +/Contents 1359 0 R +/Resources 1357 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1079 0 R -/Annots [ 1088 0 R 1089 0 R 1090 0 R ] +/Parent 1333 0 R +/Annots [ 1353 0 R 1354 0 R 1355 0 R ] >> -% 1088 0 obj +% 1353 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 362.555 444.603 374.615] +/Rect [368.549 344.818 444.603 356.877] /A << /S /GoTo /D (vdata) >> >> -% 1089 0 obj +% 1354 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [326.652 350.6 333.626 362.66] -/A << /S /GoTo /D (table.6) >> +/Rect [326.652 332.863 338.608 344.922] +/A << /S /GoTo /D (table.17) >> >> -% 1090 0 obj +% 1355 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 283.114 323.106 295.173] +/Rect [291.943 264.733 359.001 276.793] /A << /S /GoTo /D (descdata) >> >> -% 1094 0 obj +% 1360 0 obj << -/D [1092 0 R /XYZ 98.895 753.953 null] +/D [1358 0 R /XYZ 98.895 753.953 null] >> -% 244 0 obj +% 292 0 obj << -/D [1092 0 R /XYZ 99.895 716.092 null] +/D [1358 0 R /XYZ 99.895 716.092 null] >> -% 1095 0 obj +% 1361 0 obj << -/D [1092 0 R /XYZ 99.895 505.29 null] +/D [1358 0 R /XYZ 99.895 513.636 null] >> -% 1091 0 obj +% 1357 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F68 1127 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1097 0 obj +% 1365 0 obj << /Type /Page -/Contents 1098 0 R -/Resources 1096 0 R +/Contents 1366 0 R +/Resources 1364 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1079 0 R +/Parent 1333 0 R +/Annots [ 1356 0 R 1363 0 R ] >> -% 1099 0 obj +% 1369 0 obj << -/D [1097 0 R /XYZ 149.705 753.953 null] +/Producer (GPL Ghostscript 9.22) +/CreationDate (D:20180323100645Z00'00') +/ModDate (D:20180323100645Z00'00') >> -% 1100 0 obj +% 1370 0 obj << -/D [1097 0 R /XYZ 150.705 632.19 null] +/Type /ExtGState +/OPM 1 >> -% 1096 0 obj +% 1371 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/BaseFont /XYUGDR+Times-Roman +/FontDescriptor 1373 0 R +/Type /Font +/FirstChar 48 +/LastChar 57 +/Widths [ 500 500 500 500 500 500 500 500 500 500] +/Encoding /WinAnsiEncoding +/Subtype /Type1 >> -% 1105 0 obj +% 1372 0 obj << -/Type /Page -/Contents 1106 0 R -/Resources 1104 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1079 0 R -/Annots [ 1101 0 R 1102 0 R 1103 0 R ] +/BaseFont /XISTAL+Times-Bold +/FontDescriptor 1374 0 R +/Type /Font +/FirstChar 48 +/LastChar 80 +/Widths [ 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 611] +/Encoding /WinAnsiEncoding +/Subtype /Type1 >> -% 1101 0 obj +% 1373 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 291.495 444.603 303.554] -/A << /S /GoTo /D (vdata) >> +/Type /FontDescriptor +/FontName /XYUGDR+Times-Roman +/FontBBox [ 0 -14 476 688] +/Flags 65568 +/Ascent 688 +/CapHeight 688 +/Descent -14 +/ItalicAngle 0 +/StemV 71 +/MissingWidth 250 +/CharSet (/eight/five/four/nine/one/seven/six/three/two/zero) +/FontFile3 1375 0 R >> -% 1102 0 obj +% 1374 0 obj +<< +/Type /FontDescriptor +/FontName /XISTAL+Times-Bold +/FontBBox [ 0 -13 600 688] +/Flags 65568 +/Ascent 688 +/CapHeight 676 +/Descent -13 +/ItalicAngle 0 +/StemV 90 +/MissingWidth 250 +/CharSet (/P/one/zero) +/FontFile3 1376 0 R +>> +% 1356 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [326.652 279.539 333.626 291.599] -/A << /S /GoTo /D (table.7) >> +/Rect [231.023 625.272 242.978 634.682] +/A << /S /GoTo /D (table.17) >> >> -% 1103 0 obj +% 1363 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 209.259 323.106 221.319] -/A << /S /GoTo /D (descdata) >> ->> -% 1107 0 obj -<< -/D [1105 0 R /XYZ 98.895 753.953 null] +/Rect [458.157 276.439 465.131 290.202] +/A << /S /GoTo /D (figure.3) >> >> -% 248 0 obj + +endstream +endobj +1380 0 obj << -/D [1105 0 R /XYZ 99.895 716.092 null] +/Length 3048 >> -% 1108 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F62 8.9664 Tf 209.77 645.656 Td [(Pr)18(ocess)-250(0)-7729(Pr)18(ocess)-250(1)]TJ -31.696 -10.959 Td [(I)-1333(GLOB\050I\051)-1334(X\050I\051)-4663(I)-1333(GLOB\050I\051)-1333(X\050I\051)]TJ -1.462 -10.959 Td [(1)-4607(1)-1754(1.0)-4500(1)-4107(33)-1753(2.0)]TJ 0 -10.959 Td [(2)-4607(2)-1754(1.0)-4500(2)-4107(34)-1753(2.0)]TJ 0 -10.959 Td [(3)-4607(3)-1754(1.0)-4500(3)-4107(35)-1753(2.0)]TJ 0 -10.959 Td [(4)-4607(4)-1754(1.0)-4500(4)-4107(36)-1753(2.0)]TJ 0 -10.959 Td [(5)-4607(5)-1754(1.0)-4500(5)-4107(37)-1753(2.0)]TJ 0 -10.959 Td [(6)-4607(6)-1754(1.0)-4500(6)-4107(38)-1753(2.0)]TJ 0 -10.959 Td [(7)-4607(7)-1754(1.0)-4500(7)-4107(39)-1753(2.0)]TJ 0 -10.958 Td [(8)-4607(8)-1754(1.0)-4500(8)-4107(40)-1753(2.0)]TJ 0 -10.959 Td [(9)-4607(9)-1754(1.0)-4500(9)-4107(41)-1753(2.0)]TJ -4.483 -10.959 Td [(10)-4107(10)-1754(1.0)-4000(10)-4107(42)-1753(2.0)]TJ 0 -10.959 Td [(11)-4107(11)-1754(1.0)-4000(11)-4107(43)-1753(2.0)]TJ 0 -10.959 Td [(12)-4107(12)-1754(1.0)-4000(12)-4107(44)-1753(2.0)]TJ 0 -10.959 Td [(13)-4107(13)-1754(1.0)-4000(13)-4107(45)-1753(2.0)]TJ 0 -10.959 Td [(14)-4107(14)-1754(1.0)-4000(14)-4107(46)-1753(2.0)]TJ 0 -10.959 Td [(15)-4107(15)-1754(1.0)-4000(15)-4107(47)-1753(2.0)]TJ 0 -10.959 Td [(16)-4107(16)-1754(1.0)-4000(16)-4107(48)-1753(2.0)]TJ 0 -10.959 Td [(17)-4107(17)-1754(1.0)-4000(17)-4107(49)-1753(2.0)]TJ 0 -10.958 Td [(18)-4107(18)-1754(1.0)-4000(18)-4107(50)-1753(2.0)]TJ 0 -10.959 Td [(19)-4107(19)-1754(1.0)-4000(19)-4107(51)-1753(2.0)]TJ 0 -10.959 Td [(20)-4107(20)-1754(1.0)-4000(20)-4107(52)-1753(2.0)]TJ 0 -10.959 Td [(21)-4107(21)-1754(1.0)-4000(21)-4107(53)-1753(2.0)]TJ 0 -10.959 Td [(22)-4107(22)-1754(1.0)-4000(22)-4107(54)-1753(2.0)]TJ 0 -10.959 Td [(23)-4107(23)-1754(1.0)-4000(23)-4107(55)-1753(2.0)]TJ 0 -10.959 Td [(24)-4107(24)-1754(1.0)-4000(24)-4107(56)-1753(2.0)]TJ 0 -10.959 Td [(25)-4107(25)-1754(1.0)-4000(25)-4107(57)-1753(2.0)]TJ 0 -10.959 Td [(26)-4107(26)-1754(1.0)-4000(26)-4107(58)-1753(2.0)]TJ 0 -10.959 Td [(27)-4107(27)-1754(1.0)-4000(27)-4107(59)-1753(2.0)]TJ 0 -10.958 Td [(28)-4107(28)-1754(1.0)-4000(28)-4107(60)-1753(2.0)]TJ 0 -10.959 Td [(29)-4107(29)-1754(1.0)-4000(29)-4107(61)-1753(2.0)]TJ 0 -10.959 Td [(30)-4107(30)-1754(1.0)-4000(30)-4107(62)-1753(2.0)]TJ 0 -10.959 Td [(31)-4107(31)-1754(1.0)-4000(31)-4107(63)-1753(2.0)]TJ 0 -10.959 Td [(32)-4107(32)-1754(1.0)-4000(32)-4107(64)-1753(2.0)]TJ 0 -10.959 Td [(33)-4107(33)-1754(2.0)-4000(33)-4107(25)-1753(1.0)]TJ 0 -10.959 Td [(34)-4107(34)-1754(2.0)-4000(34)-4107(26)-1753(1.0)]TJ 0 -10.959 Td [(35)-4107(35)-1754(2.0)-4000(35)-4107(27)-1753(1.0)]TJ 0 -10.959 Td [(36)-4107(36)-1754(2.0)-4000(36)-4107(28)-1753(1.0)]TJ 0 -10.959 Td [(37)-4107(37)-1754(2.0)-4000(37)-4107(29)-1753(1.0)]TJ 0 -10.958 Td [(38)-4107(38)-1754(2.0)-4000(38)-4107(30)-1753(1.0)]TJ 0 -10.959 Td [(39)-4107(39)-1754(2.0)-4000(39)-4107(31)-1753(1.0)]TJ 0 -10.959 Td [(40)-4107(40)-1754(2.0)-4000(40)-4107(32)-1753(1.0)]TJ +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 94.641 -105.903 Td [(61)]TJ +0 g 0 G +ET + +endstream +endobj +1388 0 obj << -/D [1105 0 R /XYZ 99.895 443.893 null] +/Length 7819 >> -% 1104 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 150.705 706.129 Td [(5.2)-1000(psb)]TJ +ET +q +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 201.825 706.129 Td [(ovrl)-250(\227)-250(Overlap)-250(Update)]TJ/F62 9.9626 Tf -51.12 -18.964 Td [(These)-250(subr)18(outines)-250(applies)-250(an)-250(overlap)-250(operator)-250(to)-250(the)-250(input)-250(vector:)]TJ/F60 9.9626 Tf 154.518 -23.824 Td [(x)]TJ/F91 10.3811 Tf 8.097 0 Td [(\040)]TJ/F60 9.9626 Tf 13.497 0 Td [(Q)-42(x)]TJ/F62 9.9626 Tf -176.112 -21.014 Td [(wher)18(e:)]TJ +0 g 0 G +/F60 9.9626 Tf 0.294 -19.203 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 10.186 0 Td [(is)-250(the)-250(global)-250(dense)-250(submatrix)]TJ/F60 9.9626 Tf 131.351 0 Td [(x)]TJ +0 g 0 G + -141.607 -19.564 Td [(Q)]TJ +0 g 0 G +/F62 9.9626 Tf 12.857 0 Td [(is)-250(the)-250(overlap)-250(operator;)-250(it)-250(is)-250(the)-250(composition)-250(of)-250(two)-250(operators)]TJ/F60 9.9626 Tf 271.931 0 Td [(P)]TJ/F60 7.5716 Tf 5.424 -1.494 Td [(a)]TJ/F62 9.9626 Tf 6.445 1.494 Td [(and)]TJ/F60 9.9626 Tf 19.681 0 Td [(P)]TJ/F60 7.5716 Tf 6.405 3.616 Td [(T)]TJ/F62 9.9626 Tf 5.401 -3.616 Td [(.)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +ET +q +1 0 0 1 230.392 581.71 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +Q +BT +/F60 9.9626 Tf 236.663 573.142 Td [(x)]TJ/F59 9.9626 Tf 120.622 0 Td [(Subroutine)]TJ +ET +q +1 0 0 1 230.392 569.356 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +Q +BT +/F62 9.9626 Tf 236.369 560.788 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +ET +q +1 0 0 1 373.603 560.988 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 376.592 560.788 Td [(ovrl)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +ET +q +1 0 0 1 373.603 549.032 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 376.592 548.833 Td [(ovrl)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +ET +q +1 0 0 1 373.603 537.077 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 376.592 536.878 Td [(ovrl)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +ET +q +1 0 0 1 373.603 525.122 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 376.592 524.923 Td [(ovrl)]TJ +ET +q +1 0 0 1 230.392 521.137 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 278.277 492.758 Td [(T)92(able)-250(18:)-310(Data)-250(types)]TJ +0 g 0 G +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -127.572 -23.549 Td [(call)]TJ +0 g 0 G + [-525(psb_ovrl\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ +0 g 0 G + [-525(psb_ovrl\050x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(update)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(update_type,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(work)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(work\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.014 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.564 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.564 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 89.687 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -79.948 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 244.742 0 Td [(psb)]TJ +ET +q +1 0 0 1 436.673 349.49 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 439.811 349.291 Td [(T)]TJ +ET +q +1 0 0 1 445.669 349.49 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 448.807 349.291 Td [(vect)]TJ +ET +q +1 0 0 1 470.356 349.49 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 473.495 349.291 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf -297.884 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(18)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.564 Td [(desc)]TJ +ET +q +1 0 0 1 171.218 317.971 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 174.207 317.772 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 360.068 270.151 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 363.206 269.951 Td [(desc)]TJ +ET +q +1 0 0 1 384.755 270.151 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 387.893 269.951 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -258.11 -19.564 Td [(update)]TJ +0 g 0 G +/F62 9.9626 Tf 36.523 0 Td [(Update)-250(operator)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -11.617 -31.519 Td [(update)-250(=)-250(psb)]TJ +ET +q +1 0 0 1 235.367 219.067 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 238.356 218.868 Td [(none)]TJ +ET +q +1 0 0 1 261.648 219.067 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 269.619 218.868 Td [(Do)-250(nothing;)]TJ +0 g 0 G +/F59 9.9626 Tf -94.008 -15.579 Td [(update)-250(=)-250(psb)]TJ +ET +q +1 0 0 1 235.367 203.488 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 238.356 203.289 Td [(add)]TJ +ET +q +1 0 0 1 256.109 203.488 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 264.079 203.289 Td [(Sum)-250(overlap)-250(entries,)-250(i.e.)-310(apply)]TJ/F60 9.9626 Tf 137.239 0 Td [(P)]TJ/F60 7.5716 Tf 6.405 3.617 Td [(T)]TJ/F62 9.9626 Tf 5.4 -3.617 Td [(;)]TJ +0 g 0 G +/F59 9.9626 Tf -237.512 -15.579 Td [(update)-250(=)-250(psb)]TJ +ET +q +1 0 0 1 235.367 187.91 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 238.356 187.71 Td [(avg)]TJ +ET +q +1 0 0 1 255.013 187.91 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +0 g 0 G +BT +/F62 9.9626 Tf 262.983 187.71 Td [(A)92(verage)-250(overlap)-250(entries,)-250(i.e.)-310(apply)]TJ/F60 9.9626 Tf 153.667 0 Td [(P)]TJ/F60 7.5716 Tf 5.424 -1.494 Td [(a)]TJ/F60 9.9626 Tf 4.278 1.494 Td [(P)]TJ/F60 7.5716 Tf 6.405 3.617 Td [(T)]TJ/F62 9.9626 Tf 5.401 -3.617 Td [(;)]TJ -262.547 -19.564 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F60 9.9626 Tf 38.64 0 Td [(u)-80(p)-25(d)-40(a)-25(t)-25(e)]TJ +ET +q +1 0 0 1 244.034 144.435 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F60 9.9626 Tf 247.147 144.236 Td [(t)-25(y)-80(p)-25(e)]TJ/F93 10.3811 Tf 21.467 0 Td [(=)]TJ/F60 9.9626 Tf 11.634 0 Td [(p)-25(s)-25(b)]TJ +ET +q +1 0 0 1 294.938 144.435 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F60 9.9626 Tf 298.201 144.236 Td [(a)-25(v)-47(g)]TJ +ET +q +1 0 0 1 314.026 144.435 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 175.611 132.281 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(integer)-250(variable.)]TJ +0 g 0 G + 141.968 -29.888 Td [(62)]TJ +0 g 0 G +ET + +endstream +endobj +1397 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/Length 5447 >> -% 1110 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 99.895 706.129 Td [(work)]TJ +0 g 0 G +/F62 9.9626 Tf 28.782 0 Td [(the)-250(work)-250(array)111(.)]TJ -3.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(one)-250(dimensional)-250(array)-250(of)-250(the)-250(same)-250(type)-250(of)]TJ/F60 9.9626 Tf 252.794 0 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -282.907 -19.925 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(global)-250(dense)-250(r)18(esult)-250(matrix)]TJ/F60 9.9626 Tf 117.084 0 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(.)]TJ -107.346 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-379(as:)-568(an)-379(array)-379(of)-379(rank)-379(one)-379(or)-379(two)-379(containing)-379(numbers)-379(of)-379(type)]TJ 0 -11.955 Td [(speci\002ed)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(18)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.926 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(If)-241(ther)18(e)-240(is)-241(no)-241(overlap)-240(in)-241(the)-241(data)-240(distribution)-241(associated)-241(with)-240(the)-241(descrip-)]TJ 12.453 -11.955 Td [(tor)74(,)-250(no)-250(operations)-250(ar)18(e)-250(performed;)]TJ +0 g 0 G + -12.453 -19.926 Td [(2.)]TJ +0 g 0 G + [-500(The)-284(operator)]TJ/F60 9.9626 Tf 72.855 0 Td [(P)]TJ/F60 7.5716 Tf 6.405 3.617 Td [(T)]TJ/F62 9.9626 Tf 8.232 -3.617 Td [(performs)-284(the)-284(r)18(eduction)-285(sum)-284(of)-284(overlap)-284(elements;)-302(it)-284(is)-284(a)]TJ -75.039 -11.955 Td [(\223pr)18(olongation\224)-265(operator)]TJ/F60 9.9626 Tf 110.535 0 Td [(P)]TJ/F60 7.5716 Tf 6.405 3.616 Td [(T)]TJ/F62 9.9626 Tf 8.044 -3.616 Td [(that)-265(r)18(eplicates)-266(ov)1(erlap)-266(elements,)-269(accounting)]TJ -124.984 -11.955 Td [(for)-250(the)-250(physical)-250(r)18(eplication)-250(of)-250(data;)]TJ +0 g 0 G + -12.453 -19.925 Td [(3.)]TJ +0 g 0 G + [-500(The)-190(operator)]TJ/F60 9.9626 Tf 70.978 0 Td [(P)]TJ/F60 7.5716 Tf 5.423 -1.495 Td [(a)]TJ/F62 9.9626 Tf 5.848 1.495 Td [(performs)-190(a)-190(scaling)-190(on)-190(the)-190(overlap)-190(elements)-190(by)-190(the)-190(amount)]TJ -69.796 -11.956 Td [(of)-325(r)18(eplication;)-363(thus,)-343(when)-325(combined)-325(with)-325(the)-325(r)18(eduction)-325(operator)74(,)-344(it)-325(im-)]TJ 0 -11.955 Td [(plements)-250(the)-250(average)-250(of)-250(r)18(eplicated)-250(elements)-250(over)-250(all)-250(of)-250(their)-250(instances.)]TJ/F59 11.9552 Tf -24.907 -19.925 Td [(Example)-320(of)-320(use)]TJ/F62 9.9626 Tf 87.879 0 Td [(Consider)-320(the)-320(discr)18(etization)-320(mesh)-320(depicted)-320(in)-320(\002g.)]TJ +0 0 1 rg 0 0 1 RG + [-320(4)]TJ +0 g 0 G + [(,)-337(parti-)]TJ -87.879 -11.955 Td [(tioned)-262(among)-262(two)-263(pr)18(ocesse)1(s)-263(as)-262(shown)-262(by)-262(the)-262(dashed)-263(li)1(nes,)-266(with)-262(an)-262(overlap)-262(of)]TJ 0 -11.955 Td [(1)-261(extr)1(a)-261(layer)-260(with)-261(r)18(espect)-260(to)-261(the)-260(partition)-261(of)-260(\002g.)]TJ +0 0 1 rg 0 0 1 RG + [-261(3)]TJ +0 g 0 G + [(;)-265(the)-261(data)-260(distribution)-261(is)-260(such)]TJ 0 -11.956 Td [(that)-267(each)-268(pr)18(ocess)-267(will)-267(own)-267(40)-268(entries)-267(in)-267(the)-267(index)-268(space,)-271(with)-267(an)-268(overlap)-267(of)-267(16)]TJ 0 -11.955 Td [(entries)-249(placed)-248(at)-249(local)-249(i)1(ndices)-249(25)-249(thr)18(ough)-248(40;)-249(the)-249(halo)-249(will)-248(r)8(un)-249(fr)18(om)-249(local)-248(index)]TJ 0 -11.955 Td [(41)-236(thr)18(ough)-237(local)-236(index)-237(48..)-305(If)-236(pr)18(ocess)-237(0)-236(assigns)-237(an)-236(initial)-236(value)-237(of)-236(1)-236(to)-237(its)-236(entries)]TJ 0 -11.955 Td [(in)-259(the)]TJ/F60 9.9626 Tf 27.963 0 Td [(x)]TJ/F62 9.9626 Tf 7.782 0 Td [(vector)74(,)-261(and)-258(pr)18(ocess)-259(1)-259(assigns)-258(a)-259(value)-259(of)-258(2,)-261(then)-259(after)-258(a)-259(call)-259(to)]TJ/F67 9.9626 Tf 266.124 0 Td [(psb_ovrl)]TJ/F62 9.9626 Tf -301.869 -11.955 Td [(with)]TJ/F67 9.9626 Tf 22.816 0 Td [(psb_avg_)]TJ/F62 9.9626 Tf 44.404 0 Td [(and)-257(a)-257(call)-257(to)]TJ/F67 9.9626 Tf 55.983 0 Td [(psb_halo_)]TJ/F62 9.9626 Tf 49.635 0 Td [(the)-257(contents)-257(of)-257(the)-257(local)-257(vectors)-257(will)-258(b)1(e)]TJ -172.838 -11.955 Td [(the)-250(following)-250(\050showing)-250(a)-250(transition)-250(among)-250(the)-250(two)-250(subdomains\051)]TJ +0 g 0 G + 166.875 -143.462 Td [(63)]TJ +0 g 0 G +ET + +endstream +endobj +1405 0 obj << -/Type /Page -/Contents 1111 0 R -/Resources 1109 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1079 0 R +/Length 3551 >> -% 1112 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F62 7.9701 Tf 265.805 653.177 Td [(Pr)18(ocess)-250(0)-8396(Pr)18(ocess)-250(1)]TJ -31.163 -9.464 Td [(I)-1500(GLOB\050I\051)-1500(X\050I\051)-5163(I)-1500(GLOB\050I\051)-1500(X\050I\051)]TJ -1.299 -9.465 Td [(1)-4774(1)-1920(1.0)-5000(1)-4274(33)-1920(1.5)]TJ 0 -9.464 Td [(2)-4774(2)-1920(1.0)-5000(2)-4274(34)-1920(1.5)]TJ 0 -9.465 Td [(3)-4774(3)-1920(1.0)-5000(3)-4274(35)-1920(1.5)]TJ 0 -9.464 Td [(4)-4774(4)-1920(1.0)-5000(4)-4274(36)-1920(1.5)]TJ 0 -9.465 Td [(5)-4774(5)-1920(1.0)-5000(5)-4274(37)-1920(1.5)]TJ 0 -9.464 Td [(6)-4774(6)-1920(1.0)-5000(6)-4274(38)-1920(1.5)]TJ 0 -9.465 Td [(7)-4774(7)-1920(1.0)-5000(7)-4274(39)-1920(1.5)]TJ 0 -9.464 Td [(8)-4774(8)-1920(1.0)-5000(8)-4274(40)-1920(1.5)]TJ 0 -9.465 Td [(9)-4774(9)-1920(1.0)-5000(9)-4274(41)-1920(2.0)]TJ -3.985 -9.464 Td [(10)-4274(10)-1920(1.0)-4500(10)-4274(42)-1920(2.0)]TJ 0 -9.465 Td [(11)-4274(11)-1920(1.0)-4500(11)-4274(43)-1920(2.0)]TJ 0 -9.464 Td [(12)-4274(12)-1920(1.0)-4500(12)-4274(44)-1920(2.0)]TJ 0 -9.465 Td [(13)-4274(13)-1920(1.0)-4500(13)-4274(45)-1920(2.0)]TJ 0 -9.464 Td [(14)-4274(14)-1920(1.0)-4500(14)-4274(46)-1920(2.0)]TJ 0 -9.465 Td [(15)-4274(15)-1920(1.0)-4500(15)-4274(47)-1920(2.0)]TJ 0 -9.464 Td [(16)-4274(16)-1920(1.0)-4500(16)-4274(48)-1920(2.0)]TJ 0 -9.465 Td [(17)-4274(17)-1920(1.0)-4500(17)-4274(49)-1920(2.0)]TJ 0 -9.464 Td [(18)-4274(18)-1920(1.0)-4500(18)-4274(50)-1920(2.0)]TJ 0 -9.465 Td [(19)-4274(19)-1920(1.0)-4500(19)-4274(51)-1920(2.0)]TJ 0 -9.464 Td [(20)-4274(20)-1920(1.0)-4500(20)-4274(52)-1920(2.0)]TJ 0 -9.465 Td [(21)-4274(21)-1920(1.0)-4500(21)-4274(53)-1920(2.0)]TJ 0 -9.464 Td [(22)-4274(22)-1920(1.0)-4500(22)-4274(54)-1920(2.0)]TJ 0 -9.465 Td [(23)-4274(23)-1920(1.0)-4500(23)-4274(55)-1920(2.0)]TJ 0 -9.464 Td [(24)-4274(24)-1920(1.0)-4500(24)-4274(56)-1920(2.0)]TJ 0 -9.465 Td [(25)-4274(25)-1920(1.5)-4500(25)-4274(57)-1920(2.0)]TJ 0 -9.464 Td [(26)-4274(26)-1920(1.5)-4500(26)-4274(58)-1920(2.0)]TJ 0 -9.465 Td [(27)-4274(27)-1920(1.5)-4500(27)-4274(59)-1920(2.0)]TJ 0 -9.464 Td [(28)-4274(28)-1920(1.5)-4500(28)-4274(60)-1920(2.0)]TJ 0 -9.465 Td [(29)-4274(29)-1920(1.5)-4500(29)-4274(61)-1920(2.0)]TJ 0 -9.464 Td [(30)-4274(30)-1920(1.5)-4500(30)-4274(62)-1920(2.0)]TJ 0 -9.465 Td [(31)-4274(31)-1920(1.5)-4500(31)-4274(63)-1920(2.0)]TJ 0 -9.464 Td [(32)-4274(32)-1920(1.5)-4500(32)-4274(64)-1920(2.0)]TJ 0 -9.465 Td [(33)-4274(33)-1920(1.5)-4500(33)-4274(25)-1920(1.5)]TJ 0 -9.464 Td [(34)-4274(34)-1920(1.5)-4500(34)-4274(26)-1920(1.5)]TJ 0 -9.465 Td [(35)-4274(35)-1920(1.5)-4500(35)-4274(27)-1920(1.5)]TJ 0 -9.464 Td [(36)-4274(36)-1920(1.5)-4500(36)-4274(28)-1920(1.5)]TJ 0 -9.465 Td [(37)-4274(37)-1920(1.5)-4500(37)-4274(29)-1920(1.5)]TJ 0 -9.464 Td [(38)-4274(38)-1920(1.5)-4500(38)-4274(30)-1920(1.5)]TJ 0 -9.465 Td [(39)-4274(39)-1920(1.5)-4500(39)-4274(31)-1920(1.5)]TJ 0 -9.464 Td [(40)-4274(40)-1920(1.5)-4500(40)-4274(32)-1920(1.5)]TJ 0 -9.465 Td [(41)-4274(41)-1920(2.0)-4500(41)-4274(17)-1920(1.0)]TJ 0 -9.464 Td [(42)-4274(42)-1920(2.0)-4500(42)-4274(18)-1920(1.0)]TJ 0 -9.465 Td [(43)-4274(43)-1920(2.0)-4500(43)-4274(19)-1920(1.0)]TJ 0 -9.464 Td [(44)-4274(44)-1920(2.0)-4500(44)-4274(20)-1920(1.0)]TJ 0 -9.465 Td [(45)-4274(45)-1920(2.0)-4500(45)-4274(21)-1920(1.0)]TJ 0 -9.464 Td [(46)-4274(46)-1920(2.0)-4500(46)-4274(22)-1920(1.0)]TJ 0 -9.465 Td [(47)-4274(47)-1920(2.0)-4500(47)-4274(23)-1920(1.0)]TJ 0 -9.464 Td [(48)-4274(48)-1920(2.0)-4500(48)-4274(24)-1920(1.0)]TJ +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 88.221 -98.979 Td [(64)]TJ +0 g 0 G +ET + +endstream +endobj +1409 0 obj << -/D [1110 0 R /XYZ 149.705 753.953 null] +/Length 321 >> -% 1109 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +0 g 0 G +0 g 0 G +1 0 0 1 104.053 292.88 cm +q +.65 0 0 .65 0 0 cm +q +1 0 0 1 0 0 cm +/Im5 Do +Q +Q +0 g 0 G +1 0 0 1 -104.053 -292.88 cm +BT +/F62 9.9626 Tf 189.276 261 Td [(Figur)18(e)-250(4:)-310(Sample)-250(discr)18(etization)-250(mesh.)]TJ +0 g 0 G +0 g 0 G +0 g 0 G + 77.494 -170.562 Td [(65)]TJ +0 g 0 G +ET + +endstream +endobj +1392 0 obj << -/Font << /F51 584 0 R /F54 586 0 R >> +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (./figures/try8x8_ov.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 1411 0 R +/BBox [0 0 516 439] +/Resources << /ProcSet [ /PDF /Text ] +/ExtGState << +/R7 1412 0 R +>>/Font << /R8 1413 0 R/R10 1414 0 R>> >> -% 1117 0 obj -<< -/Type /Page -/Contents 1118 0 R -/Resources 1116 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1121 0 R -/Annots [ 1113 0 R 1114 0 R 1115 0 R ] ->> -% 1113 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 314.69 444.603 326.749] -/A << /S /GoTo /D (vdata) >> ->> -% 1114 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [326.652 302.734 333.626 314.794] -/A << /S /GoTo /D (table.8) >> ->> -% 1115 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 232.693 323.106 244.753] -/A << /S /GoTo /D (descdata) >> +/Length 3413 +/Filter /FlateDecode >> -% 1119 0 obj +stream +xœ…›Aä¸ …ïõ+ê8s˜^Ë’-é SvÒ·Ážj‘éÃ&‡üýØä{|ÔI°‡¦(ÚõmÑ|%ZšßŸÛ[yn÷øûúxüôµ?û×£½Ï?lîùõO³ögïíùaV+íùÇ_Í.µÐ=äÝ¥Mù÷:éß·šü³†ÿèòןZKúضÅÇÖŸ[J'¥™Ä´8ÍžùºMTÕmÂúÈiÝ&®ÀëvÉ ö™@î%o3ï‘o{æ ˆlv ß#"›È6²Ù|ˆlvÉD¶"ï×Õ@6“È6²Ù3_d·‰l# »Md9²ÛD¶Ý.È>È}òmò= òmÏ|‘Íä{Dd³ÙF@6;ï‘Í.€È6CäZ£âÌ$² €löÌÙm"ÛÈnÙGŽì6‘md·K²Ïòˆò33‡ÊÏì™/ òÈåg#"\~>òÈåg#"\~N³ [‹ò3“È6²Ù3_d·‰l# »Md9²ÛD¶Ý.È>ßòãÐœqhÌtÌÔ3͇ÆL9óÌ.‡æL€CÚ ½·Ðf4f :f š37´fj‹ƒŽƒæL9óÌ.ƒŽ™.ƒÚ ]f h³33Í™Z3µÅŒAÇŒAs¦œyfAÇL€A Í › +ÑlA7bÌ8tS!j¦¶˜qè– ‘3åÌ3»º¥BäÄ– z¨G.á*¹‡jpäªÀ‘ p¨þF.¿¡ê©ø†jo,¥—*o)¼TwKÙ¥ª[Š.ÕÜRr©â–‚Kõ–Ë-U[rCïÌÜŽöVŸç8ßúV]¾c­úz£¿íü½?ZÄ”à+ÄÌ£¾õgD_œßå6^¸û=t‹–×ãûªjn–Œf¿9±¼š ñ…ÖÑêY2šý&Å +\h6´Oõø`Ih®]¦³g4û¡ŒŽ@h6¼!PN¶õ­à»7#¡ÝCèæwù¾*”›uA«H… íšÇMGÛšÑì×?ú¡ÙÐ>ÕãƒEhÐ!3ÇÌh¶$‰¾Kh6¼!P­Ì¹ müî·º ÝC˜DÃ]¾¯jcf]ÊÀ–QÑ)&´Š2@üAZ[ÊÀÖYÑM +͆e€»|_ôŬ롾oÆ‹;2Ñ—"è¨F:çš÷#}]Ëè‰/~.%0QŒ&Gª@_©v&a}þùøwQU=þ}¥j”ž²jþ.ªÿ²®3Ûõ©ìûu¹Ë–:Ù×ÕÉîý 2çË=ÄÇÒïÅe ¥†ñ·p¡?·TòþæwžÄÇ•šÛ%ñ™v©>÷ûÊ®ôÈm—)Æßê¥.=øÜoŸ‹øàÉ|X”Ù"È• +×›€éíAð¹ßÖCˆù\«ÈW˜hùÌï ]|Ûš_®¿Ü®™¯2/7ò™ß×k5ø Ln»`1þ` p¼¿ùís<‰K-³]³p½I™ÞØŸûm•ƒøX¦½¸d›™o‹<¸Ê‘ÏüÎ0ƒ/ÉܲÜ2»æú0=Ó«&ñUÖãcEF¾–ëÃMo£‚ÏýΠúHZ—×XfB¿p}gZz®ŽÎâˆh®ÃGCüdf®ÉÒˆh²äÒH•Ñ´=2²FÔEOh5ÕE_ÐZèSYd…UÑZR½Ôçº ë§T/^†]·=O©Þ©UŸ:á»bÏâMÝüŸg÷7¿3x|ð$>ªžÛ%ñ¹ºÅk¼àsÿ©¥Ÿš^ðQÅoê/ú‚Ïýö¹ˆžÌÕ³¦*æ×»ºÅ Èàsÿ©õŸú[òAÅÀW˜ªøÌï ]|Ûš_ªžÛ5óUæ…ª>óŸZª•U ñó@ÕÃýÍoŸ‹øàI|T=³¡b~½«[¼ô >÷ŸZ ªk}±ƒ™o‹<@õÀg~g˜Á—U/µŸn×\®nñ¶Z|•õÁøS+3ع>\Ýâ…vð¹ßTYõÔqºIóë;ÓÒsutGDŸ±@33tÌã'“0smL–FD“%—FªŒ¾ í‘‘¥0¢.zB«©.ú‚ÖBŸÊ"+¬ŠžÐ¤zË{¿ª^t¯[¨žÞ§¿û¾…êùÛ@Ä­Ñ`[–«v ,«¼¿ù«º]ñ$>¾È«jHq½©›v‚ÏýEkC½TŒñ·ºi¯ øÜ_ÕòŠ'óá]UWŠëMÝ´‡|î/Zê}ù\ÅÈW˜¨ùÌ_Õ÷Š'ññõ\UkJ¾Ê¼@õÈgþ¢µ¡^í*Æøƒy€êñþæ¯j~Å“øø&®ª?Åõ¦nÚ7 +>÷­ õïÅ7z3óm‘W=ò™¿ªOækª¨ù*óÕ#_e}0¾h;ׇ©›öÄ‚ÏýUm°xßPyPÇp}gZz®ŽÎâˆè«43©cˆŸLÂ̵1Y][V½üέ¦†5½xò–Ï]Oh5ÕE_ÐZèSYd…UÑZR½´â6Tl4©^lÉ]·íMª×µ6ÔNÉ‹»&ž%Ä›ºõ)ÕÃýÍ?Ô‹'ñQõ†:V\ïꛉÁ¿Ö†ÚU ñ¦n±Ý|îêˆÅ“ù zC+®wu‹mÐàƒ_kCí*¾Â\Ýb[=øÜ?Ô‹'ñ •Gè˜_ß™–ž«£³8"ºÇ*ÍÌÐ1ŸLÂ̵1Y=Æ¢zÚð¬÷mŒ,…uÑZMuÑ´úTYaUô„&Õ[ö…›:ÖZG¨žvõ_ºP=ß-Fü®5lËãouÃYË*ïoþ¦ŽX<‰½M+®7uÓy„àsÿ®µ¡6‰Ácü­n:±|îoêˆÅ“ù°§ÛÔ±âzS7¤>÷ïZj?˜|®bä+ÌT|æoêˆÅ“ø¸}ÛÔ±’¯2/P=ò™×ÚP[¿àƒŠ1þ` z¼¿ù›:bñ$>îÔ6u¬¸ÞÔM§W‚Ïý»Ö†Úå}qÇwf¾-òàªG>ó7uÄâÉ|Mõ#_e^ z䫬ÆïZ£ÁÎõaꦓ9Áçþ¦ŽX<‰o¨<¨c¸¾3-=WGgqDô«43©cˆŸLÂ̵1YÝö¬zy?¶¥ž5½xò–Ï]Oh5ÕE_ÐZèSYd…UÑZR½tNÆm¨ØÔn†]·ÚÍðÓ3ˆïZ£Áö, ífณguj7 ?ÏUõÒ‘Ø%ñ¹º ífàóÜßµ6Ô¡ðQņv3tè)øÜ?Õ‹'óAõ¦:V\ïê6´›Ïs×ÚPçcÈÚÍÐq-ñ™ª#Oâ£êMu¬ä«Ì Uoh7Ç_jðQõºÖnŒ?˜ªÞÔn†.â繪^:Óâ6Tlj7Cà‚Ïý]kCzyñÌÌ|[äª7µ›†|YõÒñ·k®W·¡Ý òUÖã»Öh°s}¸º ífðþõÁøy®ª§“*nRÇfìfÄàë,Žˆî±J33tlÄnFI ¶ÉÒˆèy.ª§Ó)žõ¾ í‘‘¥0¢.zB«©.ú‚ÖBŸÊ"+¬ŠžÐ¤zíÐn†Û®b­i7Cg _Vµ›á§ _µFƒmYbü­n81iYåýͨ#OâãAÀC+®7uÓ©ÈàsÕÚP‡Ácü­n:7|î?Ô‹'óáÌß¡Ž×›ºé®?Œ÷Æå‡ñ¶ŽùÃxü0î÷øúz~ÙëDõ¹žDí‡E]Ó×…aXŽí!Í>øïŸ×düoõ–‘¿Ö«\¯ç­ÜýÌ•çë»ýô—w-ÿ/Iÿõv×ï!o'ÈŸÿ`[G. +endstream +endobj +1417 0 obj << -/D [1117 0 R /XYZ 98.895 753.953 null] +/Filter /FlateDecode +/Subtype /Type1C +/Length 13073 >> - +stream +xœºwxWö?laÏŒ˜ r‘G¶5h†ôB'ZBïL·1`pø˶$K–eK²%«Yr•-˽w Lï%„ ”$$$¤m²›Æî{½ûýÈ–ßû<ïûýãõ<~4£¹ºsçÜs>çs +ÏÇo”dz%!)&cÒ攤}ÉÞ뉚ç7Ê#ñ-VyRŸ/Å<ÌŸâ1¾ÅcüNŽc~G¿8ŽfüÇÑA>~<Þ¬5›íonݼý­ &.II•§'ÄÅKÇOŸ:mÆøhùøÝ¿4&#!.yüëÜIVÌ¡”Ô¤˜déú„¤èÌŒñ/ž<~sL\æ¡}éÿúøïDÿÿ¦æùsïæ³Ý§•À‹åý<*Åw¬ïjßM¾}~´_¤_¬_;ÆÃæbë±d,ëÆð`|<þ +¾—ãýø)üþ þÿ'‘G˜;?€Ì·ò«ø®ÑüÑ£•£¿'7ÇÉÈûà=°,Ù ¨­c|Æ?æ1³ÆèÆÜ;nìkcç]4vÍØð±ÛÆÆ-ûpì+ˆ¤ +ä‚|ApQð‘àgÁßÿôçû ü—û¯ößà¿Í¿Ø¿Ä¿É¿Ó¿×ÿI€0`I@x€2   0 $àTÀÕ€Û¾ ôd_ |#p]`Z`f 5°.ðbàõÀ? šô~Ðò ¸ Â ª Ú æ Ž ž þ ÓA烾 ú6è7¡¿P('|S8[ø¾p±p¥p¿0Gh– +Â&a‡°GxEø¡ð¡ð ! ö ‡³Á¯O^¼*8|=øqð“à?ÿüwʇR¯Ss¨ÔJjµ‹ÚO¤”TUBÕPõT'u’ºI=¦þLý&%"Eþ¢0ÑK¢·DSE³DóEËD«EE[D¢ƒ"¹¨HdÙEu¢ÑIÑ%Ñ ÑmÑ}ÑW¢ŸE#!£CD!lÈÄi!³C„, Y²1$2$>D’b ) q†4„´†t‡9r!äÇ!C~ù-…ú…Ž„ +Sðˆ—¢ÒÑ+F~*þŠÅhÃb8™øÂP©®œÃo€J)®ž«RÏ2ðÑd˜Ž·›¬ÖVÒ8äܪêéü&¼¾‚´Êð»úŠ\óþÈÆåàY“R)Ôz%-ÕV ±p;qFߦldz3÷Ön¤'-ÚµVZ›ÝØXç®·mF;k(5:ìâ–ÎúÃ'[’·2ë 4i½<o ?3[•pP à?žÏ¦P8‡^AQ“:§œÛÌ®½ôôÀ×4Ü ÇAFý%ùÉ–ËìGkgõN ãüŽ—'ö²çvuLB@¼Q½8©êÍt{Ð`‚;èã'Põ‹p¢'œÛÙ,\8.³0O­ 5 +«CÁ +»ªbcé}{ÉÙøCò}ÛÅó†Â¡ßFæÎ3û›bø¥¹¶¬tñª»O[w +v2R¼Åcõ¸pm­©¢ÊI—×jk¹‰”ýƒ4äýè÷Ï·Ü@¡wØ—/î:/>¤ïÚ±#²¸¦#±*­z#§ÈZ<{)2mbиï߀A0èÇgœv…Nù³¹)Ô—' £V.<Ûx8‹M9“sûcñŸŽ_»Ã€%dMù$ p‰`¼ßiÉ©ìÃnâØp}E}²ñÐ*ņÕêœ]E|%<à$vÙò*Œ'ùPNü°ç¢݇kV0­y»,!o»2LAXìp¦¤µT¹‹E&¸1/wg@žïó7j©KvYr7Š†%¡PØtîÔw⣹}imÌ`̬æÉ4j“á÷ +ÊòM‹É|ÍB_Ûk‰ù6u…ñ.î$.˜a>ºŠÁ™øH„ç ÕZî8 /í´3k_œŒÙ¯›`àƒB(Áòð½¾HGë4f[ëJmô{›·/ ïÜÿe{é Ö“]“qH¼'1eÇòøþ_¥LšZ‰—Ù­æ +èår½ÞÄ€¸¶ÊÑif¬eŦr¶uɈaÎh~F/Á´%-&ŒïäçO-äÖy¹–˜nÎ-7>àÃm|f]ýº¹þ´5 tyÆSŽÊs9ÝáP¬aÑ:bòðÖÏÏn>y„9r²öÚc1$bžìLÏÎN—•åÔg3-%%¿"£äÐAñÛ›–ÎKÉ(¯U3y®ÂÆa1Œ!Ú9³®b<ᵈžÿ|yzˆa¸KÇ#_Þ›†¨BþJ|¾¹Àbìå{’óÝÊʇV> p$xý KÀaš‡]ÃhÊc*IŸ—¥fTé©Yt|J÷QÖJ˜® õÃу|p¯z (E ÑgªÎ1ǣݨ.$¾v8Ošä$:@§èó³I˜U“ƒÇ*+ûŠX¸º0W"Ñ’8Ó% ß×£{žM½òNÜÂùì’eû'¼!Füþé0K?yÿ ýëÓ¾»wÙO>éÿî™øYì·‹î2÷Në{‰F?ŽQp œÈ[`8œˆ&¢-h3š„&£ o½ßys³òøÇ߈A¶d“Äl3›îJÀR@yëÈ{ØHðwäEÒ…äĽjU!wßš[a¼Ç÷8ÎœG¨SÔ#¿×ÁùØÈU\‡æcxäÕÕ@ÂÌ?7R„gækféù2àù.¥’Æ'¤"ŸB>*Ã=K=Åœ# D6Z€5ã5¿ô¶A‡iøȪ‘¿bJÏU¢.ÅÄ%ÊZ5 '¿ Š7Kµ 9#k½z·¤iÎ’?ý×JVP@ÂQ*éÎldžΟ:¶…5%P=íIqqÉÉ1ñÉm}½ím½,ðµ‹`®Ï6í³¨Ë}ap-Ç£ V½Eo) +sh,:½8?_§fdJ˜ìÄ÷XÕî;&Ám¡m-0úµTß0„9Ñ^1`,/ÌåÆæªr"˜g6Qh> NÎ߬ K*ÀrSe©©Œî.Í`G²㶢¢¥y|ð¤ì?¯S-vçVpýûv×3†Õ 5ŠRÂU™Ÿ›_˜¯Éc9 ~ K{Š›¾µ•ž²ð]5([áÂOÙòÍSùÓS _ÂòË eâªrG-Sê?fêVôï…£( +Cs±]’e tFÔÈñ<­Ía·Øí¥, €Ó0Á{¸ø©ËyÎʯEÛdD±\—#Ž]„F½ŠøLŒÂoW:/3.$“—+•ËX•E¼ +ù‹à¨>qM¹­Çë3¸ß+¬U^˜W“qzE…²"‡;”Õ™¡h5Ê[>#!-Â&‡%."ÁœWâ;í'®3P7`°$Wµ„@Í"A éYå¦à›«Œ—ÚhDM}Û]±».†­N­Tv§.FãCûð ?–ÓӠЋÙê“}lNcnÅ¡z~âªmkyÔ1½gÅÂiE{:“ÕI¥^R_üHüÉkwN¸yó&P¯áPˆrX_•ï +;žÓ¸‹FÄd4½…‚ïLûí“‹=wŽ°%1Žl§”¯(/Ï·Ó›Ùbg¿Äz ­¢À ¡÷'÷}¢`g¸(Ä ¦wÄ +GCvð‹Û” n¶JêH¯{Ÿ/°pÈ#'nÛ2¿#F‚BgEç*Òiu¾µJÆVeçYtb¦<5®Szúèáê¶6¶¾¾´²ó\í¡B>éï¼næ»P†œ8Y©•j‹‹óÓÙ‚L$B½ˆ ˜ª9¿¥R\ç*ëg*]8(îó|ÙÇûýWÎyjìãܳ® ×Ø.÷=ýIüMòÃðóÌG+¯IhÔ Ãif‰q±Z½Pχ›=7¨Ž2Ç‘¯nä$ 3·ÎD>bDFÏšÉ$ïؤžAïW—v²àeÌÕÒñùåÇYÏ BoŽÌIÈ[¥ãôßa+.¥ûJ5Ñ,šJtƤ–ï!Y ½Œ‚ïÍ€¾úÝ'ºÙ5„µ¾qC®PZ­Z“Çd+Ò4Éô¬­ŸrêwòÞýsÑÛXÐË"ŸXJ²±E<ØÑrübûÁi ŸŠØA[5š™ãÀëœj#[ W¨™ÍsößÀrk5eâ²J{ c¯F‘Üc…&㧈:¶8tÅK[f¨Õ1ú0™Ëskòˆì¼’ +‡Ùæîd?„¼[hV!/Qæ‹só´™L$n•÷óQn?§à\ü’ÑfXÆwA7-ÎJ 7ñÑ\h†“ðßÌéåÓkL¡ø66b”ãÀEfå¼M‚zYŸÝn2•0à0i+.6ß•Õ¼Ò2¥`íО«Cƒug®2 Ø wzh7ÏC<ŸF©q”7rdÿ¤<Í*gIiµj"NZÕ`·7–5±GžaP÷¿U´æ(ŲÜ8&G;\¹xTZÛ×qì0ÔÏ0ÿo(wÉvÎÒ;dÄ]™Æ´ˆþã€v™ +œ ‚‰€„ösÙdÜ´£$ÃÂÔêË:«Þ¦·…™óšR½5,¹ùP»Ñ¯Sd»t´žûËmÚßÇv¨Éj:X¨ Õi±ùoEoÊë,E%6³Éfe._:Ü÷ñU¾nõì¦ô–u1¹á^U±™0³ÓÉ¡eC©b/;âO,(JÏËf4:UŠVjìu +¶A®*Í¡¥rEfâ€ôüÐ᪦¶·³þÖ#1ðúó9®ŒDKJÕBZðݧr +0ñ6š{(Û¨ObáJâvµóÇÄ/Q¶–Z³›þÓ5§¯]?-+ÇRÏÑ$‰0à+¯fªkP–¢¯”ü!0LÖe¶€µš`r'$zqÝY{ÖÌpû'€ob.YÁdNmÞÐNÅä¨ÔRZ•]R*g3*0ḊŒŒ’LZ81E"×Q^íl„8YUØD_€A¸ŽZ³™žÎ!GºpH­ýø-ñæ¾BQè£7ž;Ý2Üͺ+Û\øè £n:ºûOѧÛcVYX #Éò³¸€&l©Äó澞7á= +%kÕ²}FnÄäPæ"ä&lŸY]{] “› (B°kíj¥Xž§KàF÷)Æ^k«)7fÖ§gdffhl +»Œqï°flÇ™”Ö}5|‡LfÓÐR…<-Å©l«(³”6²Îk¤gËÒ3Ü\ çv76f»¥,'®‹›/yBÁθ¸ÕJ¶ ÷"o‡»p-d%ðÊC_xÆRÓ Ç„8Â+WÀlÎMí´èKßñŸ¶Õš†9ò-Z­¸@«W3r˜ü‡·çvã8ÙÓØÐÝ™Y«Õ:Æ 7é]‰£«¥ç|? èrg¡ÖÉ6äa O¢×/Ý>•Ñµa½DÅgHÀšè̃\¤q{óO?ß¾ú»›‘Y°÷Ó#¢WÒ+wœºÕæ¨kè`UmÚº½¶¢Î-­ŒOÏÔçƱ€* ÷ªc8n·Þ8Gº$—蠟-\ŸT#oiuÖtX‹íÆÖXŽ•]憚£tcmÊÎÚsl‡¤bÃÚ™qàØÊd —,$#¬Û$î…Á’Ø­±¬@AÂK25Æ,–î*ñYYk|¼,e[®e¯9‡D?cŽ%TbZZbbsZgGssggZó!?RÝÙö6ºZ[/óÑßÏP¦Ø’Øò8¾‰“ŒQ,_›40|¥¥Žb>?G} òÞS¨æpáþk\¸oj·XZM|8îÿ;ÞXÇ áÛä2 Øÿ²`a„Ïÿ+^íÙå–­¦Õ3½xSDÂÃ.BjÂÀú=Êìýâ §£ïß?5xýXVëò“LêZêpjUö!ñA©4>:µýDWEC ö;vQI8Q”$p8AÀ©$Тä‹ðz<8`k¦…Ÿ'Àóhøµ–GâhªÅvÁ•Ónsž£K)Dã†+°!øµμ|»–.*4 +ÙW·b ]r‚$²Hà,kV:Ó‰£SS£åyÖêL¦J–gË£³•9Y‰íò³Ð÷óKàhGMC«øDdïúuRWÄ3Mrl¨Ã]ÙK÷ÔÉ’bÓ2—IYAÖ{$ðØŸ_šºYgÕš9Ÿ09R+y:bÇ•ú‹¤>³µ½¾¾­MZŸÌ +ŽŸmâ~Þ™>•‰yä‰&n¶¨Ú™el}4½+*}ã&|$™z#Â( +½‹fr1Åø.šwrÇl8 î€;¸«¹h+§¦(ÍE³Ñv´ÎD³¹Øb7à]¸FqCf¡(nH#ùÂ"ÏKn)¢ìƒáÀ¶†¹(¸ͨ5X¿èêøÒÆi{TÔÖð£{€'ŽZž Ñ/:dW…–Y1³½Ää Ûì¹›Yt÷ü‘}íõdžÅO\E~±)ºœh¦@ª)Ê£³4¥µEly¿Ây¸Â!øµ›÷ø¬{æ úȯŀø¸ºj R%ß ³ŸI5è…h2Ujdb@Í—€ç¡T½;UŸ¶ÉÌŽ¼CÌÒ¦eæiJZf(3·<ô†+ ™TÊJJe^äljm¥¦†®sV7ÝÙ2Á¦´]qlRt^¬n<%¯H†<¿æµB.Öžü憓Ȃ‰g?µ"!¯pQ÷†å k·p¶ºø=¯Ãñ°ÙeÕÁ , O)Ì÷æ1ŒÅµûY×e€j¿Ð{üÞÉh4#ÜFíZ¸$rso;#ÈV$drŽ&e;yf(± $A¡¤·JHPÉÙû/`äµ›ž¸‘¸U„ÀE,Êqâ. øJ˜è$¶Z5¥gÅžŸ8v•oЩuaà Ù~¿Žˆ»rk22reà°„ pR|}ÛÑ÷£ÔR)“%ÓÈrv7©B‡ëÁMÞ7¤'&Q#q€bþwâ‘@?’{0@‰=ŸöƒÊÒÊ7HçQgH¸·…Û®ó Ñw–Qk¡ñ:!ym•'η + Fì¬Á)[+nÉcÑ–G¨Ç>?ö)Øõ˜ÅNC–Èi +õ!:+ËVª`OL—Û£èÝß°ÛL@'LGÜ?wze;üHhG‰ˆÐFÜxûÅØpvˆ®¯(Hu±àÊì"œ(rÿÜé†3(OI.I§wïÎ8Â&íÕ(|‡f‘¦øœ|N¹Æ_CZüþ®¼ùO%R¼Ø +¼;ʪaêÖÆÂúá‰Nð{±Ûóí0–<ó…ïŠ8\â|”ÃÏQ_e®§¿¼°iæ»Û¶Ì“ ™X/Ä2£RÉ PQ¡ÆMyTPk?ôÝ9nÐüîWHóBÕ‚—Ƹ°ß®ÊÌ/L1†¡±ø“/±«à)Ïï%9áÎlim¨oÍ#SY çTLZÎ@òbŽ[N—,žÈEøÇ@wQuÑ`_ëvë+èúj¸_1pø}¡çàŽˆ¸ƒ;ØD€N่Ūq×ý–¦|¹|ÉÜ-áÛoßgRiJ2p‰VCq¯³ÝbÀB1|…ÄA9•Š»àÎv8ßͼ»_Àõ_ø‚nˆ{æCÎByCžw†8ÿ}ǃµˆý|ü³+͹yn@Á·Hè(øˆ,ª^Ï?¿'AQ4¦Å}Ö[5¸˜ +Âx±¼â’¹Áêg7Üzòú±÷â®g¹Ç„tƒ“0„Ì¡îÕyº®Úd=ÌáQº"Gª3éÌ:Öª^±ÉœWaºVA€¯AâÒÎë`Éé)h{aÉq±mÉà_鋂İíòO^8Û tf'1Ç’Wþ‰~€Z»ˆÇ=ö¦~öŽaó é|ùˆ/ü€ó¸c®ž\@y5|$ŽÁI5@ÉÝp­ }äÑ|ä Ð<3 Šk<Ó8‰]ò¹®¥¹ÐM?ô p .§<<o„Ç;*£߮×`½-à2v]w¯ø>?H;€üëY5߇„•LØ´Uó3Ù¯ @å1}$$sëŽ*Ž†Ž7êô™,uëõè/|Iô<âyÄ?"pð¾v®î…KZx@QîæT ^¿7Ýßò·x2–]½744´µHSXAC]{{zc+(5•Óàù«ÜdÃôcàCNgÆÌwgÍž3ušÐ'؇òù„ø„ú„ùˆ}}‚|€·ªäç³ÃçSÅ+æyF½2Êéëç»ß·• ´Å÷= ÷xðÛ'¾ž(¡rm±›ò´‘œÚàð=¸{úýÀý‡âŸç>xeËöÌý±Lb‚2Q±´NúçÃ=·9Û<ë½ÝSfMgÑr´ S{Ä„@Ë©Ô·W‚àÇbî?‚“àcêrƒ!¿½.í_.~7|Í¢”l{c,ãJVØ´T©ÊŠ?šyûþ£æÁaöÄ`Ó¥ÅgrŽ§t0Yõ²Š½µ|á—·N4õ¾öüä{‰qLRŠ2=kƒ³0ôÞác×é›C{×Ä)ãÓÓØ$îΊ >÷ZnøÆ Ïûî xìáüÂï=‡`õšgÓ}8² [»ÑE£÷dkcÖ²-pôƒ>pZ3E£ê«“KP u`ý²w¶þ§œv´ºlu]Gi-и=wïñ<[9O1‚ãÑ5ƒÖV^0…Á$íùû‡ñÏ_öJ£õ¹ 5èÂýe÷`ÊG[î À£‚úù\÷à-ñ㥗_çBœ•SW×&?›ÂŸ"Þì¬äåâ·®„8óàæo†ÑØoù«ÔÝíqáâÕQQ«ì¸ô䃺ÁËÃŒðÁÉÔôKqÇ/ˆ/¼~ëèžåëÓö¯ˆbZ¨9êñkåµç{9“‚~•çkoäÿ~ÿ*Òb8g*DLÑHpg.ô9¤zð,f!ßñhb^†Ï©ìÂü¼|Fš£Š£D|ÁÅê+uͬ»¦©¼þêØLô˽`óó f^ã°òG_h>›Ú²/'.–݃4ZŒFu!þ‰pfOßéŒKôé£5=½ì¥óýNJᾩð%Â¥Q_CŠA1{ÆÏœ¹çoðL8G=a½zø<€Ãª„'¾Á6'>_Q>ld9:P`F+§b*Â9Ðh/¥ëJ•‰ìÈN°V¡Z`ä TUžme0ª +ï'ƒ“Âç±Á+ÕdµÚ,k‰Õj3fÀê¶Y­î¢Ì:f,Oà5ó½>Gx8ï,ïþ¨M£®Žºé;Å7Ê7Ú·Ø÷¾ïcßïüÄ~‰~R?›ßE¿ë~ëÂNcÿÄq|4.Ä_Çâ…„¡& +‰ßùß¿0zúè„Ñé£?ýýè¿ÛÈDò6ù|Jþ Ð`&X*Á1pÜCY}#úE„DÿB†CÆ…¼òzÈ„ç^sGJþ@ïÌ6¿ÊG Ä +]Œ"‰‰H]sh!½tWã‰TVÞ©í».†wOJ>õ&šzeÄ£?Mè-úlœyŸôÜ`” †U„ùYyÕg¥|ð|[-eÑ”ÅßCfÏ¡Ù8:0¢^ƒ^WçDÂdpW-‘]ÑVa©²U²Çáh :ñÁ XEª=M%NWl322è Fr9² + ÈzÔŠ8›Z±7y ½yoÛÅdVÞ¯í4\ãëxÁš¶ôN~oÒîºíôÎHyB ºtŽ1wfBâOŸ\†>GôN¤–w÷]¥¯6¦Ì¨dÁƒ–V_ÏAO(50ŸÈRYìÙl¦+KK±¥Ñ“-²¶9æÃlZFV¶:¯¨0,+S—§g–çÉf–lO;+Þö`ÏŸr†éï¨éì_‹\deU‚m5g”V‹Ý]=-¦¨·Ö`¦û;[:Ø®VgÏ)q›¾AÕÉ íˆ˜Òv?Ô¦¿éÊÛ´à¦onj(ufmð{êrkÏñŽfER5S_µ_ž¿{]ô?2% „ó°u8ò¯ºq8ö7(xüäû|'ü…rü¡ÞQ`^ÀGï‰R£Yop ¼þ‡Öo‡N2'^o¹E_8%‹ëgÛ“«ÓšÖóð‡eâÙ:yn­É±Øå¬3*ÒEsaç+è ´÷­–…6²ËoþGÑ_Á€žžÃ,ÜC˜¯–W\´ò'ų È•xgu~N¡QW¤eßFmˆ€%˜®ª°ªLìrÚš'ŠVÍ…5yÄ‘1´"Õd´iø­;«c•âézÅ!&]™%SѺ|oÑÒ.—[sè ™,ý`GúÑ[ÇÏÃ×γp²gbegyY£% dJÞ&ïsÏv*fr\–.GMç+-6%[³k§#’ž3ûЖuì²u1SÐ(1Ú ÃÐ8GI¹ [ºxeÒlzm¸»/»åä¡o Ÿ®úžƒó×.žÉ:p˜éLt¦Ô®æ¢I_eÓ–zK°(´ )[0%á0aæŠ +SÝXš³‡EJ°(_³¢ˆò'¾¨%ŸªÅç˜UU†{PHš?©¬xláŒè”ŒøR_®1KŒ Ôy³¸Á¾ž—áTjYD\ÆfzÊÚO!ñûùÛŸõVjbÊØ’L,ÙáRÔÓÍuµ –˜µkGv\4»sOÊŠybD=š }®îèeZj[Û/óõa×±i$hùw@ò©¾Ray‰¾!F( C døѪ‚ÈÃìÈ:ÂðR®bŽž/ƒêZb¾UYgøïG›jøJsN•á2fGNµ–”ê5åŒÒY¡¯¤5­]Yî„})™Kv²ßqqlxfœ*\–CXªJMtÓ aXÃvnm\½âÍë¿+g%Ï|¹ˆ?RâM‰µH®‹=‘1’zgžw!MÉ­òI*õ„¢0Äà°ÞóæD?EˆÄ\¸ãã*ç5328ªy SzÆ`™£ æ9ñµ–¼2ã¾ç¢Ð¼#'6o©",§„0•W˜tg™*’I#Œ[Uªu…œ¦÷Ãôö>XÙËó¼üßç=. +¾4r5›øÀДŸ)Öåéòd‘\¯ÎÛ«ã+¢ÖH쳦•;Åõ —2­g1¸MÄ Ëµùkó†P™ÇN Á÷° |•*G~þüPšÕVÈê-E5b˜_#>6Ôf;Þç×ÁÜ,\;/[ù²|¡o0•ØÝôW8|­I5µ‚9Ò)ø¼ƒ¡;HJOOLnJoëhnlïHkNbÓé¬vV—•^¹ÙÞyûÓFç‡Þ‚_¡Œ8a,×g‹ãÞæ؇ ˜@|PíºhbÌ¥Å&ëBƒ3$pðÉÅ\U¥o`‹Dð9À_¿Cn¢×$´¨zQ)°gÔBN‡©ŽÔ;ëú®nÓ[ÍF]b±”TF_šÿg¶Èj,Ö•6uÕÔu ðÛz›WŠmz›^—-ÏR0Æ¢CIÉiZ]6¿xr¨6¼lsž7ÙëĽªÚ´T•,.ˤ« +g$$Ï_žNí—ŒÇz×DU®¢ÑVŽÅ½‚ö¬q®iÞɺ⫳ÄLDlèIüäc±×ŽçÞ¢á6.î} ü õ+ïQ¸£ûù{×PÝ«+ç‰Ñæ¹h4 +Ý]{b/“ÞC%ÜPŸÿZ Ã?‡†\”íb@q삵ÿ­ª=¡¼U3ÓB>Ú6{ÏcMü«|&ƒ}ÄÿZnÃáStB¯Ãl¼£Ì1ðÍÿ^eƒø:!ˆ!G"‰eFYɧ|¹­>DYqø.z«ÌµèµbÞÁä+`tÏΨlJcQÜçÀ—;ZZ*Ía5(\QAÔ•iÚB­Zâhögâ¿ÕDZ ÌS¼ "ÔÂF xîõø)2üŒ±¦ C¬)Tk” 'é,ô&LÇòªuMãÄp¥ÈQ˜!Δ&mÙÁ eh&÷¦OÊJ?+ᔬOF<З«Í‹ɼ¼÷½ÒéyÑœRn¼Ç‡ˆæº¿¾Œ=˜vy†X™«K62ú¼b£š•ÁKµÄv“ªlêOH-¡p3ÜxëƒjÇ-[X-rà# ·N"ÿ`’>•ÞÖv“m!¾¶;O™¼Uð”&2•Øg‘—Ë™¤Úœ‹ô@­ò 82íû«Ä“㤶–êíÆù0‚x}nÎÆ}v3ð"¡·Ä«Ó ¶ä„¥¾(â[ÍÅ%ôQGÁ^v$‘0,ÑlÖò•ð“Ød+(7p?×ǴצO6Å-cÑ.°4?o³žOx«xÃC°×Íûۯ𯾵"ØQ‹/,QWïða4¼€¶;HÏFb—mCà6îddY- ¶KRS^7`^5µ&=+5Ö™ì*¶D™kÍ£S¤™éI­™}=í•-Ílcƒ»¿ý!À»¡-wìeö2Sh›$W/WÅ0sQÒ,˜†ÉO­å⺊ò#ã¨A;¸ÓØ­/.>‡$a@FÎjß{ùdã™.&·;.ûÉÖ?‘@wpj«¥~˜½Ñ~øÊ-ña•·áHÚ”YQÓåt–9«ùBOskÍ‘~ñ…¸³;˜Æ½moÓ»÷Q–ºRSýÕíóæn˜¦P›léì?[c™ÉE Ÿýzcë;³Ömž–‘k¶'³‚ÿTf¼€øE9ÎÛ,€·`…Vk•²­&Ý[¼:í"RMGRz‹W+ø*ÔŸzÒÝ~ÖVn¼eñ:¢ÅX¥‘‹Uú\E +G¨P2Á}˜¬½ ¶Z\ï*ëcª\èœo0œÒ6dY^<6™ýNMZj–"á ù"\Ø«7ºžü$†[9·ÿ*ZÃPpÌc4–SíUó@þhôW6Aê˜ÓÜð"ÛÙÖPØI?îÙ’ÄzYÿ6IJœ7J|ûoãOP”ãCzk¾yAKáâ)œ ál)N•pß,ç¾ùÒî²zí+¡Ìßaà+€ÚcV;ŒÍ|ø#¡² r &%J›ª¬VºÄ¬ËdQa”j´û ü NxsÆg ÕÞœqªDJŽý)jˆJ ”âÿñ©Ï,~Vü îìÂn&Ú•mFA&>:(Ç_ýËðÏtåÞØrd?¥ÎJSéMZ¦G“SF§fJS÷ &]‚>WïÀ@ÖƒæOJ½ xÖœÀj< ¸å÷ÊšGvî‹×nÞ¼é ´( &i:=‚+AŸ|3Ÿ+m(‰‚z¾Ð¤.3ÞàÃi„Pv°·As„†à[è_ƒËž¾ ŒS"$_‹ÁŽ"¯A„“èœ7¹>…”‹h9ðL¡ã·:“—–³5xd1ô çfm&Î}ÅÆ©’¯ÄÀâ2WšKùÕ÷K¯´‰ëe.i†L.UTMs½Í€m6Wö Ýý™ƒ=œ°xÏàð3_xÞ¢à0ŠÃAƒ´µ­¡´H€ç\Š¿(6Ψõ6 ÌÄFŠ«IxÙEL“UÝ6²ÀT^ΩSc•*óPZz¢Vo²ªY›2ßœOgæäd¥5(:ï|p÷Ñ™”öZ¥ÑÂTR%ðÀ]‹œ‚ŽƒG»Îö1ÙÕX*0“ph"ÿ(*¥lÛ¿ðŽXG~ÌAyìgÒ³h ±ù@Á gPìçP‘uý=,ÙùåØI yÜ,ÞPäêŸÍ=‘G¢¹Þ´<œûǧÙ{ÍH¸‘ŒÄ!œŽïßA +à= +®B°ÓÛ]9â¥n¯·á6Yâm³1—ü§,PKÊ8ïKüËûþ;oŸõ76þOCáÓ¿ÀJÐ~…ÚÝ‘ÝÔ&nk¨ëê«ÏŒ=¤â8'?“ŽU¶\gA“»º£K|cKßä”\CA"£ËÖ +è\mI¹‘Uáè ViÔ[Œ)HuáÖܪóbøùâ'‹a«‰E5„q9ãÓ½%Ý•‘}£³[!÷ W~` d±Æwó½Ô ÜôêU-þmùÉéOŽïY#d'@5áåý˜£ÉâtŠ;ÔÕÒ,•<-×–Q’ÅO8Õyk½PÙß,nTÔdÄ+¢_Ñ1úyà¸oÕIóï’„À£*‚!žQ0„7À0™´Öÿ 8´JMáv(RÐ÷*÷s†³Á/žÏ¦vã&RpÄÅ!ÈHÜHÜͧ«FâɃ¿]Wž$Ÿ’‚Žòg]Qõïæ¡ yú½%N»©”Ô­®¡ÿ U…··OÀû›ÄëC’¿ð\uƒÏÏnÞ_~å¶ÏCðÀ ›*¤é%™4€Éó¾÷vÜýÉÜ¢%‰كœ‹l0^2”«›ÃFŒ~ 996®=¹¯§­½§/¹ ¼`ðÍ/üvà Ípć0µ´e4:GÕ*ð­Ùîó,ÕÈ-¥%¥|€ 4×TtÐuuúü$5¥”…Û‡+ëJ\Þf§o´Ë[q¼æÁ¯ñ€ˆC +ðÓE€Ï'Ãl>Ž„hà,@Ñ[hZßBÁAñ¾ÿ¸Mrïë òeJ}­×xsi@ŒB¿`Ð_ ðö‡„³À£‚)×y`mq»GÜÚÚúz’Û½bøáÙòMà‚æ| Ët„Zû€®ˆ)Њ ùÙŽ›èuµNÒÁŽc°©pÊM”SWáo3„©°r#€«½å] a×5ÄéÊÍ¿+Wy{ê?q*>;^›Z9ëÚÕ —ºyÞƇŒ¬pºˆ‹€'óÑ‚ó\©h"Õ²¿k¶px”’¦ÑتTŒÛÛRà)ú§°oǤ°„p!ÈUK¹ÍQìÿ,…KŸ endstream endobj -1125 0 obj +1418 0 obj << -/Length 4322 +/Filter /FlateDecode +/Subtype /Type1C +/Length 11578 >> stream -0 g 0 G -0 g 0 G -0 g 0 G -BT -/F51 9.9626 Tf 150.705 706.129 Td [(Function)-250(V)111(alue)]TJ -0 g 0 G -/F54 9.9626 Tf 73.882 0 Td [(is)-250(the)-250(2-norm)-250(of)-250(vector)]TJ/F52 9.9626 Tf 102.161 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -156.342 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.133 0 Td [(global)]TJ/F54 9.9626 Tf 30.675 0 Td [(unless)-190(the)-190(optional)-190(variable)]TJ/F59 9.9626 Tf 121.612 0 Td [(global)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.false.)]TJ/F54 9.9626 Tf 75.118 0 Td [(has)-190(been)-190(spec-)]TJ -258.538 -11.955 Td [(i\002ed)]TJ 0 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(The)-190(computation)-190(of)-190(a)-190(global)-190(r)18(esult)-190(r)18(equir)18(es)-190(a)-190(global)-190(communication,)-202(which)]TJ 12.453 -11.955 Td [(entails)-318(a)-318(signi\002cant)-318(over)18(head.)-513(It)-318(may)-318(be)-318(necessary)-318(and/or)-317(advisable)-318(to)]TJ 0 -11.955 Td [(compute)-333(multiple)-333(norms)-333(at)-332(the)-333(same)-333(time;)-374(in)-333(this)-333(case,)-354(it)-332(is)-333(possible)-333(to)]TJ 0 -11.955 Td [(impr)18(ove)-250(the)-250(r)8(untime)-250(ef)18(\002ciency)-250(by)-250(using)-250(the)-250(following)-250(scheme:)]TJ 24.981 -17.933 Td [(v)-107(r)-108(e)-107(s)-266(\050)-159(1)-158(\051)-756(=)-657(p)-61(s)-61(b)]TJ -ET -q -1 0 0 1 278.034 495.12 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 281.627 494.921 Td [(g)-61(e)-60(n)-61(r)-61(m)-60(2)-194(\050)-180(x)-46(1)-267(,)-273(d)-97(e)-98(s)-98(c)]TJ -ET -q -1 0 0 1 367.96 495.12 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 371.922 494.921 Td [(a)-371(,)-283(i)-108(n)-108(f)-108(o)-391(,)-298(g)-123(l)-123(o)-124(b)-123(a)-123(l)-238(=)-115(.)-277(f)-162(a)-162(l)-162(s)-163(e)-367(.)-206(\051)]TJ -171.33 -11.955 Td [(v)-107(r)-108(e)-107(s)-266(\050)-159(2)-158(\051)-756(=)-657(p)-61(s)-61(b)]TJ -ET -q -1 0 0 1 278.034 483.165 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 281.627 482.966 Td [(g)-61(e)-60(n)-61(r)-61(m)-60(2)-194(\050)-180(x)-46(2)-267(,)-273(d)-97(e)-98(s)-98(c)]TJ -ET -q -1 0 0 1 367.96 483.165 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 371.922 482.966 Td [(a)-371(,)-283(i)-108(n)-108(f)-108(o)-391(,)-298(g)-123(l)-123(o)-124(b)-123(a)-123(l)-238(=)-115(.)-277(f)-162(a)-162(l)-162(s)-163(e)-367(.)-206(\051)]TJ -171.33 -11.955 Td [(v)-107(r)-108(e)-107(s)-266(\050)-159(3)-158(\051)-756(=)-657(p)-61(s)-61(b)]TJ -ET -q -1 0 0 1 278.034 471.21 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 281.627 471.011 Td [(g)-61(e)-60(n)-61(r)-61(m)-60(2)-194(\050)-180(x)-46(3)-267(,)-273(d)-97(e)-98(s)-98(c)]TJ -ET -q -1 0 0 1 367.96 471.21 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 371.922 471.011 Td [(a)-371(,)-283(i)-108(n)-108(f)-108(o)-391(,)-298(g)-123(l)-123(o)-124(b)-123(a)-123(l)-238(=)-115(.)-277(f)-162(a)-162(l)-162(s)-163(e)-367(.)-206(\051)]TJ -170.658 -11.956 Td [(c)-175(a)-175(l)-174(l)-831(p)-56(s)-56(b)]TJ -ET -q -1 0 0 1 247.952 459.255 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 251.497 459.055 Td [(n)-56(r)-56(m)-55(2)-190(\050)-264(c)-132(t)-131(x)-131(t)-438(,)-283(v)-107(r)-107(e)-108(s)-300(\050)-193(1)-193(:)-193(3)-193(\051)-193(\051)]TJ -0 g 0 G -0 g 0 G - -75.886 -21.917 Td [(In)-253(this)-252(way)-253(the)-253(global)-253(communication,)-253(which)-253(for)-252(small)-253(sizes)-253(is)-252(a)-253(latency-)]TJ 0 -11.956 Td [(bound)-250(operation,)-250(is)-250(invoked)-250(only)-250(once.)]TJ -0 g 0 G - 141.968 -334.744 Td [(44)]TJ -0 g 0 G -ET - +xœzwxTÕÚ/CØ…½’I™Ùf³÷F&X*ˆˆ€ô -dÒë¤L’I&½Ìd&½·I2“BHB „Б*Š"¢¢Ç‚¢~õ¨krV<÷® úï»Ï½ß÷™Ì“é{­w½ëWÞwI&M™qxâð«Ã˜t¶t‰t·t¿4Iš*Í‘vJû¤¤Ÿ9Nv”9.s\åxÄÑß1Â1ɱƱÉñ¼ãÇGŽß:N38-rZéôšÓ:§ÍN¾NÉNeN§>§!§»N÷œ8}éôÓÿržê,u~ÆYtžã¼Ðyµó^gogµs–ss›s¯ó€óMçÏsþ§ í¹¼ä²Þ個K˜‹Ê%Î%É%եĥÅeÈå¬Ë—.w\î¹|íò™Df/s–Í’-‘mùËÔ²Y¦¬Tf–õÉÎÊ®ÊnÊÞ—}"ûRöDö£ìWÙ?åùT¹TÎÉÈWÈ7Ê÷È˽åAòpy´<^ž"7È«ä-ò>ù¨ü–üùòoä?Ê“³–fØéìv1»†ÝÄîc°¾lÍjØ46‡5°El9[Ï6³Ýìö2ûûýšý‘ý……5c"{'·>’wKNYŸE«¬ŸR}Ј C7«#Âiš×¹­ •"̦ ¤ßÑb3|E&!Ò/M2Ð0‰ê©,…+ΡéÁ=Æ? µs€õ6¦Têh8Ž<¢©³¹p*NRÁÁêÔpîX’霿A»£©þŒb8•A…Ô’÷G£]5#W„­Ô@µ¹àV1 Æ6ZX¯Ô*è|@DÑÖ&<4¤?­|!%ùHútô²P» +bË›•ÅæÆ BucÉÎ×*+`±ú°ha49 «ÈŒV¤êÒÓUÂ&´=ÜA¨Ïåå•(ª+Š-ùB0£ühª)·(·(=_?´ æ´Gg¦%¸‡U!Ñ\zFI©!ÏhÈ …†ÂE[Œ)P¥<Úr¹³­¼±Ahh2õu= ¬ÏN«=UVÖžGîn»<Mvç”gÄ(´ZeèëBr@F¬6 u »>á3ãD™¢¦¬°'_0#m4Õ›cÐ×o1ä3ò36¡µÓ¢o|KÄdW—+jËŠ[ ð§TÑTsvAVË8œ™f&¡3¼ñ3ºA4‘ð%ëœêî’’–Âé~ ²pN,•”••­å²Ò…±<$¦ †{yËŽ[ú=~ò¨B~aQž=N'D W-dcyQQ)WX¢Ë)SÚFRÛ¸Ç7®ýžr`Þ)qS[„¥SÑÑjéiiNkÔå Õ^4€Ö0ÉÌB3Ðì'ÏÁiÐõ»_àLøìÂ_+BÙ+õûÑJô¢Ú}Ïnõ98®¯½.‚±]c+Y©"ßÉ®ÉW¨µ!QÞ²C;=LÊ-Ôu…Ó“›r**Õ%ý‚Å@XЗ*ªS×o +‚ÛÐçÓжPkKÓÊu÷hx°…‚¾è6}ÈñmÖØ¢‚êü"¡¬¨¦ÐÂ}Óí=k[’_Hœ•”¹VOƒZ"?ÍXù:’-ÝÁ^ñNñ¥9ÊMñÖa¯×w¶œU +‘ÝɉïÒá(¯l.*)©áJÊs3Åijݙƒœÿ÷Ï¡Ë}Ïû[OˆhÞûĪ¾XËÅõS=ïœ>ïRè ¬ 7m¥Á0ßÖu2ìx (5.âÁ¾¤7x`fƒ +´Õ= +«ÚM f”¤|ÒŸÌ Ï4:øøG%ã–îÿÎøåÄÑšêŒÔ"±0=/ÏGqÈPÛÃj6÷^ß|f š1ïYôáÿíÐÿœ  Å›;¯2{ÇñȃsÑsÐÿ8ÉÜsK„2Êø~yÅûx×^®¬ºU*XPˆŠz?Ó·A®Pègë:¨HSmÒÞ3âx8¥ß­ÑnÈ u%\ÊFaé š˜@è´ez:œ„ë<¢yœ¢2Ñ,¢™´|×\gê‘v|þ4•UFe|¶ðÆK8ãà"FQº“5­Ú„üÕT›¾&NçO£áèo¢" šà èc§¡HÊ ‰ÐÓjèx]wMCÛŸßjÕWÇéið +ÓK?ãÒúåkÇÍù?”m¶ÆYg±ç‘+%kÛÌÈz×ó²Ëך\Wô¥hdïù3²E! ˆgà]`U²Æ²¢.¢î3Caƒâ¸º1"2.6L›W'”(‹ƒ ÃiÐØh®hâºj#÷‰(›Ò+µšà\:¦P`yAj¹îm5SÐ1gÔ£ºÔAbü2ymd-£CЩ€7Sºµ©i+0~ƒXE¬6=P'dëµúd=~a Ó_Žs"DcêÓ‹Ö¯ÐÞhªC_oPÒãÅø’ q!z|É +|ŸÈÀÏaû#ÚB”&3ÓMj¤¤†C¦2JÜíõ͆EÓ¾ƒÂ­oÌu׋§›P¬º‚j7«U‰ñšq«á\"©-§¬TQ[[ajMè‚šÉm¸22 h±æ€2Ô—KSÃS<éSm©,ª®n¥«5Hôb@Rž vöÁ“=HÀ¹ðu¸ÔVÀ‹¬ÞlÊ«æ¾8_fÏ·µ_¸©¸|aw»`ò *Þ¡OÕ&r4·:Q·¦ ‹ø5üÔ²„·š§Luø«'š³RkÄ–ø]Šyþǧjò Aº‘ÀöŒà¨cŠ=÷ƒ  \ýÅèö ¼o×ð8¿räl3Õ|âDKK»(ÝÈxk™ ýƒcÈãÙµ9…¹tAn~V¶"--!5U@r$'¬ëÑ®hꄾ:Áà7±¾šøЉ•0SG‹sŠôÃ4$©&ówHAÔ©ŠR2)ÉYz!jÍTjv~EºˆÜ`Ãßm®»Q‚ÿ<Àî†ÏÚÙõÕ +SyE½PjB1j²#£8mIàTÔ=-ü Â?)9!™ËÌ.,‹)Ca^A>¦³Æð UÀ±“ÁÃÃ=7Íä­Äi +‹» xŸÃk\_¥OÌ +O‹äßÞu²í/ì1ðIµ`<¦²O`ÎO(‡'[[Ož ÇïÖߨ­6ôÑ°€ú›ÛÅWwyDìöÀ÷y]ÖtJ~€Ëa|Ó®ÊÞ%a(| +ÐC"tFI"º¢"od×'ŽÒãi”Wdì>ê¬Ù&SEq෺„÷ª7—+^P/ ñ<‚ŽÄlå<µ•§E@é®V×u2¹WjêÏézº«®'Gu5ñº=ôx#Š°6ú1¤t//5I¬³ ”=ê–àÃ)Ãk»Ôb|{fGÎæ‘YŸ$ ‡öÐÝ!ž¦ýrXù<šfü”><ÓþÍM•ÁÕ,dòk{Ïr­uI¡%bA|~”á D×^ãÕ¸€îÌ¡#m¡§ ó EB¯Uù¼á.œ—”îJµmˆÀÈÉY8>gØÁB«‹¦.›‡¸M槼ŀ®ó1§¹ó—›ºÅî~ÓÛwð™eÿHA¯ÄáHŒ,Ë"óƒ¶+Ðê„}Ž% ÿ +·Õ\Á¹ºÛZ:¤€_§RhåV"Ž*;^WXÂÕk#DE6$§nãÔ0‡·%á ¸Ào`îóÀzµ+€(cà‹ŸÝ€S¡üåwÑ«±9y¹‡E¿)ÆšÒ¼rîÑо¹‹}ÜÖû¥Tõ‹`´¼©Ôl¢eÖ‹ià”âbÈÙ=íB“·géznzšIÙ¯O†.Zâí¶A©­ê­mSêI™õog®\~ôÈÊœ´p&/è‰VÀÈSY¼q±ª!‚€N|K±|Ú²G᧊RjRŠLqBXsFAx>™®ÕÆ+‚{ãÞèÉa Ýá/îÕ¤—Vä +„rêxtc2Ful¿ÿµ«Ã]˜$ÃBƒý[Ã{ñy +GŠd€o ÓÛÞÞÓzÂ_Ag!%6ÁbêzöñÄ.ázØΓë87ÿ¤QVÚ >dÆœL. ¯6öÁs]ë›úÁÕzƒì‡D ™¾€Ï1š„25§V©Ëµmñ"œµ„°~þ6HãSüž »{GHôáx:%<8+‚Û²½k4\ ?ŸüÞO +è ~nè¸Ó\“¡ÍÑeåf +Y©ºŒL…¦4©"G^î~ŠàU^Úi“#UuH€ø%<W'1¶Y†4US–œª8]½ŽZuFùsK}~y».*ní8ýr´&7=\ÈÑfè29mVQ…˜ˆq®…´dæØÁ>ØÇ¢DÃRß1¦hÇ/K`v`Α¥}’_ óÏ`A€ß²QŠ¦Žmb½}<;‚;;ƒ:½Ei¿’!` ©ÛKQÆ}I‰ k˜ØmvÔ%zW +å{Xƒ‡!*XžÇ^1ÖÆŽ€É?1àÌØÔLÀÞ&ù8+°ˆÀÎ@Ù#(Ûx¨tý·ð6…U'”M(¬–Vï­â;(„Fæ¼®ú/4Ùi)Z9#€®”¾³¢¾¹6ë2 s¶ûäð°²ÃÌ붸´¦¢Rh45U4r£æ]è08ÝÊá¡žž! ¸pê€Gx¸'½Èr—;±—‚ÕP…ð?¼æŽ/ Q‡éèl5 0‘Q…)&CG³“ÛjÁ{ß›4(Žnñûùz€MQ¨)‡ +Ó4vE`éKå.ö™ú,bms±¹p„–FjMF¼æU]o[óÚ#hÖ—dF)ÐEÊ Ô&Ú€:x(•ž‡OúŸÆãVöàùœo9Û*ÔŸ(鹪îÑ^‘b\`Ʊe +€Ò|—òp ¼?2‰À5,æ)¾´šÍ­Oñ…Œ‰‰ŠhŠiiijji‰iŠAÑBºz ëéïïåqÒètw÷i`â¥?3¥%˜õ ê뛹J@þˆù'ඊ + iŽ:ÑÞÜÜ~BÕ,‚ŒÍè©)8ùÚà'@y‘ïí>ÑÞÓÚî'¶ÁÂ"›*p1Q®“V±ç‰ä¯ãkÙS-µB¤Ò–Ôè+4 °ÕS¹"H0“½ ÕÜ© (/-S3NÀiâáéúDò€ÚtQ]4œI4‹2&Ç®¾Vƒýƒ*/ÛöϱI@¬èúEé÷pë)8딜汣à Êa@ Ä6-ãEl½“%;º€!ARyyZ —_dÌ/‰ +Ç݈‰}˜Ð:±ÖkÀx ÌäÝ¢–γ\_S¨hîǃìŸ$èRÜ>uøp°Ïö¨BÛ|ñŒµÙ +(i|½Õ§zU“ ÀˆìŸIò1’m+(,,,((Î/.üÚÞ~ ®ØRb©(+-(¬°wxÚÙ7É<é;Él‰I2,¹+“gOž79rÅäÎÉßÚM±[n·Ù.ÒNm÷Ñ”W¦lšR5¥nÊ}b1L\ ®ïŸ?“!äWÔkTUHÕQŸÐ›èP:—®¢ÿ˜:yê SÓ§~Â,f¶01Œ†IgŒÌ s H€ LžÀ:À5pÓ^jÏÚO·çíØ/³ßn¿ßþ°½—}®}½}“ýÇö_8Lr f9¸9¨RÚ>vøÌá±Ãw¿;ŒK'IIéé*éZé^©Fš*Í•öJ‡¥W¥w¥Hÿp´wtr|ÅñUÇ×}3›O;žs¼êxÇñsÇ_œœ^rÚáäéádp:îÔã4êtÓé§Oþæô›Ó¿œ§9Ïs^îü†ófg/ç`çXçTçRç&çÎ=ÎÎÃÎç?tþÑ…p™á2Óe¹Ëz—Ý.‡]¼]]’]Š]Z]κÜuùÂåw-“ËfÈž“=/[#Û-ó”EÉ2dY™¬ZÖ ë•–Ý½#»'ûJö›œ”?#Vþ¼ü%ù+òuò=òCò ¹Zž&Ï’åeòZy£¼MÞ%‘ß’$$ÿ^>ÆNb¥ì3ìLv»}™]ƾÉîb²Al›Áæ²¥l-{œígÏ°çÙ«ìmö=öûû»ëdW©«Âu®ë‹®Ë]_wÝàºÕu·«›ëWo×`×XWk¶k‘k•«Åõ¤ë°ëˆëE×kc{þ,I?Ènˆ{s¢j9ß[<çuÎà”ñAUå'FŒ¼UÔݬr­ñM}A-h8ü¶—xæfÂûÜï6vˆ§{š?üVq“Ò\èÌ>–àÍù…×uʼn '2:rîÒ°Ìú,i8¤‡î 9jÚÇ!§óÑ\4÷Ó…Ð铳íOn‰˜ÜײÐ>¿®ï,7\„ìŒb¬ÈkêyH‰uí<4úâÛ<|óƒÞh>Þ€ž¢>˜…“o"䇎íÄ +œFÔ,¨¼`è Hé"°†ÝdÓÒˆÜÜ\}6—œYR+Â*êóµ£HŠˆµßð1G÷÷µš;«²+µ5BzYnQ‘¢¡¥æ¤þd‹ÐVy!·:-T¡ÍMÊIÐô=š¿"R;s+ÊæêÒÁ|Áh«vˆtKEõëLéåÑ] §!½ì­“ˆìZ]q©¢­¢t´@° au*«,½ö0 ÿqZ+ wZ£àÚñ(¢…„G­?Vö56tæOI¼‹c¹[Bò²»³Ù·óÙOË0Á]&‘#Ü@¼Dâl .‘pÊR+iaר«ôçik°Æ²rCWRœ›] –¦åé ÏÓÈ +Ë.ŠöUŸ ºÍAúÁïpÙ½äwbÛDSR|Y4çZÓ‘"¶¥çëJ2èÊäâ(¥bÏ[»½:ð u:¢783G¯ÏÁõÀ*{BF®àÁ#^_ÿ§ÈOnKx*òÇ\ š 'ƒ×¨CChÿcûÖp;m:kèDš~n›êÏ*êvn½ÆÊèÝ“S7dÐ*ë} +lÌ3Y]\>âe­'ÍÛyÙéÅŒìÁ«<ø¬ÂM8ÚŸJPbNÿÏ> Ñ”ùhÊÄ>Úbÿ )½öESƒ™%éï4Æ;#Ý+kÅ,3€­{·š¨üâò²2dðpÖ—è±³f©ÙÙKÞx]\¿Ñwá<¢{^y´VØpÿ«ÀŸ¸Ÿ¿ê~ÿ}ñþýžo~UüêÿxÝû‡o,î~–Cß»°p?\„oûá>¸-BûÑ^ôzíZ°æÄíMÂæ;¾Vxm!šÕ‡MbilA‚J‘Ñ?Œ¡Ò{ìÒÑ‘Ð IòëÜуe/ÐpëxºŽ¼ «NèáäÒ jk+yz*2Ùm؇¦h%附IWÈý2©N‰æÂâZú®œ:õq~qk"ŸaryEÙËðŒ™Ý™ŸZ©¿AÃ=Ôhë@GIi¦¦\ˆ¬¯No溛[{†‚:ŽRFm:*Êî~ŠöDS§²ËS&:6i©Þ™8põ 0`L +³Ø(Ò­Úô:Òê8F5ã T:F´’g xºÎÓU: ݆q—?ˆië +H¬/Á ö,"‰’¸‚”tETBª· R˦t2Ú˜Y¬éBeÖŸ¦EÈgütÜÖ°`?Ýt5Tšb©ÀÈzsi~eI£ØóñLg?CžZL‚»le<”˜p±Þ7©ÈCMa„.« Þ¾ZÑ0(~Ú{ññ +Hnú~f§Z–h\_˜`˜VNéÏÖÔàÌ4¡ûj39¢«MЦS)ŸÜÍ9‘™¾ñÓWø¸-™¯˜ñåOý„ØÓWR¯sðE«†ý9ëâkh¦‰û_›“% Ù®sŠö?„¢μôð§"Aú +?ÑæXË'¦Çƒe¼©ë/ð&/ËÜÀÈ6üUÐÑøÚ“-ÙÜÇ1¼6NglUh88 ÿ]ˆ.‚ÙÇ(…œ(6«à]ê*J£€—)«+"¡Š¬5” +å%õE­Ü{ç‚ßê[ܪÖ/W,R/ûïµi˜Œ´t¬Ìª ©…ÚjtÅ*wC·“–Jð–=¥¢fåF+4Ù¡¡ +Cá ¦ŒïWWß+xÊ#­Ó,ÊJ; t¹‰5^E¾+¶, +¬Á®¶QU¯Ž§_‹ôõ;¨8V—p3F,ú^E~–SfÜGÏ¢†ÇnÞîS?,x]Šç†Û†,BlU¶N¯ÏÈ™žÆ\ØÛ9uÉFwz<œÒ¹'§àU¶¾KõBo"’ Ý¢ŽŒ <Ý4opÛ*°CH£~ïS"²\l†ŸG‘ÉhŠ_À, ã©îŠÒ _žO‰hÚ#«Â*7ÓfxhÛ^ÐCÐÀÿÕçm +êZ;ÌüUÅ‘µ5{ç`A½Óí~Â.ö‹ÐL¤6ê ‹55¥-B RW9qm;áËèâ4ôŠ­ ®†_›Â¶ðù—h¸™‚ +ØÒs¿¼|¨hº íS›¨ •&+QƒÍº+üÂB®)Í(Õ}@Ãä ƒµô'íƒ×>R@zÑm´z¢]ÿNvU’q½­]¯ß‘ë™Cç¦ét©"Ô[?b›j+:ux¡IÛS”Q1‚::8sÆ–²K¶Dïß*®Ûî¿MQ`ƒ¤@3 <ˆüþpÇMáÖ¶5í³¹‚@ö÷kh&“ƒÛÐÔ™G=[‡"ÅÐ Éïþª€N¦†/ -µ}œJ©‹U'ëÊ3S²s³²Ò„ô¤œ´TÌ7kTäÎe¾ž[÷Ó‘£èµuÀ÷î¾ãŸö ªrB©Ö$Çq©µ}"¼`ë_)d\xûöAe¸VUs£{ÍÖ®š†Âbºç|us­¢8§ ###G›$èõ:]®y¡•Ór(ŒJ«°»§ïŒÜþ›âî±á]‡‚CÀÓ½­«VÛööÈðêpÝ–@>þjëYÄh²ótQ"$ +Ê Å±»ò + Œ­šãqä¤ÿ𙞓ÃglÕÀþú¦­¦{SëKÈÂ$»S­'ÄNúÀ#h!| ݧÀ #âÐkh-V`xÝÑóXÍÀNr t„ÄEéƶîÐQî››¿`©ê|ø›ÍÑ1!Áõq]Yb_¦A_©¥«‹#<s=·­÷¬í‰Z®V (ÎÄ´(£CyVh:bÛ7ÖÃG¤tÜàI?\cÿ¬¨š¨¨¹åÒ@§@ç`¤ƒið{ÛÒÐjæÌeqØBæàRï=¿ãH@øÆáG +mÙ_¨&òK + ùœyÂn߇ÎÔÄ’1àò"@ýÕ[Á€§E’bKÑ9Zúú¸{¡Yä±?jþ¦Á†âòÖ&Ü:h²¾ZÀ£¿f·%ñà SÀ5Áøê§Ý¡Ò‰îPéŸÝ¡¬¤t]&•cë}eMg‡ë•/¾˜päHBד'uÝgDàþ˜ÖçØ>&F¨çW0î¿´òQ2õÊzâõÕ=©ÿKWßL=íûK­snß¾mج´Ä­£ŽKåÜTW¡²-W¯t´G+«D`s21;ù¿Úä{¡œx@¡^(‹qƒþ«ˆ?ìHðC_j;‘“’¥Ïà|BºßOÑä:6# r{B˜›’Èy©GqèÅ.ö6û ºC”Æk‹Ë +ó‹‹Ê„²êlŽG™ƒýUª€ÝWŽ¶ZmL¤:`"Ò2 ?¯žì…5½.ð,ô_Ä2®’ý:dïzj²Ì¸<Þ,†‰V²›¿z»;mÇ\†Ò‹² Þôÿ›à+5@;L p6È3IÀ+L»©ÎâãáK| ÐùÃçy`3XôæC[—*Þê?zm¤ãĹ¡°ÞÙYyú,AŸ«ËÓq©¥}ý÷GEðSl(ãþž°ÛÃc¼x[3a_b)ÄÉq“ô!vSÆS’ÞPœP©?GÃD˜…)ÝÁDíÛÙ¥o)pÞØÈC¹Jô­#€•€ Ùõ¥¯ç8x³!¿¦¤AŒ?ÞœÞÊõ¶4o©ŠˆÎJÂÛs[BxLt]U Íùp6¯«|ëɧmyœ&`aÿ”6 t®¡ö¶­pø­Šz'פÁ„‚|Qô¥ €1oÚâ›Ybºž´ ZWD¶jcµŒ¿QÆe©aœ)“ü³r|š?n MÀ”±Ä†žh9Z†Qâ \ŽVÀCø¶®À{?[…ˆàd yµ[µ­K0y²¦­EGV+ß„CÝî×|3Hð4ÚÎî2jêuØ#?K=cÑžæ ñðp>œùÆdb  õHœº½nM«l%:Œ¡H°€Qûz¬æ!F\„D©Bþ,ŸhjÅ”5)Ï £h‡³Ë PvÜ¥>' [ +åìe@<˜åщP.‡î8”+ð͇r9|Lœh02à+Æêcb=È]s‰:€Ö·‘…ßœùãû”ƒ=¢þ{¢\ä{ÀÓÎx}sV='›Ñ„¤€n¶Îa;˜/?¯½Ždñ9q…Œøt]—QfÑ‹ C¨ +Š,x‹C;à N 2¾‡ák¸¾Æçܶj·PÒ£¼ídÖ±Cõ¡~Š M\THrÓÈ`AM}« e­s¶ŒXnS?¼=Ž¿t›G[G¼>¼wîÜ=0aKá6“änogm7³ÐÚ£¸x¤oë[îž»|ð°Þ=ŸÞþÁ㋆:îw ðþý€ûî≯¯Š²ŽñgCXUtrf4§Œ7ÛjèÚH2 )=-ŽKO.(‹Í{Ž”ÛúÂ(€B¯A"¾)³²Jaª©hÀŸýÀæþø ÜÅþ¿èy°†æõ4€/ÃÅp6¾¿Œ×îþë@S½ °;`õ¨«ØÒ#ÞÑ‘jn‹÷Íäˆ:0ª={ºaÓ™x“ P·]ú†¥Q ‡9­:Ó7gwÚü:@î‰×I„Áÿ<ˆ/›(ƒ‰Ë©-J»i€Vgó¶îkn5†'yCô9)"¸*×àû×ìÆüŽ¹â£‰ÖPœó;¾yë‘ð@ni—áºnøj¯D«Ô*›š-øäZ×pKBJ• Á¤ãÖD*Р 1¿Â¨¨b÷)ÓÛï`±¿fÁåX[!þnÿ3 ïwre`¦ +ØÎ}àÜ%àr€u=˜8«’€qtK~‡Î6‰jÆ[È…8Cžu;2vä#$ Ñ³”º ©4gÞ3Œí0#@¿ìÌÊ +ÒOWk'T²_Ž–Pài³ì¤®¦$1R¸(Ïlç?Àÿñå¼£ endstream endobj -1138 0 obj +1424 0 obj << -/Length 6050 +/Length 8493 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.9)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(5.3)-1000(psb)]TJ ET q -1 0 0 1 147.429 706.328 cm +1 0 0 1 198.238 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(genrm2s)-250(\227)-250(Generalized)-250(2-Norm)-250(of)-250(V)111(ector)]TJ/F54 9.9626 Tf -51.121 -18.964 Td [(This)-216(subr)18(outine)-217(computes)-216(a)-217(series)-216(of)-216(2-norms)-217(on)-216(the)-217(columns)-216(of)-216(a)-217(dense)-216(matrix)]TJ/F52 9.9626 Tf 0.294 -11.955 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(:)]TJ/F52 9.9626 Tf 126.858 -11.955 Td [(r)-17(e)-25(s)]TJ/F85 10.3811 Tf 12.294 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.089 0 Td [(\051)]TJ/F83 10.3811 Tf 7.041 0 Td [(\040)-291(k)]TJ/F52 9.9626 Tf 19.006 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\050)]TJ/F54 9.9626 Tf 4.274 0 Td [(:)-13(,)]TJ/F52 9.9626 Tf 6.821 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 10.3811 Tf 4.274 0 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.744 Td [(2)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -186.919 -16.189 Td [(call)]TJ -0 g 0 G - [-525(psb_genrm2s\050res,)-525(x,)-525(desc_a,)-525(info\051)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -ET -q -1 0 0 1 126.577 630.954 cm -[]0 d 0 J 0.398 w 0 0 m 290.348 0 l S -Q -BT -/F52 9.9626 Tf 132.579 622.386 Td [(r)-17(e)-25(s)-8868(x)]TJ/F51 9.9626 Tf 221.014 0 Td [(Subroutine)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(gather)-250(\227)-250(Gather)-250(Global)-250(Dense)-250(Matrix)]TJ/F62 9.9626 Tf -51.12 -19.441 Td [(These)-280(subr)18(outines)-280(collect)-280(the)-280(portions)-280(of)-280(g)1(lobal)-280(dense)-280(matrix)-280(distributed)-280(over)]TJ 0 -11.955 Td [(all)-250(pr)18(ocess)-250(into)-250(one)-250(single)-250(array)-250(stor)18(ed)-250(on)-250(one)-250(pr)18(ocess.)]TJ/F60 9.9626 Tf 120.161 -25.465 Td [(g)-25(l)-55(o)-35(b)]TJ ET q -1 0 0 1 126.577 618.6 cm -[]0 d 0 J 0.398 w 0 0 m 290.348 0 l S -Q -BT -/F54 9.9626 Tf 132.554 610.032 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 369.912 610.231 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 372.9 610.032 Td [(genrm2s)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 369.912 598.276 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 372.9 598.077 Td [(genrm2s)]TJ -240.346 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-1200(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 369.912 586.321 cm +1 0 0 1 289.521 649.467 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 372.9 586.122 Td [(genrm2s)]TJ -240.346 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-1279(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F60 9.9626 Tf 292.803 649.268 Td [(x)]TJ/F91 10.3811 Tf 8.097 0 Td [(\040)]TJ/F60 9.9626 Tf 13.398 0 Td [(c)-25(o)-35(l)-55(l)-55(e)-25(c)-25(t)]TJ/F93 10.3811 Tf 27.705 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(l)-55(o)-35(c)]TJ ET q -1 0 0 1 369.912 574.366 cm +1 0 0 1 359.144 649.467 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 372.9 574.167 Td [(genrm2s)]TJ -ET -q -1 0 0 1 126.577 570.381 cm -[]0 d 0 J 0.398 w 0 0 m 290.348 0 l S -Q -0 g 0 G -BT -/F54 9.9626 Tf 229.958 542.002 Td [(T)92(able)-250(9:)-310(Data)-250(types)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -/F51 9.9626 Tf -130.063 -34.468 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.926 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ -ET -q -1 0 0 1 385.864 420.062 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 389.002 419.863 Td [(T)]TJ -ET -q -1 0 0 1 394.86 420.062 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 397.998 419.863 Td [(vect)]TJ -ET -q -1 0 0 1 419.547 420.062 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 422.685 419.863 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf -297.883 -11.956 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(9)]TJ +/F60 9.9626 Tf 362.427 649.268 Td [(x)]TJ/F60 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F93 10.3811 Tf 2.875 1.96 Td [(\051)]TJ/F62 9.9626 Tf -219.744 -22.41 Td [(wher)18(e:)]TJ 0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(desc)]TJ +/F60 9.9626 Tf 0.344 -20.664 Td [(g)-25(l)-55(o)-35(b)]TJ ET q -1 0 0 1 120.408 388.181 cm +1 0 0 1 169.703 606.393 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 387.982 Td [(a)]TJ +/F60 9.9626 Tf 172.986 606.194 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ -ET -q -1 0 0 1 273.363 340.361 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 276.501 340.161 Td [(desc)]TJ +/F62 9.9626 Tf 10.187 0 Td [(is)-250(the)-250(global)-250(submatrix)]TJ/F60 9.9626 Tf 103.256 0 Td [(g)-25(l)-55(o)-35(b)]TJ ET q -1 0 0 1 298.05 340.361 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 305.084 606.393 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 301.189 340.161 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -222.215 -19.925 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(res)]TJ +/F60 9.9626 Tf 308.366 606.194 Td [(x)]TJ/F62 7.5716 Tf 5.106 -1.858 Td [(1)-13(:)]TJ/F60 7.5716 Tf 5.963 0 Td [(m)]TJ/F62 7.5716 Tf 5.985 0 Td [(,1)-13(:)]TJ/F60 7.5716 Tf 7.856 0 Td [(n)]TJ 0 g 0 G -/F54 9.9626 Tf 18.262 0 Td [(contains)-250(the)-250(1-norm)-250(of)-250(\050the)-250(columns)-250(of\051)]TJ/F52 9.9626 Tf 176.182 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -174.742 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -0 g 0 G - 141.968 -106.261 Td [(45)]TJ -0 g 0 G -ET - -endstream -endobj -1145 0 obj -<< -/Length 5385 ->> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.10)-1000(psb)]TJ +/F60 9.9626 Tf -182.447 -19.051 Td [(l)-55(o)-35(c)]TJ ET q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 163.696 585.484 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(norm1)-250(\227)-250(1-Norm)-250(of)-250(Sparse)-250(Matrix)]TJ/F54 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(1-norm)-250(of)-250(a)-250(matrix)]TJ/F52 9.9626 Tf 208.231 0 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(:)]TJ/F52 9.9626 Tf -74.65 -33.873 Td [(n)-15(r)-35(m)]TJ/F54 9.9626 Tf 17.788 0 Td [(1)]TJ/F83 10.3811 Tf 7.873 0 Td [(\040)-291(k)]TJ/F52 9.9626 Tf 19.335 0 Td [(A)]TJ/F83 10.3811 Tf 7.442 0 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.858 Td [(1)]TJ/F54 9.9626 Tf -198.652 -20.06 Td [(wher)18(e:)]TJ +/F60 9.9626 Tf 166.979 585.285 Td [(x)]TJ/F60 7.5716 Tf 5.147 -1.96 Td [(i)]TJ 0 g 0 G -/F52 9.9626 Tf 0.622 -19.925 Td [(A)]TJ +/F62 9.9626 Tf 7.732 1.96 Td [(is)-250(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)-250(on)-250(pr)18(ocess)]TJ/F60 9.9626 Tf 234.034 0 Td [(i)]TJ/F62 9.9626 Tf 2.964 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 12.299 0 Td [(r)18(epr)18(esents)-250(the)-250(global)-250(matrix)]TJ/F52 9.9626 Tf 125.981 0 Td [(A)]TJ +/F60 9.9626 Tf -266.027 -20.91 Td [(c)-25(o)-35(l)-55(l)-55(e)-25(c)-25(t)]TJ +0 g 0 G +/F62 9.9626 Tf 32.563 0 Td [(is)-250(the)-250(collect)-250(function.)]TJ 0 g 0 G 0 g 0 G 0 g 0 G ET q -1 0 0 1 229.61 588.515 cm -[]0 d 0 J 0.398 w 0 0 m 185.901 0 l S +1 0 0 1 230.392 543.107 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F52 9.9626 Tf 236.21 579.947 Td [(A)]TJ/F51 9.9626 Tf 120.293 0 Td [(Function)]TJ +/F60 9.9626 Tf 236.663 534.539 Td [(x)]TJ/F60 7.5716 Tf 5.148 -1.96 Td [(i)]TJ/F62 9.9626 Tf 2.75 1.96 Td [(,)]TJ/F60 9.9626 Tf 4.276 0 Td [(y)]TJ/F59 9.9626 Tf 108.448 0 Td [(Subroutine)]TJ ET q -1 0 0 1 229.61 576.161 cm -[]0 d 0 J 0.398 w 0 0 m 185.901 0 l S +1 0 0 1 230.392 530.753 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F54 9.9626 Tf 235.587 567.594 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F62 9.9626 Tf 236.369 522.185 Td [(Integer)-8983(psb)]TJ ET q -1 0 0 1 372.821 567.793 cm +1 0 0 1 373.603 522.385 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 375.81 567.594 Td [(spnrm1)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F62 9.9626 Tf 376.592 522.185 Td [(gather)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 372.821 555.838 cm +1 0 0 1 373.603 510.429 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 375.81 555.638 Td [(spnrm1)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F62 9.9626 Tf 376.592 510.23 Td [(gather)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 372.821 543.882 cm +1 0 0 1 373.603 498.474 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 375.81 543.683 Td [(spnrm1)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F62 9.9626 Tf 376.592 498.275 Td [(gather)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET q -1 0 0 1 372.821 531.927 cm +1 0 0 1 373.603 486.519 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 375.81 531.728 Td [(spnrm1)]TJ +/F62 9.9626 Tf 376.592 486.32 Td [(gather)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 229.61 527.942 cm -[]0 d 0 J 0.398 w 0 0 m 185.901 0 l S +1 0 0 1 373.603 474.564 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 376.592 474.365 Td [(gather)]TJ +ET +q +1 0 0 1 230.392 470.579 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q 0 g 0 G BT -/F54 9.9626 Tf 278.277 499.564 Td [(T)92(able)-250(10:)-310(Data)-250(types)]TJ +/F62 9.9626 Tf 278.277 442.2 Td [(T)92(able)-250(19:)-310(Data)-250(types)]TJ +0 g 0 G 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -127.572 -27.052 Td [(call)]TJ 0 g 0 G + [-525(psb_gather\050glob_x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(loc_x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf -127.572 -33.873 Td [(psb_spnrm1\050A,)-525(desc_a,)-525(info\051)]TJ 0 -11.955 Td [(psb_norm1\050A,)-525(desc_a,)-525(info\051)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ + [-525(root\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-190(call)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + [-525(psb_gather\050glob_x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(loc_x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.926 Td [(a)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(root\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -22.902 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20.91 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.909 Td [(loc)]TJ +ET +q +1 0 0 1 164.583 350.626 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 167.571 350.427 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(global)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 194.722 0 Td [(A)]TJ/F54 9.9626 Tf 7.317 0 Td [(.)]TJ -187.095 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.664 0 Td [(g)-25(l)-55(o)-35(b)]TJ +ET +q +1 0 0 1 371.853 350.626 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F60 9.9626 Tf 375.135 350.427 Td [(x)]TJ/F62 9.9626 Tf 5.206 0 Td [(.)]TJ -204.73 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.242 0 Td [(psb)]TJ +/F67 9.9626 Tf 244.743 0 Td [(psb)]TJ ET q -1 0 0 1 324.173 344.346 cm +1 0 0 1 436.673 302.805 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 327.311 344.147 Td [(Tspmat)]TJ +/F67 9.9626 Tf 439.811 302.606 Td [(T)]TJ ET q -1 0 0 1 359.321 344.346 cm +1 0 0 1 445.669 302.805 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 448.807 302.606 Td [(vect)]TJ +ET +q +1 0 0 1 470.356 302.805 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 362.459 344.147 Td [(type)]TJ +/F67 9.9626 Tf 473.495 302.606 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf -297.884 -11.955 Td [(indicated)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(19)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ + [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -232.675 -19.926 Td [(desc)]TJ +/F59 9.9626 Tf -24.906 -20.91 Td [(desc)]TJ ET q -1 0 0 1 171.218 324.421 cm +1 0 0 1 171.218 269.941 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 324.221 Td [(a)]TJ +/F59 9.9626 Tf 174.207 269.741 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 324.173 276.6 cm +1 0 0 1 360.068 222.12 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 327.311 276.401 Td [(desc)]TJ +/F67 9.9626 Tf 363.206 221.921 Td [(desc)]TJ ET q -1 0 0 1 348.86 276.6 cm +1 0 0 1 384.755 222.12 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 351.998 276.401 Td [(type)]TJ +/F67 9.9626 Tf 387.893 221.921 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -222.214 -19.926 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -258.11 -20.91 Td [(root)]TJ 0 g 0 G +/F62 9.9626 Tf 23.252 0 Td [(The)-253(pr)18(ocess)-254(that)-253(holds)-253(the)-253(global)-254(copy)111(.)-319(If)]TJ/F60 9.9626 Tf 182.635 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F93 10.3811 Tf 19.983 0 Td [(=)]TJ/F91 10.3811 Tf 11.147 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1)-253(all)-254(t)1(he)-254(pr)18(ocesses)-253(will)]TJ -220.305 -11.955 Td [(have)-250(a)-250(copy)-250(of)-250(the)-250(global)-250(vector)74(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable)]TJ/F91 10.3811 Tf 142.42 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1)]TJ/F91 10.3811 Tf 7.873 0 Td [(\024)]TJ/F60 9.9626 Tf 10.986 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F91 10.3811 Tf 19.923 0 Td [(\024)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1,)-250(default)]TJ/F91 10.3811 Tf 43.89 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1.)]TJ 0 g 0 G - 0 -19.925 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.776 0 Td [(is)-250(the)-250(1-norm)-250(of)-250(sparse)-250(submatrix)]TJ/F52 9.9626 Tf 150.4 0 Td [(A)]TJ/F54 9.9626 Tf 7.317 0 Td [(.)]TJ -205.587 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ +/F59 9.9626 Tf -301.107 -20.909 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.926 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -0 g 0 G - 141.968 -54.456 Td [(46)]TJ +/F62 9.9626 Tf 166.874 -29.888 Td [(66)]TJ 0 g 0 G ET endstream endobj -1152 0 obj +1429 0 obj << -/Length 5403 +/Length 1417 >> stream 0 g 0 G 0 g 0 G +0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.11)-1000(psb)]TJ +/F59 9.9626 Tf 99.895 706.129 Td [(glob)]TJ ET q -1 0 0 1 153.407 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 120.976 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 11.9552 Tf 156.993 706.129 Td [(normi)-250(\227)-250(In\002nity)-250(Norm)-250(of)-250(Sparse)-250(Matrix)]TJ/F54 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(in\002nity-norm)-250(of)-250(a)-250(matrix)]TJ/F52 9.9626 Tf 235.459 0 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(:)]TJ/F52 9.9626 Tf -102.327 -33.873 Td [(n)-15(r)-35(m)-18(i)]TJ/F83 10.3811 Tf 23.698 0 Td [(\040)-291(k)]TJ/F52 9.9626 Tf 19.336 0 Td [(A)]TJ/F83 10.3811 Tf 7.442 0 Td [(k)]TJ/F96 7.5716 Tf 5.409 -1.494 Td [(\245)]TJ/F54 9.9626 Tf -196.335 -20.424 Td [(wher)18(e:)]TJ +/F59 9.9626 Tf 123.965 706.129 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(The)-250(array)-250(wher)18(e)-250(the)-250(local)-250(parts)-250(must)-250(be)-250(gather)18(ed.)]TJ -9.126 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(or)-250(two)-250(array)-250(with)-250(the)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 202.459 0 Td [(ALLOCATABLE)]TJ 0 g 0 G -/F52 9.9626 Tf 0.623 -19.925 Td [(A)]TJ +/F62 9.9626 Tf 60.024 0 Td [(attribute.)]TJ 0 g 0 G -/F54 9.9626 Tf 12.299 0 Td [(r)18(epr)18(esents)-250(the)-250(global)-250(matrix)]TJ/F52 9.9626 Tf 125.981 0 Td [(A)]TJ +/F59 9.9626 Tf -287.39 -19.925 Td [(info)]TJ 0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G + 141.968 -500.124 Td [(67)]TJ 0 g 0 G ET -q -1 0 0 1 179.842 588.515 cm -[]0 d 0 J 0.398 w 0 0 m 183.819 0 l S -Q + +endstream +endobj +1436 0 obj +<< +/Length 7428 +>> +stream +0 g 0 G +0 g 0 G BT -/F52 9.9626 Tf 186.442 579.947 Td [(A)]TJ/F51 9.9626 Tf 120.292 0 Td [(Function)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(5.4)-1000(psb)]TJ ET q -1 0 0 1 179.842 576.161 cm -[]0 d 0 J 0.398 w 0 0 m 183.819 0 l S +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F54 9.9626 Tf 185.819 567.594 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(scatter)-250(\227)-250(Scatter)-250(Global)-250(Dense)-250(Matrix)]TJ/F62 9.9626 Tf -51.12 -20.363 Td [(These)-223(subr)18(outines)-223(scatters)-224(the)-223(portions)-223(of)-224(global)-223(dense)-223(matrix)-223(owned)-224(by)-223(a)-223(pr)18(o-)]TJ 0 -11.955 Td [(cess)-250(to)-250(all)-250(the)-250(pr)18(ocesses)-250(in)-250(the)-250(pr)18(ocesses)-250(grid.)]TJ/F60 9.9626 Tf 119.478 -26.893 Td [(l)-55(o)-35(c)]TJ ET q -1 0 0 1 323.053 567.793 cm +1 0 0 1 283.05 647.117 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 326.042 567.594 Td [(spnrmi)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F60 9.9626 Tf 286.333 646.918 Td [(x)]TJ/F60 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F91 10.3811 Tf 5.642 1.96 Td [(\040)]TJ/F60 9.9626 Tf 13.398 0 Td [(s)-25(c)-40(a)-25(t)-25(t)-25(e)-15(r)]TJ/F93 10.3811 Tf 28.632 0 Td [(\050)]TJ/F60 9.9626 Tf 4.493 0 Td [(g)-25(l)-55(o)-35(b)]TJ ET q -1 0 0 1 323.053 555.838 cm +1 0 0 1 362.3 647.117 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 326.042 555.638 Td [(spnrmi)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F60 9.9626 Tf 365.583 646.918 Td [(x)]TJ/F93 10.3811 Tf 5.329 0 Td [(\051)]TJ/F62 9.9626 Tf -220.207 -23.362 Td [(wher)18(e:)]TJ +0 g 0 G +/F60 9.9626 Tf 0.344 -22.091 Td [(g)-25(l)-55(o)-35(b)]TJ ET q -1 0 0 1 323.053 543.882 cm +1 0 0 1 169.703 601.664 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 326.042 543.683 Td [(spnrmi)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F60 9.9626 Tf 172.986 601.465 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 10.187 0 Td [(is)-250(the)-250(global)-250(matrix)]TJ/F60 9.9626 Tf 87.515 0 Td [(g)-25(l)-55(o)-35(b)]TJ ET q -1 0 0 1 323.053 531.927 cm +1 0 0 1 289.343 601.664 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 326.042 531.728 Td [(spnrmi)]TJ +/F60 9.9626 Tf 292.626 601.465 Td [(x)]TJ/F62 7.5716 Tf 5.105 -1.858 Td [(1)-13(:)]TJ/F60 7.5716 Tf 5.963 0 Td [(m)]TJ/F62 7.5716 Tf 5.985 0 Td [(,1)-13(:)]TJ/F60 7.5716 Tf 7.856 0 Td [(n)]TJ +0 g 0 G +/F60 9.9626 Tf -166.706 -20.955 Td [(l)-55(o)-35(c)]TJ ET q -1 0 0 1 179.842 527.942 cm -[]0 d 0 J 0.398 w 0 0 m 183.819 0 l S +1 0 0 1 163.696 578.851 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 227.467 499.564 Td [(T)92(able)-250(11:)-310(Data)-250(types)]TJ -0 g 0 G -0 g 0 G -0 g 0 G +/F60 9.9626 Tf 166.979 578.652 Td [(x)]TJ/F60 7.5716 Tf 5.147 -1.96 Td [(i)]TJ 0 g 0 G -/F59 9.9626 Tf -127.572 -33.873 Td [(psb_spnrmi\050A,)-525(desc_a,)-525(info\051)]TJ 0 -11.955 Td [(psb_normi\050A,)-525(desc_a,)-525(info\051)]TJ +/F62 9.9626 Tf 7.732 1.96 Td [(is)-250(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)-250(on)-250(pr)18(ocess)]TJ/F60 9.9626 Tf 234.034 0 Td [(i)]TJ/F62 9.9626 Tf 2.964 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F60 9.9626 Tf -266.027 -22.813 Td [(s)-25(c)-40(a)-25(t)-25(t)-25(e)-15(r)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 33.489 0 Td [(is)-250(the)-250(scatter)-250(function.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.926 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(global)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 194.722 0 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(.)]TJ -187.096 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ ET q -1 0 0 1 273.363 344.346 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 230.392 532.667 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F59 9.9626 Tf 276.501 344.147 Td [(Tspmat)]TJ +/F60 9.9626 Tf 236.663 524.099 Td [(x)]TJ/F60 7.5716 Tf 5.148 -1.96 Td [(i)]TJ/F62 9.9626 Tf 2.75 1.96 Td [(,)]TJ/F60 9.9626 Tf 4.276 0 Td [(y)]TJ/F59 9.9626 Tf 108.448 0 Td [(Subroutine)]TJ ET q -1 0 0 1 308.511 344.346 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 230.392 520.313 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S Q BT -/F59 9.9626 Tf 311.649 344.147 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -232.676 -19.926 Td [(desc)]TJ +/F62 9.9626 Tf 236.369 511.745 Td [(Integer)-8983(psb)]TJ ET q -1 0 0 1 120.408 324.421 cm +1 0 0 1 373.603 511.945 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 324.221 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F62 9.9626 Tf 376.592 511.745 Td [(scatter)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ ET q -1 0 0 1 273.363 276.6 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 373.603 499.989 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 276.501 276.401 Td [(desc)]TJ +/F62 9.9626 Tf 376.592 499.79 Td [(scatter)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ ET q -1 0 0 1 298.05 276.6 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 373.603 488.034 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 301.189 276.401 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -222.215 -19.926 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(Function)-250(value)]TJ -0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(is)-250(the)-250(in\002nity-norm)-250(of)-250(sparse)-250(submatrix)]TJ/F52 9.9626 Tf 177.627 0 Td [(A)]TJ/F54 9.9626 Tf 7.317 0 Td [(.)]TJ -232.814 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(long)-250(pr)18(ecision)-250(r)18(eal)-250(number)74(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.926 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -0 g 0 G - 141.968 -54.456 Td [(47)]TJ -0 g 0 G +/F62 9.9626 Tf 376.592 487.835 Td [(scatter)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ ET - -endstream -endobj -1163 0 obj -<< -/Length 7234 ->> -stream -0 g 0 G -0 g 0 G +q +1 0 0 1 373.603 476.079 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.12)-1000(psb)]TJ +/F62 9.9626 Tf 376.592 475.88 Td [(scatter)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ ET q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 373.603 464.124 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(spmm)-250(\227)-250(Sparse)-250(Matrix)-250(by)-250(Dense)-250(Matrix)-250(Product)]TJ/F54 9.9626 Tf -57.098 -19.303 Td [(This)-250(subr)18(outine)-250(computes)-250(the)-250(Sparse)-250(Matrix)-250(by)-250(Dense)-250(Matrix)-250(Pr)18(oduct:)]TJ/F52 9.9626 Tf 140.147 -24.611 Td [(y)]TJ/F83 10.3811 Tf 7.998 0 Td [(\040)]TJ/F60 9.9626 Tf 13.397 0 Td [(a)]TJ/F52 9.9626 Tf 6.008 0 Td [(A)-42(x)]TJ/F85 10.3811 Tf 14.878 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -0 g 0 G -/F54 9.9626 Tf 133.513 0 Td [(\0501\051)]TJ +/F62 9.9626 Tf 376.592 463.925 Td [(scatter)]TJ +ET +q +1 0 0 1 230.392 460.139 cm +[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +Q 0 g 0 G -/F52 9.9626 Tf -194.745 -20.13 Td [(y)]TJ/F83 10.3811 Tf 7.998 0 Td [(\040)]TJ/F60 9.9626 Tf 13.398 0 Td [(a)]TJ/F52 9.9626 Tf 6.007 0 Td [(A)]TJ/F52 7.5716 Tf 7.511 4.115 Td [(T)]TJ/F52 9.9626 Tf 5.694 -4.115 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ +BT +/F62 9.9626 Tf 278.277 431.76 Td [(T)92(able)-250(20:)-310(Data)-250(types)]TJ 0 g 0 G -/F54 9.9626 Tf 130.715 0 Td [(\0502\051)]TJ 0 g 0 G -/F52 9.9626 Tf -195.482 -20.129 Td [(y)]TJ/F83 10.3811 Tf 7.998 0 Td [(\040)]TJ/F60 9.9626 Tf 13.397 0 Td [(a)]TJ/F52 9.9626 Tf 6.008 0 Td [(A)]TJ/F52 7.5716 Tf 7.7 4.114 Td [(H)]TJ/F52 9.9626 Tf 6.981 -4.114 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -112.628 -28.004 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 129.977 0 Td [(\0503\051)]TJ + [-525(psb_scatter\050glob_x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -317.15 -18.633 Td [(wher)18(e:)]TJ + [-525(loc_x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F52 9.9626 Tf -14.65 -20.451 Td [(x)]TJ + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 10.186 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 115.61 0 Td [(x)]TJ/F54 7.5716 Tf 5.201 -1.494 Td [(:)-12(,)-13(:)]TJ + [-525(info,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F52 9.9626 Tf -131.167 -19.132 Td [(y)]TJ + [-525(root,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 10.087 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 115.441 0 Td [(y)]TJ/F54 7.5716 Tf 5.2 -1.494 Td [(:)-13(,)-12(:)]TJ + [-525(mold\051)]TJ 0 g 0 G -/F52 9.9626 Tf -130.23 -19.131 Td [(A)]TJ +/F59 9.9626 Tf -14.944 -24.806 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 12.299 0 Td [(is)-250(the)-250(global)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 118.41 0 Td [(A)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G +/F59 9.9626 Tf -29.828 -22.813 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G + 0 -22.813 Td [(glob)]TJ ET q -1 0 0 1 230.392 517.986 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F52 9.9626 Tf 236.992 509.418 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(,)]TJ/F52 9.9626 Tf 5.275 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(,)]TJ/F52 9.9626 Tf 5.106 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(,)]TJ/F60 9.9626 Tf 5.106 0 Td [(a)]TJ/F54 9.9626 Tf 5.385 0 Td [(,)]TJ/F60 9.9626 Tf 5.355 0 Td [(b)]TJ/F51 9.9626 Tf 76.437 0 Td [(Subroutine)]TJ -ET -q -1 0 0 1 230.392 505.633 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F54 9.9626 Tf 236.369 497.065 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 373.603 497.264 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 497.065 Td [(spmm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 373.603 485.309 cm +1 0 0 1 171.786 333.523 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 485.11 Td [(spmm)]TJ -140.223 -11.956 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F59 9.9626 Tf 174.774 333.324 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(The)-250(array)-250(that)-250(must)-250(be)-250(scatter)18(ed)-250(into)-250(local)-250(pieces.)]TJ -9.126 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(or)-250(two)-250(array)111(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -22.814 Td [(desc)]TJ ET q -1 0 0 1 373.603 473.354 cm +1 0 0 1 171.218 262.89 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 473.154 Td [(spmm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F59 9.9626 Tf 174.207 262.69 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 373.603 461.398 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 360.068 215.069 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 376.592 461.199 Td [(spmm)]TJ +/F67 9.9626 Tf 363.206 214.87 Td [(desc)]TJ ET q -1 0 0 1 230.392 457.413 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +1 0 0 1 384.755 215.069 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 278.277 429.035 Td [(T)92(able)-250(12:)-310(Data)-250(types)]TJ -0 g 0 G -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -107.398 -24.261 Td [(call)]TJ +/F67 9.9626 Tf 387.893 214.87 Td [(type)]TJ 0 g 0 G - [-525(psb_spmm\050alpha,)-525(a,)-525(x,)-525(beta,)-525(y,)-525(desc_a,)-525(info\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -14.944 -11.955 Td [(call)]TJ -0 g 0 G - [-525(psb_spmm\050alpha,)-525(a,)-525(x,)-525(beta,)-525(y,desc_a,)-525(info,)-525(trans,)-525(work\051)]TJ -0 g 0 G -/F51 9.9626 Tf -5.23 -22.618 Td [(T)90(ype:)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F59 9.9626 Tf -258.11 -22.813 Td [(root)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.626 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 23.252 0 Td [(The)-218(pr)18(ocess)-218(that)-218(holds)-219(t)1(he)-219(global)-218(copy)111(.)-299(If)]TJ/F60 9.9626 Tf 179.982 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F93 10.3811 Tf 19.922 0 Td [(=)]TJ/F91 10.3811 Tf 11.086 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1)-218(all)-218(the)-218(pr)18(ocesses)-219(have)]TJ -217.53 -11.956 Td [(a)-250(copy)-250(of)-250(the)-250(global)-250(vector)74(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-258(as:)-327(an)-258(integer)-259(variable)]TJ/F91 10.3811 Tf 142.917 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1)]TJ/F91 10.3811 Tf 8.027 0 Td [(\024)]TJ/F60 9.9626 Tf 11.139 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F91 10.3811 Tf 20.077 0 Td [(\024)]TJ/F60 9.9626 Tf 11.239 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.534 0 Td [(\000)]TJ/F62 9.9626 Tf 10.162 0 Td [(1,)-260(default)]TJ/F67 9.9626 Tf 43.952 0 Td [(psb_root_)]TJ/F62 9.9626 Tf 47.073 0 Td [(,)]TJ -316.314 -11.955 Td [(i.e.)-310(pr)18(ocess)-250(0.)]TJ 0 g 0 G + 141.968 -29.888 Td [(68)]TJ 0 g 0 G - 0 -20.626 Td [(alpha)]TJ -0 g 0 G -/F54 9.9626 Tf 30.436 0 Td [(the)-250(scalar)]TJ/F60 9.9626 Tf 44.368 0 Td [(a)]TJ/F54 9.9626 Tf 5.385 0 Td [(.)]TJ -55.282 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(12)]TJ +ET + +endstream +endobj +1443 0 obj +<< +/Length 3984 +>> +stream 0 g 0 G - [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.626 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(the)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 164.964 0 Td [(A)]TJ/F54 9.9626 Tf 7.317 0 Td [(.)]TJ -157.337 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.242 0 Td [(psb)]TJ -ET -q -1 0 0 1 324.173 212.882 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q BT -/F59 9.9626 Tf 327.311 212.682 Td [(Tspmat)]TJ +/F59 9.9626 Tf 99.895 706.129 Td [(mold)]TJ +0 g 0 G +/F62 9.9626 Tf 28.782 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(vector)-250(storage.)]TJ -3.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-223(as:)-296(an)-223(object)-223(of)-222(a)-223(class)-223(derived)-223(fr)18(om)]TJ/F67 9.9626 Tf 199.086 0 Td [(psb)]TJ ET q -1 0 0 1 359.321 212.882 cm +1 0 0 1 340.207 658.507 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 362.459 212.682 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -232.675 -20.625 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.742 0 Td [(psb)]TJ +/F67 9.9626 Tf 343.345 658.308 Td [(T)]TJ ET q -1 0 0 1 436.673 144.435 cm +1 0 0 1 349.203 658.507 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 439.811 144.236 Td [(T)]TJ +/F67 9.9626 Tf 352.341 658.308 Td [(base)]TJ ET q -1 0 0 1 445.669 144.435 cm +1 0 0 1 373.89 658.507 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 448.807 144.236 Td [(vect)]TJ +/F67 9.9626 Tf 377.028 658.308 Td [(vect)]TJ ET q -1 0 0 1 470.356 144.435 cm +1 0 0 1 398.577 658.507 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 473.495 144.236 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf -297.884 -11.955 Td [(containing)-278(numbers)-278(of)-279(type)-278(speci\002ed)-278(in)-278(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-278(12)]TJ -0 g 0 G - [(.)-395(The)-278(rank)-279(of)]TJ/F52 9.9626 Tf 275.498 0 Td [(x)]TJ/F54 9.9626 Tf 7.978 0 Td [(must)-278(be)]TJ -283.476 -11.955 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 52.946 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -0 g 0 G - 83.916 -29.888 Td [(48)]TJ -0 g 0 G +/F67 9.9626 Tf 401.716 658.308 Td [(type)]TJ/F62 9.9626 Tf 20.921 0 Td [(;)-232(this)]TJ -297.835 -11.955 Td [(is)-250(only)-250(allowed)-250(when)-250(loc)]TJ ET - -endstream -endobj -1178 0 obj -<< -/Length 6532 ->> -stream -0 g 0 G -0 g 0 G -0 g 0 G +q +1 0 0 1 234.988 646.552 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q BT -/F51 9.9626 Tf 99.895 706.129 Td [(beta)]TJ -0 g 0 G -/F54 9.9626 Tf 24.349 0 Td [(the)-250(scalar)]TJ/F60 9.9626 Tf 44.617 0 Td [(b)]TJ/F54 9.9626 Tf 5.524 0 Td [(.)]TJ -49.583 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(12)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -18.597 Td [(y)]TJ -0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.445 0 Td [(y)]TJ/F54 9.9626 Tf 5.105 0 Td [(.)]TJ -166.164 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ +/F62 9.9626 Tf 237.976 646.353 Td [(x)-250(is)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ +/F67 9.9626 Tf 50.53 0 Td [(psb)]TJ ET q -1 0 0 1 385.864 592.09 cm +1 0 0 1 304.825 646.552 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 389.002 591.891 Td [(T)]TJ +/F67 9.9626 Tf 307.963 646.353 Td [(T)]TJ ET q -1 0 0 1 394.86 592.09 cm +1 0 0 1 313.821 646.552 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 397.998 591.891 Td [(vect)]TJ +/F67 9.9626 Tf 316.959 646.353 Td [(vect)]TJ ET q -1 0 0 1 419.547 592.09 cm +1 0 0 1 338.508 646.552 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 422.685 591.891 Td [(type)]TJ +/F67 9.9626 Tf 341.646 646.353 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.883 -11.956 Td [(containing)-280(numbers)-280(of)-280(type)-280(speci\002ed)-280(in)-280(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-280(12)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -262.673 -19.925 Td [(On)-250(Return)]TJ 0 g 0 G - [(.)-400(The)-280(rank)-280(of)]TJ/F52 9.9626 Tf 275.562 0 Td [(y)]TJ/F54 9.9626 Tf 7.895 0 Td [(must)-280(be)]TJ -283.457 -11.955 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 53.115 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -83.228 -18.597 Td [(desc)]TJ + 0 -19.926 Td [(loc)]TJ ET q -1 0 0 1 120.408 549.583 cm +1 0 0 1 113.773 606.702 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 549.383 Td [(a)]TJ +/F59 9.9626 Tf 116.762 606.502 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F60 9.9626 Tf 175.664 0 Td [(g)-25(l)-55(o)-35(b)]TJ +ET +q +1 0 0 1 321.043 606.702 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F60 9.9626 Tf 324.326 606.502 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ -204.729 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-214(as:)-292(a)-215(rank)-214(one)-214(or)-214(two)-215(ALLOCA)74(T)74(ABLE)-214(array)-214(or)-214(an)-215(object)-214(of)-214(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F67 9.9626 Tf 0 -11.955 Td [(psb)]TJ ET q -1 0 0 1 273.363 501.762 cm +1 0 0 1 141.121 546.926 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 276.501 501.563 Td [(desc)]TJ +/F67 9.9626 Tf 144.259 546.727 Td [(T)]TJ ET q -1 0 0 1 298.05 501.762 cm +1 0 0 1 150.117 546.926 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 301.189 501.563 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -222.215 -18.597 Td [(trans)]TJ -0 g 0 G -/F54 9.9626 Tf 27.666 0 Td [(indicates)-250(what)-250(kind)-250(of)-250(operation)-250(to)-250(perform.)]TJ -0 g 0 G -/F51 9.9626 Tf -2.759 -18.597 Td [(trans)-250(=)-250(N)]TJ -0 g 0 G -/F54 9.9626 Tf 46.983 0 Td [(the)-250(operation)-250(is)-250(speci\002ed)-250(by)-250(equation)]TJ -0 0 1 rg 0 0 1 RG - [-250(1)]TJ -0 g 0 G -0 g 0 G -/F51 9.9626 Tf -46.983 -14.612 Td [(trans)-250(=)-250(T)]TJ -0 g 0 G -/F54 9.9626 Tf 45.33 0 Td [(the)-250(operation)-250(is)-250(speci\002ed)-250(by)-250(equation)]TJ -0 0 1 rg 0 0 1 RG - [-250(2)]TJ -0 g 0 G -0 g 0 G -/F51 9.9626 Tf -45.33 -14.612 Td [(trans)-250(=)-250(C)]TJ -0 g 0 G -/F54 9.9626 Tf 45.878 0 Td [(the)-250(operation)-250(is)-250(speci\002ed)-250(by)-250(equation)]TJ -0 0 1 rg 0 0 1 RG - [-250(3)]TJ -0 g 0 G - -45.878 -18.597 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F52 9.9626 Tf 38.64 0 Td [(t)-15(r)-50(a)-25(n)-25(s)]TJ/F85 10.3811 Tf 25.193 0 Td [(=)]TJ/F52 9.9626 Tf 11.434 0 Td [(N)]TJ/F54 9.9626 Tf -75.267 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(character)-250(variable.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -18.596 Td [(work)]TJ -0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(work)-250(array)111(.)]TJ -3.875 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-270(as:)-351(a)-270(rank)-270(one)-270(array)-271(of)-270(the)-270(same)-270(type)-271(of)]TJ/F52 9.9626 Tf 220.875 0 Td [(x)]TJ/F54 9.9626 Tf 7.898 0 Td [(and)]TJ/F52 9.9626 Tf 19.684 0 Td [(y)]TJ/F54 9.9626 Tf 7.798 0 Td [(with)-270(the)-270(T)74(AR-)]TJ -256.255 -11.955 Td [(GET)-250(attribute.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -18.597 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -18.597 Td [(y)]TJ +/F67 9.9626 Tf 153.255 546.727 Td [(vect)]TJ +ET +q +1 0 0 1 174.804 546.926 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 177.942 546.727 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(matrix)]TJ/F52 9.9626 Tf 144.939 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -135.659 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-379(as:)-568(an)-379(array)-379(of)-379(rank)-379(one)-379(or)-379(two)-379(containing)-379(numbers)-379(of)-379(type)]TJ 0 -11.955 Td [(speci\002ed)-250(in)-250(T)92(able)]TJ +/F62 9.9626 Tf 23.412 0 Td [(containing)-250(numbers)-250(of)-250(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ 0 0 1 rg 0 0 1 RG - [-250(12)]TJ + [-250(20)]TJ 0 g 0 G [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -18.597 Td [(info)]TJ +/F59 9.9626 Tf -101.459 -19.926 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 141.968 -36.529 Td [(49)]TJ + 141.968 -388.543 Td [(69)]TJ 0 g 0 G ET endstream endobj -1185 0 obj +1447 0 obj << -/Length 7154 +/Length 6319 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(4.13)-1000(psb)]TJ +/F59 14.3462 Tf 150.705 706.042 Td [(6)-1000(Data)-250(management)-250(routines)]TJ/F59 11.9552 Tf 0 -24.694 Td [(6.1)-1000(psb)]TJ ET q -1 0 0 1 204.216 706.328 cm +1 0 0 1 198.238 681.547 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(spsm)-250(\227)-250(T)111(riangular)-250(System)-250(Solve)]TJ/F54 9.9626 Tf -57.098 -19.83 Td [(This)-250(subr)18(outine)-250(computes)-250(the)-250(T)90(riangular)-250(System)-250(Solve:)]TJ/F52 9.9626 Tf 122.724 -35.213 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.639 0 Td [(T)]TJ/F83 7.8896 Tf 6.545 4.115 Td [(\000)]TJ/F54 7.5716 Tf 6.228 0 Td [(1)]TJ/F52 9.9626 Tf 4.577 -4.115 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -82.196 -16.139 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.708 0 Td [(D)-48(T)]TJ/F83 7.8896 Tf 14.775 4.114 Td [(\000)]TJ/F54 7.5716 Tf 6.227 0 Td [(1)]TJ/F52 9.9626 Tf 4.578 -4.114 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -90.495 -16.139 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.639 0 Td [(T)]TJ/F83 7.8896 Tf 6.545 4.114 Td [(\000)]TJ/F54 7.5716 Tf 6.228 0 Td [(1)]TJ/F52 9.9626 Tf 4.607 -4.114 Td [(D)-52(x)]TJ/F85 10.3811 Tf 15.536 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -90.495 -16.09 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.639 0 Td [(T)]TJ/F83 7.8896 Tf 6.545 4.114 Td [(\000)]TJ/F52 7.5716 Tf 6.421 0 Td [(T)]TJ/F52 9.9626 Tf 5.694 -4.114 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -83.506 -16.09 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.708 0 Td [(D)-48(T)]TJ/F83 7.8896 Tf 14.775 4.114 Td [(\000)]TJ/F52 7.5716 Tf 6.42 0 Td [(T)]TJ/F52 9.9626 Tf 5.695 -4.114 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -91.805 -16.09 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.639 0 Td [(T)]TJ/F83 7.8896 Tf 6.545 4.114 Td [(\000)]TJ/F52 7.5716 Tf 6.421 0 Td [(T)]TJ/F52 9.9626 Tf 5.724 -4.114 Td [(D)-52(x)]TJ/F85 10.3811 Tf 15.536 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -91.805 -16.091 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.639 0 Td [(T)]TJ/F83 7.8896 Tf 6.545 4.115 Td [(\000)]TJ/F52 7.5716 Tf 6.61 0 Td [(H)]TJ/F52 9.9626 Tf 6.982 -4.115 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.648 0 Td [(y)]TJ -84.982 -16.09 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.708 0 Td [(D)-48(T)]TJ/F83 7.8896 Tf 14.775 4.115 Td [(\000)]TJ/F52 7.5716 Tf 6.61 0 Td [(H)]TJ/F52 9.9626 Tf 6.982 -4.115 Td [(x)]TJ/F85 10.3811 Tf 7.267 0 Td [(+)]TJ/F60 9.9626 Tf 10.504 0 Td [(b)]TJ/F52 9.9626 Tf 5.649 0 Td [(y)]TJ -93.281 -16.09 Td [(y)]TJ/F83 10.3811 Tf 15.193 0 Td [(\040)]TJ/F60 9.9626 Tf 20.593 0 Td [(a)]TJ/F52 9.9626 Tf 5.639 0 Td [(T)]TJ/F83 7.8896 Tf 6.545 4.115 Td [(\000)]TJ/F52 7.5716 Tf 6.61 0 Td [(H)]TJ/F52 9.9626 Tf 7.012 -4.115 Td [(D)-52(x)]TJ/F85 10.3811 Tf 15.536 0 Td [(+)]TJ/F60 9.9626 Tf 10.505 0 Td [(b)]TJ/F52 9.9626 Tf 5.648 0 Td [(y)]TJ/F54 9.9626 Tf -201.061 -38.202 Td [(wher)18(e:)]TJ -0 g 0 G -/F52 9.9626 Tf -14.65 -21.265 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 10.186 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 115.61 0 Td [(x)]TJ/F54 7.5716 Tf 5.201 -1.495 Td [(:)-12(,)-13(:)]TJ -0 g 0 G -/F52 9.9626 Tf -131.167 -20.218 Td [(y)]TJ -0 g 0 G -/F54 9.9626 Tf 10.087 0 Td [(is)-250(the)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 115.441 0 Td [(y)]TJ/F54 7.5716 Tf 5.201 -1.494 Td [(:)-12(,)-13(:)]TJ +/F59 11.9552 Tf 201.825 681.348 Td [(cdall)-250(\227)-250(Allocates)-250(a)-250(communication)-250(descriptor)]TJ 0 g 0 G -/F52 9.9626 Tf -130.599 -20.218 Td [(T)]TJ 0 g 0 G -/F54 9.9626 Tf 11.432 0 Td [(is)-250(the)-250(global)-250(sparse)-250(block)-250(triangular)-250(submatrix)]TJ/F52 9.9626 Tf 206.797 0 Td [(T)]TJ +/F67 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,mg=mg,parts=parts\051)]TJ 0 -11.955 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,vg=vg,[mg=mg,flag=flag]\051)]TJ 0 -11.955 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,vl=vl,[nl=nl,globalcheck=.false.,lidx=lidx]\051)]TJ 0 -11.955 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,nl=nl\051)]TJ 0 -11.956 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,mg=mg,repl=.true.\051)]TJ/F62 9.9626 Tf 14.944 -19.771 Td [(This)-377(subr)18(outine)-378(initializes)-377(the)-378(communication)-377(descriptor)-378(associated)-377(with)]TJ -14.944 -11.956 Td [(an)-271(index)-271(space.)-373(One)-272(o)1(f)-272(the)-271(optional)-271(ar)18(guments)]TJ/F67 9.9626 Tf 209.77 0 Td [(parts)]TJ/F62 9.9626 Tf 26.152 0 Td [(,)]TJ/F67 9.9626 Tf 5.244 0 Td [(vg)]TJ/F62 9.9626 Tf 10.461 0 Td [(,)]TJ/F67 9.9626 Tf 5.244 0 Td [(vl)]TJ/F62 9.9626 Tf 10.461 0 Td [(,)]TJ/F67 9.9626 Tf 5.244 0 Td [(nl)]TJ/F62 9.9626 Tf 13.161 0 Td [(or)]TJ/F67 9.9626 Tf 12.076 0 Td [(repl)]TJ/F62 9.9626 Tf 23.622 0 Td [(must)]TJ -321.435 -11.955 Td [(be)-250(speci\002ed,)-250(ther)18(eby)-250(choosing)-250(the)-250(speci\002c)-250(initialization)-250(strategy)111(.)]TJ 0 g 0 G - -218.159 -21.712 Td [(D)]TJ -0 g 0 G -/F54 9.9626 Tf 12.956 0 Td [(is)-250(the)-250(scaling)-250(diagonal)-250(matrix.)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 6.894 -21.266 Td [(call)]TJ +/F59 9.9626 Tf 0 -18.208 Td [(On)-250(Entry)]TJ 0 g 0 G - [-525(psb_spsm\050alpha,)-525(t,)-525(x,)-525(beta,)-525(y,)-525(desc_a,)-525(info\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -14.944 -11.955 Td [(call)]TJ 0 g 0 G - [-525(psb_spsm\050alpha,)-525(t,)-525(x,)-525(beta,)-525(y,)-525(desc_a,)-525(info,)-525(trans,)-525(unit,)-525(choice,)-525(diag,)-525(work\051)]TJ + 0 -19.067 Td [(T)90(ype:)]TJ 0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G +/F59 9.9626 Tf -29.828 -19.067 Td [(icontxt)]TJ 0 g 0 G -ET -q -1 0 0 1 230.392 339.439 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F52 9.9626 Tf 236.623 330.871 Td [(T)]TJ/F54 9.9626 Tf 6.451 0 Td [(,)]TJ/F52 9.9626 Tf 5.275 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(,)]TJ/F52 9.9626 Tf 5.106 0 Td [(y)]TJ/F54 9.9626 Tf 5.105 0 Td [(,)]TJ/F52 9.9626 Tf 5.306 0 Td [(D)]TJ/F54 9.9626 Tf 7.975 0 Td [(,)]TJ/F60 9.9626 Tf 5.105 0 Td [(a)]TJ/F54 9.9626 Tf 5.385 0 Td [(,)]TJ/F60 9.9626 Tf 5.355 0 Td [(b)]TJ/F51 9.9626 Tf 64.393 0 Td [(Subroutine)]TJ -ET -q -1 0 0 1 230.392 327.085 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F54 9.9626 Tf 236.369 318.517 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 373.603 318.716 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 318.517 Td [(spsm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 373.603 306.761 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 306.562 Td [(spsm)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 373.603 294.806 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 294.607 Td [(spsm)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ -ET -q -1 0 0 1 373.603 282.851 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 282.652 Td [(spsm)]TJ -ET -q -1 0 0 1 230.392 278.866 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q +/F62 9.9626 Tf 35.965 0 Td [(the)-250(communication)-250(context.)]TJ -11.058 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 278.277 250.487 Td [(T)92(able)-250(13:)-310(Data)-250(types)]TJ +/F59 9.9626 Tf -24.907 -19.066 Td [(vg)]TJ 0 g 0 G +/F62 9.9626 Tf 16.06 0 Td [(Data)-250(allocation:)-310(each)-250(index)]TJ/F60 9.9626 Tf 121.707 0 Td [(i)]TJ/F91 10.3811 Tf 5.856 0 Td [(2)-290(f)]TJ/F62 9.9626 Tf 15.245 0 Td [(1)-179(.)-192(.)-191(.)]TJ/F60 9.9626 Tf 19.967 0 Td [(m)-47(g)]TJ/F91 10.3811 Tf 13.449 0 Td [(g)]TJ/F62 9.9626 Tf 7.806 0 Td [(is)-250(allocated)-250(to)-250(pr)18(ocess)]TJ/F60 9.9626 Tf 98.454 0 Td [(v)-47(g)]TJ/F93 10.3811 Tf 10.68 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.089 0 Td [(\051)]TJ/F62 9.9626 Tf 4.149 0 Td [(.)]TJ -295.759 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ 0 g 0 G +/F59 9.9626 Tf -24.907 -19.067 Td [(\003ag)]TJ 0 g 0 G -/F51 9.9626 Tf -127.572 -38.916 Td [(T)90(ype:)]TJ +/F62 9.9626 Tf 21.589 0 Td [(Speci\002es)-250(whether)-250(entries)-250(in)]TJ/F60 9.9626 Tf 123.401 0 Td [(v)-47(g)]TJ/F62 9.9626 Tf 13.046 0 Td [(ar)18(e)-250(zer)18(o-)-250(or)-250(one-based.)]TJ -133.129 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0,)-167(1,)-250(default)-250(0.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F59 9.9626 Tf -24.907 -19.067 Td [(mg)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -21.712 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 19.377 0 Td [(the)-250(\050global\051)-250(number)-250(of)-250(r)18(ows)-250(of)-250(the)-250(pr)18(oblem.)]TJ 5.53 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-262(as:)-335(an)-263(integer)-262(value.)-348(It)-262(is)-262(r)18(equir)18(ed)-263(if)]TJ/F67 9.9626 Tf 203.091 0 Td [(parts)]TJ/F62 9.9626 Tf 28.766 0 Td [(or)]TJ/F67 9.9626 Tf 11.99 0 Td [(repl)]TJ/F62 9.9626 Tf 23.536 0 Td [(is)-262(speci\002ed,)]TJ -267.383 -11.955 Td [(it)-250(is)-250(optional)-250(if)]TJ/F67 9.9626 Tf 66.141 0 Td [(vg)]TJ/F62 9.9626 Tf 12.951 0 Td [(is)-250(speci\002ed.)]TJ 0 g 0 G +/F59 9.9626 Tf -103.999 -19.067 Td [(parts)]TJ 0 g 0 G - 0 -21.713 Td [(alpha)]TJ +/F62 9.9626 Tf 27.666 0 Td [(the)-250(subr)18(outine)-250(that)-250(de\002nes)-250(the)-250(partitioning)-250(scheme.)]TJ -2.759 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(subr)18(outine.)]TJ 0 g 0 G -/F54 9.9626 Tf 30.436 0 Td [(the)-250(scalar)]TJ/F60 9.9626 Tf 44.368 0 Td [(a)]TJ/F54 9.9626 Tf 5.385 0 Td [(.)]TJ -55.282 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(13)]TJ +/F59 9.9626 Tf -24.907 -19.067 Td [(vl)]TJ 0 g 0 G - [(.)]TJ +/F62 9.9626 Tf 13.838 0 Td [(Data)-293(allocation:)-395(the)-293(set)-292(of)-293(global)-293(i)1(ndices)]TJ/F60 9.9626 Tf 181.166 0 Td [(v)-25(l)]TJ/F93 10.3811 Tf 8.548 0 Td [(\050)]TJ/F62 9.9626 Tf 4.149 0 Td [(1)-369(:)]TJ/F60 9.9626 Tf 14.955 0 Td [(n)-25(l)]TJ/F93 10.3811 Tf 9.105 0 Td [(\051)]TJ/F62 9.9626 Tf 7.065 0 Td [(belonging)-293(to)-292(the)-293(calling)]TJ -213.919 -11.955 Td [(pr)18(ocess.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ 0 g 0 G - 141.967 -29.888 Td [(50)]TJ + 141.967 -29.887 Td [(70)]TJ 0 g 0 G ET endstream endobj -1196 0 obj +1452 0 obj << -/Length 7295 +/Length 6337 >> stream 0 g 0 G 0 g 0 G 0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(t)]TJ +/F59 9.9626 Tf 99.895 706.129 Td [(nl)]TJ 0 g 0 G -/F54 9.9626 Tf 8.299 0 Td [(the)-250(global)-250(portion)-250(of)-250(the)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 171.221 0 Td [(T)]TJ/F54 9.9626 Tf 6.451 0 Td [(.)]TJ -161.064 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(type)-250(speci\002ed)-250(in)-250(\247)]TJ -0 0 1 rg 0 0 1 RG - [-250(3)]TJ -0 g 0 G - [(.)]TJ +/F62 9.9626 Tf 14.386 0 Td [(Data)-305(allocation:)-421(in)-305(a)-305(generalized)-305(block-r)18(ow)-305(distribution)-306(the)-305(number)-305(of)-305(in-)]TJ 10.521 -11.955 Td [(dices)-250(belonging)-250(to)-250(the)-250(curr)18(ent)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(May)-250(be)-250(speci\002ed)-250(together)-250(with)]TJ/F67 9.9626 Tf 272.943 0 Td [(vl)]TJ/F62 9.9626 Tf 10.461 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.65 Td [(x)]TJ +/F59 9.9626 Tf -308.311 -20.135 Td [(repl)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.614 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -165.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ -ET -q -1 0 0 1 385.864 590.037 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 389.002 589.838 Td [(T)]TJ -ET -q -1 0 0 1 394.86 590.037 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 397.998 589.838 Td [(vect)]TJ -ET -q -1 0 0 1 419.547 590.037 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 422.685 589.838 Td [(type)]TJ +/F62 9.9626 Tf 23.243 0 Td [(Data)-288(allocation:)-385(build)-288(a)-288(r)18(eplicated)-287(index)-288(space)-288(\050i.e.)-423(all)-288(pr)18(ocesses)-287(own)-288(all)]TJ 1.664 -11.955 Td [(indices\051.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(the)-250(logical)-250(value)]TJ/F67 9.9626 Tf 132.133 0 Td [(.true.)]TJ 0 g 0 G -/F54 9.9626 Tf -297.883 -11.955 Td [(containing)-278(numbers)-278(of)-279(type)-278(speci\002ed)-278(in)-278(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-278(13)]TJ +/F59 9.9626 Tf -157.04 -20.135 Td [(globalcheck)]TJ 0 g 0 G - [(.)-395(The)-278(rank)-278(of)]TJ/F52 9.9626 Tf 275.498 0 Td [(x)]TJ/F54 9.9626 Tf 7.977 0 Td [(must)-278(be)]TJ -283.475 -11.956 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 52.946 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ +/F62 9.9626 Tf 59.766 0 Td [(Data)-250(allocation:)-310(do)-250(global)-250(checks)-250(on)-250(the)-250(local)-250(index)-250(lists)]TJ/F67 9.9626 Tf 247.788 0 Td [(vl)]TJ/F62 9.9626 Tf -282.647 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value,)-250(default:)]TJ/F67 9.9626 Tf 162.678 0 Td [(.false.)]TJ 0 g 0 G -/F51 9.9626 Tf -82.959 -20.649 Td [(beta)]TJ +/F59 9.9626 Tf -187.585 -20.135 Td [(lidx)]TJ 0 g 0 G -/F54 9.9626 Tf 24.349 0 Td [(the)-250(scalar)]TJ/F60 9.9626 Tf 44.617 0 Td [(b)]TJ/F54 9.9626 Tf 5.524 0 Td [(.)]TJ -49.583 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(number)-250(of)-250(the)-250(data)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(13)]TJ +/F62 9.9626 Tf 22.685 0 Td [(Data)-308(allocation:)-425(the)-307(set)-308(of)-307(local)-308(indices)]TJ/F60 9.9626 Tf 175.731 0 Td [(l)-48(i)-32(d)-42(x)]TJ/F93 10.3811 Tf 17.065 0 Td [(\050)]TJ/F62 9.9626 Tf 4.15 0 Td [(1)-397(:)]TJ/F60 9.9626 Tf 15.505 0 Td [(n)-25(l)]TJ/F93 10.3811 Tf 9.105 0 Td [(\051)]TJ/F62 9.9626 Tf 7.214 0 Td [(to)-308(be)-307(assigned)-308(to)-307(the)]TJ -226.548 -11.955 Td [(global)-250(indices)]TJ/F60 9.9626 Tf 63.476 0 Td [(v)-25(l)]TJ/F62 9.9626 Tf 8.423 0 Td [(.)]TJ -71.899 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ 0 g 0 G - [(.)]TJ +/F59 9.9626 Tf -24.907 -22.127 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.65 Td [(y)]TJ 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.445 0 Td [(y)]TJ/F54 9.9626 Tf 5.105 0 Td [(.)]TJ -166.164 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ + 0 -20.135 Td [(desc)]TJ ET q -1 0 0 1 385.864 429.186 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 120.408 376.512 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 389.002 428.986 Td [(T)]TJ +/F59 9.9626 Tf 123.397 376.313 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 394.86 429.186 cm +1 0 0 1 309.258 328.692 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 397.998 428.986 Td [(vect)]TJ +/F67 9.9626 Tf 312.397 328.492 Td [(desc)]TJ ET q -1 0 0 1 419.547 429.186 cm +1 0 0 1 333.945 328.692 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 422.685 428.986 Td [(type)]TJ +/F67 9.9626 Tf 337.084 328.492 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.883 -11.955 Td [(containing)-280(numbers)-280(of)-280(type)-280(speci\002ed)-280(in)-280(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-280(13)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [(.)-400(The)-280(rank)-280(of)]TJ/F52 9.9626 Tf 275.562 0 Td [(y)]TJ/F54 9.9626 Tf 7.895 0 Td [(must)-280(be)]TJ -283.457 -11.955 Td [(the)-250(same)-250(of)]TJ/F52 9.9626 Tf 53.115 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(.)]TJ +/F59 9.9626 Tf -258.11 -20.135 Td [(info)]TJ 0 g 0 G -/F51 9.9626 Tf -83.228 -20.65 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 384.625 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 384.426 Td [(a)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -22.128 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ -ET -q -1 0 0 1 273.363 336.805 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 276.501 336.605 Td [(desc)]TJ -ET -q -1 0 0 1 298.05 336.805 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 301.189 336.605 Td [(type)]TJ +/F62 9.9626 Tf 12.454 -20.082 Td [(1.)]TJ +0 g 0 G + [-500(One)-309(of)-310(the)-309(optional)-310(ar)18(guments)]TJ/F67 9.9626 Tf 152.661 0 Td [(parts)]TJ/F62 9.9626 Tf 26.152 0 Td [(,)]TJ/F67 9.9626 Tf 5.723 0 Td [(vg)]TJ/F62 9.9626 Tf 10.46 0 Td [(,)]TJ/F67 9.9626 Tf 5.723 0 Td [(vl)]TJ/F62 9.9626 Tf 10.46 0 Td [(,)]TJ/F67 9.9626 Tf 5.723 0 Td [(nl)]TJ/F62 9.9626 Tf 13.544 0 Td [(or)]TJ/F67 9.9626 Tf 12.458 0 Td [(repl)]TJ/F62 9.9626 Tf 24.005 0 Td [(must)-310(be)-309(speci-)]TJ -254.456 -11.956 Td [(\002ed,)-250(ther)18(eby)-250(choosing)-250(the)-250(initialization)-250(strategy)-250(as)-250(follows:)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -20.135 Td [(parts)]TJ +0 g 0 G +/F62 9.9626 Tf 27.666 0 Td [(In)-242(this)-242(case)-242(we)-243(have)-242(a)-242(subr)18(outine)-242(specifying)-242(the)-242(mapping)-242(between)]TJ -5.748 -11.955 Td [(global)-312(indices)-312(and)-311(pr)18(ocess/local)-312(index)-312(pairs.)-496(If)-311(this)-312(optional)-312(ar)18(gu-)]TJ 0 -11.955 Td [(ment)-230(is)-230(speci\002ed,)-234(then)-230(it)-230(is)-230(mandatory)-230(to)-230(specify)-230(the)-230(ar)18(gument)]TJ/F67 9.9626 Tf 274.929 0 Td [(mg)]TJ/F62 9.9626 Tf 12.752 0 Td [(as)]TJ -287.681 -11.955 Td [(well.)-310(The)-250(subr)18(outine)-250(must)-250(conform)-250(to)-250(the)-250(following)-250(interface:)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -222.215 -20.649 Td [(trans)]TJ +/F67 9.9626 Tf 10.46 -18.09 Td [(interface)]TJ 15.691 -11.955 Td [(subroutine)-525(psb_parts\050glob_index,mg,np,pv,nv\051)]TJ 0 g 0 G -/F54 9.9626 Tf 27.666 0 Td [(specify)-250(with)]TJ/F52 9.9626 Tf 56.398 0 Td [(unitd)]TJ/F54 9.9626 Tf 24.637 0 Td [(the)-250(operation)-250(to)-250(perform.)]TJ +/F62 9.9626 Tf 93.899 -29.888 Td [(71)]TJ 0 g 0 G -/F51 9.9626 Tf -83.794 -20.65 Td [(trans)-250(=)-250('N')]TJ +ET + +endstream +endobj +1458 0 obj +<< +/Length 9985 +>> +stream 0 g 0 G -/F54 9.9626 Tf 52.522 0 Td [(the)-250(operation)-250(is)-250(with)-250(no)-250(transposed)-250(matrix)]TJ 0 g 0 G -/F51 9.9626 Tf -52.522 -16.303 Td [(trans)-250(=)-250('T')]TJ +BT +/F67 9.9626 Tf 234.142 706.129 Td [(integer,)-525(intent)-525(\050in\051)-1050(::)-525(glob_index,np,mg)]TJ 0 -11.955 Td [(integer,)-525(intent)-525(\050out\051)-525(::)-525(nv,)-525(pv\050*\051)]TJ -10.461 -11.955 Td [(end)-525(subroutine)-525(psb_parts)]TJ -15.691 -11.956 Td [(end)-525(interface)]TJ/F62 9.9626 Tf -10.461 -17.586 Td [(The)-250(input)-250(ar)18(guments)-250(ar)18(e:)]TJ 0 g 0 G -/F54 9.9626 Tf 50.869 0 Td [(the)-250(operation)-250(is)-250(with)-250(transposed)-250(matrix.)]TJ +/F59 9.9626 Tf 0 -15.594 Td [(glob)]TJ +ET +q +1 0 0 1 218.61 637.283 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 221.599 637.083 Td [(index)]TJ 0 g 0 G -/F51 9.9626 Tf -50.869 -16.302 Td [(trans)-250(=)-250('C')]TJ +/F62 9.9626 Tf 30.436 0 Td [(The)-250(global)-250(index)-250(to)-250(be)-250(mapped;)]TJ 0 g 0 G -/F54 9.9626 Tf 51.417 0 Td [(the)-250(operation)-250(is)-250(with)-250(conjugate)-250(transposed)-250(matrix.)]TJ -51.417 -20.65 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Default:)]TJ/F52 9.9626 Tf 38.64 0 Td [(t)-15(r)-50(a)-25(n)-25(s)]TJ/F85 10.3811 Tf 25.193 0 Td [(=)]TJ/F52 9.9626 Tf 11.434 0 Td [(N)]TJ/F54 9.9626 Tf -75.267 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(character)-250(variable.)]TJ +/F59 9.9626 Tf -54.506 -13.774 Td [(np)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.65 Td [(unitd)]TJ +/F62 9.9626 Tf 17.156 0 Td [(The)-250(number)-250(of)-250(pr)18(ocesses)-250(in)-250(the)-250(mapping;)]TJ 0 g 0 G -/F54 9.9626 Tf 29.878 0 Td [(specify)-250(with)]TJ/F52 9.9626 Tf 56.398 0 Td [(trans)]TJ/F54 9.9626 Tf 23.521 0 Td [(the)-250(operation)-250(to)-250(perform.)]TJ +/F59 9.9626 Tf -17.156 -13.774 Td [(mg)]TJ 0 g 0 G -/F51 9.9626 Tf -84.89 -20.649 Td [(unitd)-250(=)-250('U')]TJ +/F62 9.9626 Tf 19.377 0 Td [(The)-250(total)-250(number)-250(of)-250(global)-250(r)18(ows)-250(in)-250(the)-250(mapping;)]TJ -19.377 -15.594 Td [(The)-250(output)-250(ar)18(guments)-250(ar)18(e:)]TJ 0 g 0 G -/F54 9.9626 Tf 54.186 0 Td [(the)-250(operation)-250(is)-250(with)-250(no)-250(scaling)]TJ +/F59 9.9626 Tf 0 -15.594 Td [(nv)]TJ 0 g 0 G -/F51 9.9626 Tf -54.186 -16.303 Td [(unitd)-250(=)-250('L)74(')]TJ +/F62 9.9626 Tf 16.608 0 Td [(The)-250(number)-250(of)-250(entries)-250(in)]TJ/F67 9.9626 Tf 111.052 0 Td [(pv)]TJ/F62 9.9626 Tf 10.461 0 Td [(;)]TJ 0 g 0 G -/F54 9.9626 Tf 51.785 0 Td [(the)-250(operation)-250(is)-250(with)-250(left)-250(scaling)]TJ +/F59 9.9626 Tf -138.121 -13.774 Td [(pv)]TJ 0 g 0 G -/F51 9.9626 Tf -51.785 -16.302 Td [(unitd)-250(=)-250('R')]TJ +/F62 9.9626 Tf 16.608 0 Td [(A)-393(vector)-394(containing)-393(the)-394(indices)-393(of)-394(the)-394(pr)18(ocesses)-393(to)-394(which)-393(the)]TJ 2.022 -11.955 Td [(global)-357(index)-357(should)-356(be)-357(assigend;)-410(each)-357(entry)-357(must)-357(satisfy)-357(0)]TJ/F91 10.3811 Tf 270.063 0 Td [(\024)]TJ/F60 9.9626 Tf -269.39 -11.956 Td [(p)-25(v)]TJ/F93 10.3811 Tf 10.461 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F69 10.3811 Tf 8.665 0 Td [(<)]TJ/F60 9.9626 Tf 12.71 0 Td [(n)-80(p)]TJ/F62 9.9626 Tf 11.442 0 Td [(;)-382(if)]TJ/F60 9.9626 Tf 16.006 0 Td [(n)-25(v)]TJ/F69 10.3811 Tf 15.409 0 Td [(>)]TJ/F62 9.9626 Tf 12.586 0 Td [(1)-338(we)-338(have)-338(an)-338(index)-338(assigned)-338(to)-338(multiple)]TJ -95.244 -11.955 Td [(pr)18(ocesses,)-250(i.e.)-310(we)-250(have)-250(an)-250(overlap)-250(among)-250(the)-250(subdomains.)]TJ 0 g 0 G -/F54 9.9626 Tf 53.628 0 Td [(the)-250(operation)-250(is)-250(with)-250(right)-250(scaling.)]TJ +/F59 9.9626 Tf -40.548 -15.594 Td [(vg)]TJ 0 g 0 G - 88.34 -29.888 Td [(51)]TJ +/F62 9.9626 Tf 16.06 0 Td [(In)-330(this)-330(case)-330(the)-330(association)-330(between)-331(an)-330(index)-330(and)-330(a)-330(pr)18(ocess)-330(is)-330(spec-)]TJ 5.858 -11.955 Td [(i\002ed)-371(via)-372(an)-371(integer)-372(vector)]TJ/F67 9.9626 Tf 120.986 0 Td [(vg\0501:mg\051)]TJ/F62 9.9626 Tf 41.842 0 Td [(;)-432(each)-372(index)]TJ/F60 9.9626 Tf 58.923 0 Td [(i)]TJ/F91 10.3811 Tf 8.096 0 Td [(2)-506(f)]TJ/F62 9.9626 Tf 17.485 0 Td [(1)-179(.)-192(.)-192(.)]TJ/F60 9.9626 Tf 19.967 0 Td [(m)-47(g)]TJ/F91 10.3811 Tf 13.449 0 Td [(g)]TJ/F62 9.9626 Tf 9.016 0 Td [(is)]TJ -289.764 -11.955 Td [(assigned)-381(to)-381(pr)18(ocess)]TJ/F60 9.9626 Tf 91.547 0 Td [(v)-47(g)]TJ/F93 10.3811 Tf 10.68 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.089 0 Td [(\051)]TJ/F62 9.9626 Tf 4.149 0 Td [(.)-703(The)-380(vector)]TJ/F67 9.9626 Tf 61.203 0 Td [(vg)]TJ/F62 9.9626 Tf 14.255 0 Td [(must)-381(be)-381(identical)-381(on)-380(all)]TJ -189.127 -11.955 Td [(calling)-354(pr)18(ocesses;)-406(its)-355(entri)1(es)-355(may)-354(have)-354(the)-354(ranges)]TJ/F93 10.3811 Tf 226.209 0 Td [(\050)]TJ/F62 9.9626 Tf 4.149 0 Td [(0)-179(.)-192(.)-191(.)]TJ/F60 9.9626 Tf 19.967 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.888 0 Td [(\000)]TJ/F62 9.9626 Tf 10.515 0 Td [(1)]TJ/F93 10.3811 Tf 5.106 0 Td [(\051)]TJ/F62 9.9626 Tf 7.678 0 Td [(or)]TJ/F93 10.3811 Tf -287.387 -11.955 Td [(\050)]TJ/F62 9.9626 Tf 4.149 0 Td [(1)-179(.)-192(.)-191(.)]TJ/F60 9.9626 Tf 19.967 0 Td [(n)-80(p)]TJ/F93 10.3811 Tf 11.566 0 Td [(\051)]TJ/F62 9.9626 Tf 6.984 0 Td [(accor)18(ding)-284(to)-285(the)-284(value)-285(of)]TJ/F67 9.9626 Tf 113.068 0 Td [(flag)]TJ/F62 9.9626 Tf 20.922 0 Td [(.)-413(The)-285(size)]TJ/F60 9.9626 Tf 45.955 0 Td [(m)-47(g)]TJ/F62 9.9626 Tf 16.159 0 Td [(may)-285(be)-284(spec-)]TJ -238.895 -11.955 Td [(i\002ed)-349(via)-349(the)-349(optional)-349(ar)18(gument)]TJ/F67 9.9626 Tf 144.092 0 Td [(mg)]TJ/F62 9.9626 Tf 10.46 0 Td [(;)-399(the)-349(default)-349(is)-349(to)-349(use)-349(the)-349(entir)18(e)]TJ -154.552 -11.956 Td [(vector)]TJ/F67 9.9626 Tf 29.937 0 Td [(vg)]TJ/F62 9.9626 Tf 10.461 0 Td [(,)-250(thus)-250(having)]TJ/F67 9.9626 Tf 59.885 0 Td [(mg=size\050vg\051)]TJ/F62 9.9626 Tf 57.534 0 Td [(.)]TJ 0 g 0 G -ET - -endstream -endobj -1202 0 obj -<< -/Length 4541 ->> -stream +/F59 9.9626 Tf -179.735 -15.593 Td [(vl)]TJ 0 g 0 G +/F62 9.9626 Tf 13.838 0 Td [(In)-383(this)-382(case)-383(we)-383(ar)18(e)-382(specifying)-383(the)-383(list)-382(of)-383(indices)]TJ/F67 9.9626 Tf 220.787 0 Td [(vl\0501:nl\051)]TJ/F62 9.9626 Tf 45.655 0 Td [(assigned)]TJ -258.362 -11.955 Td [(to)-401(the)-400(curr)18(ent)-401(pr)18(ocess;)-476(thus,)-438(the)-400(global)-401(pr)18(oblem)-400(size)]TJ/F60 9.9626 Tf 243.82 0 Td [(m)-47(g)]TJ/F62 9.9626 Tf 17.316 0 Td [(is)-401(given)]TJ -261.136 -11.956 Td [(by)-435(the)-435(range)-435(of)-435(the)-435(aggr)18(egate)-435(of)-435(the)-435(individual)-435(vectors)]TJ/F67 9.9626 Tf 259.368 0 Td [(vl)]TJ/F62 9.9626 Tf 14.794 0 Td [(spec-)]TJ -274.162 -11.955 Td [(i\002ed)-429(in)-429(the)-429(calling)-428(pr)18(ocesses.)-847(The)-429(size)-429(may)-429(be)-428(speci\002ed)-429(via)-429(the)]TJ 0 -11.955 Td [(optional)-438(ar)18(gument)]TJ/F67 9.9626 Tf 88.319 0 Td [(nl)]TJ/F62 9.9626 Tf 10.46 0 Td [(;)-532(the)-438(default)-438(is)-438(to)-438(use)-438(the)-438(entir)18(e)-438(vector)]TJ/F67 9.9626 Tf 185.156 0 Td [(vl)]TJ/F62 9.9626 Tf 10.461 0 Td [(,)]TJ -294.396 -11.955 Td [(thus)-364(having)]TJ/F67 9.9626 Tf 57.178 0 Td [(nl=size\050vl\051)]TJ/F62 9.9626 Tf 57.534 0 Td [(.)-652(If)]TJ/F67 9.9626 Tf 19.294 0 Td [(globalcheck=.true.)]TJ/F62 9.9626 Tf 97.774 0 Td [(the)-364(subr)18(outine)]TJ -231.78 -11.955 Td [(will)-403(check)-403(how)-404(many)-403(times)-403(each)-403(entry)-403(in)-404(the)-403(global)-403(index)-403(space)]TJ/F93 10.3811 Tf 0.125 -11.955 Td [(\050)]TJ/F62 9.9626 Tf 4.149 0 Td [(1)-179(.)-192(.)-191(.)]TJ/F60 9.9626 Tf 19.967 0 Td [(m)-47(g)]TJ/F93 10.3811 Tf 13.449 0 Td [(\051)]TJ/F62 9.9626 Tf 6.245 0 Td [(is)-210(speci\002ed)-211(in)-210(the)-210(input)-210(lists)]TJ/F67 9.9626 Tf 122.836 0 Td [(vl)]TJ/F62 9.9626 Tf 10.461 0 Td [(,)-218(thus)-211(allowin)1(g)-211(for)-210(the)-210(pr)18(es-)]TJ -177.232 -11.956 Td [(ence)-302(of)-302(overlap)-302(in)-302(the)-302(input,)-315(and)-302(checki)1(ng)-302(for)-302(\223orphan\224)-302(indices.)-466(If)]TJ/F67 9.9626 Tf 0 -11.955 Td [(globalcheck=.false.)]TJ/F62 9.9626 Tf 99.377 0 Td [(,)-437(the)-400(subr)18(outine)-400(will)-400(not)-400(check)-400(for)-400(overlap,)]TJ -99.377 -11.955 Td [(and)-255(may)-255(be)-255(signi\002cantly)-255(faster)74(,)-257(but)-255(the)-255(user)-255(is)-255(implicitly)-255(guarantee-)]TJ 0 -11.955 Td [(ing)-250(that)-250(ther)18(e)-250(ar)18(e)-250(neither)-250(orphan)-250(nor)-250(overlap)-250(indices.)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 175.611 706.129 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Default:)]TJ/F52 9.9626 Tf 38.64 0 Td [(u)-25(n)-18(i)-32(t)-25(d)]TJ/F85 10.3811 Tf 26.159 0 Td [(=)]TJ/F52 9.9626 Tf 10.927 0 Td [(U)]TJ/F54 9.9626 Tf -75.726 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(character)-250(variable.)]TJ +/F59 9.9626 Tf -21.918 -15.594 Td [(lidx)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(choice)]TJ +/F62 9.9626 Tf 22.685 0 Td [(The)-377(optional)-376(ar)18(gument)]TJ/F67 9.9626 Tf 107.528 0 Td [(lidx)]TJ/F62 9.9626 Tf 24.674 0 Td [(is)-377(available)-376(for)-377(those)-377(cases)-377(in)-376(which)]TJ -132.969 -11.955 Td [(the)-361(user)-361(has)-361(alr)18(eady)-361(established)-361(a)-361(global-to-local)-361(mapping;)-416(if)-361(it)-361(is)]TJ 0 -11.955 Td [(speci\002ed,)-253(each)-253(index)-253(in)]TJ/F67 9.9626 Tf 105.128 0 Td [(vl\050i\051)]TJ/F62 9.9626 Tf 28.669 0 Td [(will)-253(be)-252(mapped)-253(to)-253(the)-252(corr)18(esponding)]TJ -133.797 -11.955 Td [(local)-317(index)]TJ/F67 9.9626 Tf 51.649 0 Td [(lidx\050i\051)]TJ/F62 9.9626 Tf 36.612 0 Td [(.)-510(When)-317(specifying)-316(the)-317(ar)18(gument)]TJ/F67 9.9626 Tf 148.638 0 Td [(lidx)]TJ/F62 9.9626 Tf 24.076 0 Td [(the)-317(user)]TJ -260.975 -11.956 Td [(would)-329(also)-330(likely)-329(employ)]TJ/F67 9.9626 Tf 117.394 0 Td [(lidx)]TJ/F62 9.9626 Tf 24.203 0 Td [(in)-329(calls)-330(to)]TJ/F67 9.9626 Tf 46.656 0 Td [(psb_cdins)]TJ/F62 9.9626 Tf 50.355 0 Td [(and)]TJ/F67 9.9626 Tf 20.148 0 Td [(local)]TJ/F62 9.9626 Tf 29.433 0 Td [(in)]TJ -288.189 -11.955 Td [(calls)-250(to)]TJ/F67 9.9626 Tf 33.095 0 Td [(psb_spins)]TJ/F62 9.9626 Tf 49.564 0 Td [(and)]TJ/F67 9.9626 Tf 19.358 0 Td [(psb_geins)]TJ/F62 9.9626 Tf 47.073 0 Td [(;)-250(see)-250(also)-250(sec.)]TJ +0 0 1 rg 0 0 1 RG + [-250(2.3.1)]TJ 0 g 0 G -/F54 9.9626 Tf 33.753 0 Td [(speci\002es)-250(the)-250(update)-250(of)-250(overlap)-250(elements)-250(to)-250(be)-250(performed)-250(on)-250(exit:)]TJ + [(.)]TJ 0 g 0 G +/F59 9.9626 Tf -171.008 -15.593 Td [(nl)]TJ 0 g 0 G -/F59 9.9626 Tf -3.865 -19.925 Td [(psb_none_)]TJ +/F62 9.9626 Tf 14.386 0 Td [(If)-411(this)-411(ar)18(gument)-411(is)-411(speci\002ed)-411(alone)-411(\050i.e.)-793(without)]TJ/F67 9.9626 Tf 223.432 0 Td [(vl)]TJ/F62 9.9626 Tf 10.461 0 Td [(\051)-411(the)-411(r)18(esult)-411(is)-411(a)]TJ -226.361 -11.956 Td [(generalized)-280(r)18(ow-block)-280(distribution)-280(in)-280(which)-281(each)-280(pr)18(ocess)]TJ/F60 9.9626 Tf 257.148 0 Td [(I)]TJ/F62 9.9626 Tf 6.642 0 Td [(gets)-280(as-)]TJ -263.79 -11.955 Td [(signed)-250(a)-250(consecutive)-250(chunk)-250(of)]TJ/F60 9.9626 Tf 135.186 0 Td [(N)]TJ/F60 7.5716 Tf 7.851 -1.808 Td [(I)]TJ/F93 10.3811 Tf 6.316 1.808 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-25(l)]TJ/F62 9.9626 Tf 11.472 0 Td [(global)-250(indices.)]TJ 0 g 0 G +/F59 9.9626 Tf -193.829 -15.593 Td [(repl)]TJ 0 g 0 G - 0 -15.941 Td [(psb_sum_)]TJ +/F62 9.9626 Tf 23.243 0 Td [(This)-418(ar)18(guments)-417(speci\002es)-418(to)-417(r)18(eplicate)-418(all)-417(indices)-418(on)-418(all)-417(pr)18(ocesses.)]TJ -1.325 -11.956 Td [(This)-366(is)-367(a)-366(special)-366(purpose)-366(data)-367(allocation)-366(that)-366(is)-366(useful)-367(in)-366(the)-366(con-)]TJ 0 -11.955 Td [(str)8(uction)-250(of)-250(some)-250(multilevel)-250(pr)18(econditioners.)]TJ 0 g 0 G + -34.371 -19.579 Td [(2.)]TJ 0 g 0 G - 0 -15.94 Td [(psb_avg_)]TJ + [-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(descriptor)-250(is)-250(in)-250(the)-250(build)-250(state.)]TJ 0 g 0 G + 154.421 -29.888 Td [(72)]TJ 0 g 0 G - 0 -15.94 Td [(psb_square_root_)]TJ/F54 9.9626 Tf -4.982 -19.925 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F59 9.9626 Tf 38.515 0 Td [(psb_avg_)]TJ/F54 9.9626 Tf -38.515 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +ET + +endstream +endobj +1464 0 obj +<< +/Length 2555 +>> +stream 0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(diag)]TJ 0 g 0 G -/F54 9.9626 Tf 24.906 0 Td [(the)-250(diagonal)-250(scaling)-250(matrix.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F52 9.9626 Tf 38.64 0 Td [(d)-18(i)-47(a)-47(g)]TJ/F85 10.3811 Tf 18.52 0 Td [(\050)]TJ/F54 9.9626 Tf 4.15 0 Td [(1)]TJ/F85 10.3811 Tf 5.106 0 Td [(\051)-289(=)]TJ/F54 9.9626 Tf 18.002 0 Td [(1)]TJ/F85 10.3811 Tf 5.106 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(n)-25(o)-35(s)-25(c)-40(a)-25(l)-48(i)-32(n)-47(g)]TJ/F85 10.3811 Tf 41.384 0 Td [(\051)]TJ/F54 9.9626 Tf -135.182 -11.955 Td [(Speci\002ed)-293(as:)-395(a)-293(rank)-293(one)-293(array)-292(containing)-293(numbers)-293(of)-293(the)-292(type)-293(indicated)]TJ 0 -11.955 Td [(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(13)]TJ 0 g 0 G - [(.)]TJ +BT +/F62 9.9626 Tf 112.349 706.129 Td [(3.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.926 Td [(work)]TJ + [-500(Calling)-260(the)-260(r)18(outine)-260(with)]TJ/F67 9.9626 Tf 120.886 0 Td [(vg)]TJ/F62 9.9626 Tf 13.052 0 Td [(or)]TJ/F67 9.9626 Tf 11.965 0 Td [(parts)]TJ/F62 9.9626 Tf 28.742 0 Td [(implies)-260(that)-260(every)-260(pr)18(ocess)-260(will)-260(scan)]TJ -162.192 -11.955 Td [(the)-250(entir)18(e)-250(index)-250(space)-250(to)-250(\002gur)18(e)-250(out)-250(the)-250(local)-250(indices.)]TJ 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(a)-250(work)-250(array)111(.)]TJ -3.876 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-344(as:)-498(a)-344(rank)-343(one)-344(array)-344(of)-344(the)-344(same)-344(type)-344(of)]TJ/F52 9.9626 Tf 229.679 0 Td [(x)]TJ/F54 9.9626 Tf 8.631 0 Td [(with)-344(the)-344(T)74(ARGET)]TJ -238.31 -11.955 Td [(attribute.)]TJ + -12.453 -19.926 Td [(4.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.926 Td [(On)-250(Return)]TJ + [-500(Overlapped)-250(indices)-250(ar)18(e)-250(possible)-250(with)-250(both)]TJ/F67 9.9626 Tf 201.093 0 Td [(parts)]TJ/F62 9.9626 Tf 28.643 0 Td [(and)]TJ/F67 9.9626 Tf 19.357 0 Td [(vl)]TJ/F62 9.9626 Tf 12.951 0 Td [(invocations.)]TJ 0 g 0 G + -262.044 -19.925 Td [(5.)]TJ 0 g 0 G - 0 -19.925 Td [(y)]TJ + [-500(When)-190(the)-190(subr)18(outine)-190(is)-190(invoked)-190(with)]TJ/F67 9.9626 Tf 175.161 0 Td [(vl)]TJ/F62 9.9626 Tf 12.354 0 Td [(in)-190(conjunction)-190(with)]TJ/F67 9.9626 Tf 86.235 0 Td [(globalcheck=.true.)]TJ/F62 9.9626 Tf 94.146 0 Td [(,)]TJ -355.443 -11.955 Td [(it)-280(will)-281(perform)-280(a)-280(scan)-281(of)-280(the)-281(index)-280(space)-280(to)-281(sear)18(ch)-280(for)-280(overlap)-281(or)-280(orphan)]TJ 0 -11.955 Td [(indices.)]TJ 0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.445 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -166.165 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-379(as:)-568(an)-379(array)-379(of)-379(rank)-379(one)-379(or)-379(two)-379(containing)-379(numbers)-379(of)-379(type)]TJ 0 -11.955 Td [(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(13)]TJ + -12.453 -19.925 Td [(6.)]TJ 0 g 0 G - [(.)]TJ + [-500(When)-190(the)-190(subr)18(outine)-190(is)-190(invoked)-190(with)]TJ/F67 9.9626 Tf 175.161 0 Td [(vl)]TJ/F62 9.9626 Tf 12.354 0 Td [(in)-190(conjunction)-190(with)]TJ/F67 9.9626 Tf 86.235 0 Td [(globalcheck=.false.)]TJ/F62 9.9626 Tf 99.377 0 Td [(,)]TJ -360.674 -11.956 Td [(no)-338(index)-337(space)-338(scan)-337(will)-338(take)-337(place.)-573(Thus)-337(it)-338(is)-338(the)-337(r)18(esponsibility)-338(of)-337(the)]TJ 0 -11.955 Td [(user)-328(to)-328(make)-328(sur)18(e)-328(that)-328(the)-328(indices)-328(speci\002ed)-328(in)]TJ/F67 9.9626 Tf 209.973 0 Td [(vl)]TJ/F62 9.9626 Tf 13.729 0 Td [(have)-328(neither)-328(orphans)]TJ -223.702 -11.955 Td [(nor)-250(overlaps;)-250(if)-250(this)-250(assumption)-250(fails,)-250(r)18(esults)-250(will)-250(be)-250(unpr)18(edictable.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(info)]TJ + -12.453 -19.925 Td [(7.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.943 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + [-500(Orphan)-417(and)-416(overlap)-417(indices)-416(ar)18(e)-417(impossible)-416(by)-417(constr)8(uction)-417(when)-416(the)]TJ 12.453 -11.955 Td [(subr)18(outine)-250(is)-250(invoked)-250(with)]TJ/F67 9.9626 Tf 121.164 0 Td [(nl)]TJ/F62 9.9626 Tf 12.951 0 Td [(\050alone\051,)-250(or)]TJ/F67 9.9626 Tf 47.372 0 Td [(vg)]TJ/F62 9.9626 Tf 10.461 0 Td [(.)]TJ 0 g 0 G - 141.968 -73.723 Td [(52)]TJ + -49.98 -452.304 Td [(73)]TJ 0 g 0 G ET endstream endobj -1213 0 obj +1475 0 obj << -/Length 7400 +/Length 7006 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.14)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(6.2)-1000(psb)]TJ ET q -1 0 0 1 153.407 706.328 cm +1 0 0 1 198.238 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 156.993 706.129 Td [(gemlt)-250(\227)-250(Entrywise)-250(Product)]TJ/F54 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(entrywise)-250(pr)18(oduct)-250(between)-250(two)-250(vectors)]TJ/F52 9.9626 Tf 299.677 0 Td [(x)]TJ/F54 9.9626 Tf 7.697 0 Td [(and)]TJ/F52 9.9626 Tf 19.481 0 Td [(y)]TJ -187.918 -21.112 Td [(d)-25(o)-35(t)]TJ/F83 10.3811 Tf 16.336 0 Td [(\040)]TJ/F52 9.9626 Tf 13.567 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F52 9.9626 Tf 4.274 0 Td [(y)]TJ/F85 10.3811 Tf 5.231 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(.)]TJ/F59 9.9626 Tf -187.465 -21.111 Td [(psb_gemlt\050x,)-525(y,)-525(desc_a,)-525(info\051)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(cdins)-250(\227)-250(Communication)-250(descriptor)-250(insert)-250(routine)]TJ 0 g 0 G 0 g 0 G +/F67 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_cdins\050nz,)-525(ia,)-525(ja,)-525(desc_a,)-525(info)-525([,ila,jla]\051)]TJ 0 -11.955 Td [(call)-525(psb_cdins\050nz,ja,desc,info[,jla,mask,lidx]\051)]TJ/F62 9.9626 Tf 14.944 -20.366 Td [(This)-336(subr)18(outine)-335(examines)-336(the)-336(edges)-335(of)-336(the)-336(graph)-335(associated)-336(with)-335(the)-336(dis-)]TJ -14.944 -11.955 Td [(cr)18(etization)-260(mesh)-261(\050and)-260(isomorphic)-260(to)-261(the)-260(sparsity)-260(pattern)-261(of)-260(a)-260(linear)-261(system)-260(co-)]TJ 0 -11.955 Td [(ef)18(\002cient)-238(matrix\051,)-241(storing)-238(them)-239(as)-238(necessary)-238(into)-239(the)-238(communication)-238(descriptor)74(.)]TJ 0 -11.955 Td [(In)-259(the)-260(\002rst)-259(form)-260(the)-259(edges)-259(ar)18(e)-260(speci\002ed)-259(as)-260(pairs)-259(of)-260(indices)]TJ/F60 9.9626 Tf 255.974 0 Td [(i)-47(a)]TJ/F93 10.3811 Tf 7.91 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(,)]TJ/F60 9.9626 Tf 4.624 0 Td [(j)-40(a)]TJ/F93 10.3811 Tf 7.841 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(;)-264(the)-260(start)1(-)]TJ -299.233 -11.956 Td [(ing)-299(index)]TJ/F60 9.9626 Tf 44.948 0 Td [(i)-47(a)]TJ/F93 10.3811 Tf 7.91 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 7.13 0 Td [(should)-299(belong)-299(to)-299(the)-299(curr)18(ent)-299(pr)18(ocess.)-458(In)-299(the)-299(second)-299(form)-299(only)]TJ -67.28 -11.955 Td [(the)-250(r)18(emote)-250(indices)]TJ/F60 9.9626 Tf 83.65 0 Td [(j)-40(a)]TJ/F93 10.3811 Tf 7.841 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 6.64 0 Td [(ar)18(e)-250(speci\002ed.)]TJ 0 g 0 G -ET -q -1 0 0 1 183.035 630.896 cm -[]0 d 0 J 0.398 w 0 0 m 177.433 0 l S -Q -BT -/F52 9.9626 Tf 189.137 622.328 Td [(d)-25(o)-35(t)]TJ/F54 9.9626 Tf 13.444 0 Td [(,)]TJ/F52 9.9626 Tf 5.275 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(,)]TJ/F52 9.9626 Tf 5.106 0 Td [(y)]TJ/F51 9.9626 Tf 91.759 0 Td [(Function)]TJ -ET -q -1 0 0 1 183.035 618.542 cm -[]0 d 0 J 0.398 w 0 0 m 177.433 0 l S -Q -BT -/F54 9.9626 Tf 189.012 609.974 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 326.246 610.173 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.235 609.974 Td [(gemlt)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 326.246 598.218 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.235 598.019 Td [(gemlt)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 326.246 586.263 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.235 586.064 Td [(gemlt)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ -ET -q -1 0 0 1 326.246 574.308 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.235 574.109 Td [(gemlt)]TJ -ET -q -1 0 0 1 183.035 570.323 cm -[]0 d 0 J 0.398 w 0 0 m 177.433 0 l S -Q +/F59 9.9626 Tf -105.423 -20.366 Td [(T)90(ype:)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 227.467 541.944 Td [(T)92(able)-250(14:)-310(Data)-250(types)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G +/F59 9.9626 Tf -29.828 -19.304 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G -/F51 9.9626 Tf -127.572 -33.34 Td [(T)90(ype:)]TJ + 0 -19.305 Td [(nz)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 16.05 0 Td [(the)-250(number)-250(of)-250(points)-250(being)-250(inserted.)]TJ 8.857 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.603 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -24.907 -19.305 Td [(ia)]TJ 0 g 0 G +/F62 9.9626 Tf 13.28 0 Td [(the)-250(indices)-250(of)-250(the)-250(starting)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 11.627 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F60 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 10.336 0 Td [(.)]TJ 0 g 0 G - 0 -19.603 Td [(x)]TJ +/F59 9.9626 Tf -207.221 -19.304 Td [(ja)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F52 9.9626 Tf 174.06 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -164.321 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ -ET -q -1 0 0 1 280.646 421.777 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 283.785 421.578 Td [(T)]TJ -ET -q -1 0 0 1 289.642 421.777 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 292.781 421.578 Td [(vect)]TJ -ET -q -1 0 0 1 314.33 421.777 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 317.468 421.578 Td [(type)]TJ +/F62 9.9626 Tf 13.28 0 Td [(the)-250(indices)-250(of)-250(the)-250(end)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 11.627 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F60 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 10.336 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(2)]TJ +/F59 9.9626 Tf -207.221 -19.304 Td [(mask)]TJ 0 g 0 G - [(.)]TJ +/F62 9.9626 Tf 29.33 0 Td [(Mask)-247(entries)-248(in)]TJ/F67 9.9626 Tf 69.91 0 Td [(ja)]TJ/F62 9.9626 Tf 10.461 0 Td [(,)-248(they)-247(ar)18(e)-248(inserted)-247(only)-248(when)-247(the)-247(corr)18(esponding)]TJ/F67 9.9626 Tf 213.089 0 Td [(mask)]TJ/F62 9.9626 Tf -297.883 -11.956 Td [(entries)-250(ar)18(e)]TJ/F67 9.9626 Tf 48.139 0 Td [(.true.)]TJ/F62 9.9626 Tf -48.139 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(array)-250(of)-250(length)]TJ/F60 9.9626 Tf 164.297 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 10.336 0 Td [(,)-250(default)]TJ/F67 9.9626 Tf 38.784 0 Td [(.true.)]TJ/F62 9.9626 Tf 31.382 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.603 Td [(y)]TJ +/F59 9.9626 Tf -269.706 -19.305 Td [(lidx)]TJ 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F52 9.9626 Tf 173.89 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -164.61 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ -ET -q -1 0 0 1 280.646 342.398 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 283.785 342.199 Td [(T)]TJ -ET -q -1 0 0 1 289.642 342.398 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 292.781 342.199 Td [(vect)]TJ -ET -q -1 0 0 1 314.33 342.398 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 317.468 342.199 Td [(type)]TJ +/F62 9.9626 Tf 22.685 0 Td [(User)-250(de\002ned)-250(local)-250(indices)-250(for)]TJ/F67 9.9626 Tf 131.117 0 Td [(ja)]TJ/F62 9.9626 Tf 10.46 0 Td [(.)]TJ -139.355 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F60 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 10.336 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(2)]TJ +/F59 9.9626 Tf -207.221 -20.366 Td [(On)-250(Return)]TJ 0 g 0 G - [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.603 Td [(desc)]TJ + 0 -19.305 Td [(desc)]TJ ET q -1 0 0 1 120.408 310.84 cm +1 0 0 1 171.218 168.346 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 310.641 Td [(a)]TJ +/F59 9.9626 Tf 174.207 168.146 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.962 0 Td [(the)-250(updated)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 273.363 263.02 cm +1 0 0 1 360.068 120.525 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 276.501 262.82 Td [(desc)]TJ +/F67 9.9626 Tf 363.206 120.326 Td [(desc)]TJ ET q -1 0 0 1 298.05 263.02 cm +1 0 0 1 384.755 120.525 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 301.189 262.82 Td [(type)]TJ +/F67 9.9626 Tf 387.893 120.326 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -222.215 -19.602 Td [(On)-250(Return)]TJ + -91.236 -29.888 Td [(74)]TJ +0 g 0 G +ET + +endstream +endobj +1480 0 obj +<< +/Length 3007 +>> +stream 0 g 0 G 0 g 0 G - 0 -19.603 Td [(y)]TJ 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F52 9.9626 Tf 160.68 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -151.4 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ -ET -q -1 0 0 1 280.646 175.993 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 283.785 175.794 Td [(T)]TJ -ET -q -1 0 0 1 289.642 175.993 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 292.781 175.794 Td [(vect)]TJ -ET -q -1 0 0 1 314.33 175.993 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q BT -/F59 9.9626 Tf 317.468 175.794 Td [(type)]TJ +/F59 9.9626 Tf 99.895 706.129 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(14)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - [(.)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(ila)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.603 Td [(info)]TJ +/F62 9.9626 Tf 16.598 0 Td [(the)-250(local)-250(indices)-250(of)-250(the)-250(starting)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 8.309 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F60 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 10.336 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ +/F59 9.9626 Tf -207.221 -19.925 Td [(jla)]TJ 0 g 0 G -/F54 9.9626 Tf 114.88 -29.888 Td [(53)]TJ +/F62 9.9626 Tf 16.598 0 Td [(the)-250(local)-250(indices)-250(of)-250(the)-250(end)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 8.309 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F60 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 10.336 0 Td [(.)]TJ/F59 11.9552 Tf -207.221 -21.918 Td [(Notes)]TJ 0 g 0 G -ET - -endstream -endobj -1218 0 obj -<< -/Length 314 ->> -stream +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G + [-500(This)-250(r)18(outine)-250(may)-250(only)-250(be)-250(called)-250(if)-250(the)-250(descriptor)-250(is)-250(in)-250(the)-250(build)-250(state;)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 175.611 706.129 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + 0 -19.925 Td [(2.)]TJ 0 g 0 G - 141.968 -603.736 Td [(54)]TJ + [-500(This)-370(r)18(outine)-370(automatically)-370(ignor)18(es)-370(edges)-370(that)-370(do)-370(not)-370(insist)-370(on)-370(the)-370(cur)18(-)]TJ 12.453 -11.955 Td [(r)18(ent)-288(pr)18(ocess,)-298(i.e.)-424(edges)-288(for)-288(which)-288(neither)-288(the)-288(starting)-288(nor)-288(the)-288(end)-288(vertex)]TJ 0 -11.955 Td [(belong)-250(to)-250(the)-250(curr)18(ent)-250(pr)18(ocess.)]TJ +0 g 0 G + -12.453 -19.926 Td [(3.)]TJ +0 g 0 G + [-500(The)-322(second)-323(form)-322(of)-323(this)-322(r)18(outine)-323(will)-322(be)-322(useful)-323(when)-322(dealing)-323(with)-322(user)18(-)]TJ 12.453 -11.955 Td [(speci\002ed)-250(index)-250(mappings;)-250(see)-250(also)]TJ +0 0 1 rg 0 0 1 RG + [-250(2.3.1)]TJ +0 g 0 G + [(.)]TJ +0 g 0 G + 141.968 -314.819 Td [(75)]TJ 0 g 0 G ET endstream endobj -1229 0 obj +1489 0 obj << -/Length 7318 +/Length 5969 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.15)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(6.3)-1000(psb)]TJ ET q -1 0 0 1 153.407 706.328 cm +1 0 0 1 198.238 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 156.993 706.129 Td [(gediv)-250(\227)-250(Entrywise)-250(Division)]TJ/F54 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(entrywise)-250(division)-250(between)-250(two)-250(vectors)]TJ/F52 9.9626 Tf 300.604 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)]TJ/F52 9.9626 Tf 19.482 0 Td [(y)]TJ/F54 9.9626 Tf -188.347 -21.112 Td [(/)]TJ/F83 10.3811 Tf 9.054 0 Td [(\040)]TJ/F52 9.9626 Tf 13.567 0 Td [(x)]TJ/F85 10.3811 Tf 5.329 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.274 0 Td [(/)]TJ/F52 9.9626 Tf 6.286 0 Td [(y)]TJ/F85 10.3811 Tf 5.231 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(.)]TJ/F59 9.9626 Tf -186.967 -21.111 Td [(psb_gediv\050x,)-525(y,)-525(desc_a,)-525(info,)-525([flag\051)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(cdasb)-250(\227)-250(Communication)-250(descriptor)-250(assembly)-250(routine)]TJ 0 g 0 G 0 g 0 G +/F67 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_cdasb\050desc_a,)-525(info)-525([,)-525(mold]\051)]TJ 0 g 0 G +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(desc)]TJ ET q -1 0 0 1 183.199 630.896 cm -[]0 d 0 J 0.398 w 0 0 m 177.104 0 l S -Q -BT -/F54 9.9626 Tf 189.301 622.328 Td [(/)-13(,)]TJ/F52 9.9626 Tf 11.437 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(,)]TJ/F52 9.9626 Tf 5.105 0 Td [(y)]TJ/F51 9.9626 Tf 99.043 0 Td [(Function)]TJ -ET -q -1 0 0 1 183.199 618.542 cm -[]0 d 0 J 0.398 w 0 0 m 177.104 0 l S -Q -BT -/F54 9.9626 Tf 189.177 609.974 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 326.41 610.173 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.399 609.974 Td [(gediv)]TJ -140.222 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 326.41 598.218 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.399 598.019 Td [(gediv)]TJ -140.222 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 326.41 586.263 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.399 586.064 Td [(gediv)]TJ -140.222 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ -ET -q -1 0 0 1 326.41 574.308 cm +1 0 0 1 171.218 625.596 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 329.399 574.109 Td [(gediv)]TJ -ET -q -1 0 0 1 183.199 570.323 cm -[]0 d 0 J 0.398 w 0 0 m 177.104 0 l S -Q -0 g 0 G -BT -/F54 9.9626 Tf 227.467 541.944 Td [(T)92(able)-250(15:)-310(Data)-250(types)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -/F51 9.9626 Tf -127.572 -33.34 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.603 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.603 Td [(x)]TJ +/F59 9.9626 Tf 174.207 625.397 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F52 9.9626 Tf 174.06 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -164.321 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ +/F62 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 280.646 421.777 cm +1 0 0 1 360.068 577.775 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 283.785 421.578 Td [(T)]TJ +/F67 9.9626 Tf 363.206 577.576 Td [(desc)]TJ ET q -1 0 0 1 289.642 421.777 cm +1 0 0 1 384.755 577.775 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 292.781 421.578 Td [(vect)]TJ +/F67 9.9626 Tf 387.893 577.576 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -258.11 -19.925 Td [(mold)]TJ +0 g 0 G +/F62 9.9626 Tf 28.782 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(index)-250(storage.)]TJ -3.875 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(object)-190(of)-190(type)-190(derived)-190(fr)18(om)-190(\050integer\051)]TJ/F67 9.9626 Tf 221.926 0 Td [(psb)]TJ ET q -1 0 0 1 314.33 421.777 cm +1 0 0 1 413.855 510.029 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 317.468 421.578 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(2)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.603 Td [(y)]TJ -0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F52 9.9626 Tf 173.89 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -164.61 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ +/F67 9.9626 Tf 416.994 509.83 Td [(T)]TJ ET q -1 0 0 1 280.646 342.398 cm +1 0 0 1 422.851 510.029 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 283.785 342.199 Td [(T)]TJ +/F67 9.9626 Tf 425.99 509.83 Td [(base)]TJ ET q -1 0 0 1 289.642 342.398 cm +1 0 0 1 447.539 510.029 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 292.781 342.199 Td [(vect)]TJ +/F67 9.9626 Tf 450.677 509.83 Td [(vect)]TJ ET q -1 0 0 1 314.33 342.398 cm +1 0 0 1 472.226 510.029 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 317.468 342.199 Td [(type)]TJ +/F67 9.9626 Tf 475.364 509.83 Td [(type)]TJ/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(2)]TJ +/F59 9.9626 Tf -345.58 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G - [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.603 Td [(desc)]TJ + 0 -19.925 Td [(desc)]TJ ET q -1 0 0 1 120.408 310.84 cm +1 0 0 1 171.218 468.186 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 310.641 Td [(a)]TJ +/F59 9.9626 Tf 174.207 467.987 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 273.363 263.02 cm +1 0 0 1 360.068 420.366 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 276.501 262.82 Td [(desc)]TJ +/F67 9.9626 Tf 363.206 420.166 Td [(desc)]TJ ET q -1 0 0 1 298.05 263.02 cm +1 0 0 1 384.755 420.366 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 301.189 262.82 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -222.215 -19.602 Td [(\003ag)]TJ +/F67 9.9626 Tf 387.893 420.166 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 21.589 0 Td [(check)-280(if)-280(any)-280(of)-280(the)]TJ/F52 9.9626 Tf 84.137 0 Td [(y)]TJ/F85 10.3811 Tf 5.23 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)-343(=)]TJ/F54 9.9626 Tf 19.108 0 Td [(0,)-287(and)-280(in)-280(case)-280(r)18(eturns)-280(err)18(or)-280(halting)-280(the)-280(compu-)]TJ -112.449 -11.956 Td [(tation.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 40.677 0 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -108.97 -11.955 Td [(Speci\002ed)-250(as:)-310(the)-250(logical)-250(value)]TJ/F59 9.9626 Tf 132.133 0 Td [(flag)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.true.)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -157.04 -19.603 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -258.11 -19.925 Td [(info)]TJ 0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.906 -21.917 Td [(Notes)]TJ 0 g 0 G - 0 -19.603 Td [(x)]TJ +/F62 9.9626 Tf 12.453 -19.926 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F52 9.9626 Tf 160.849 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(.)]TJ -151.111 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ + [-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(descriptor)-250(is)-250(in)-250(the)-250(assembled)-250(state.)]TJ -12.453 -19.925 Td [(This)-274(call)-275(will)-274(set)-275(up)-274(all)-275(the)-274(necessary)-275(information)-274(for)-275(the)-274(halo)-275(data)-274(exchanges.)]TJ 0 -11.955 Td [(In)-337(doing)-337(so,)-358(the)-337(library)-337(will)-337(need)-336(to)-337(identify)-337(the)-337(set)-337(of)-337(pr)18(ocesse)1(s)-337(owning)-337(the)]TJ 0 -11.955 Td [(halo)-381(indices)-381(thr)18(ough)-380(the)-381(use)-381(of)-381(the)]TJ/F67 9.9626 Tf 163.76 0 Td [(desc%fnd_owner\050\051)]TJ/F62 9.9626 Tf 87.479 0 Td [(method;)-446(the)-381(owning)]TJ -251.239 -11.956 Td [(pr)18(ocesses)-273(ar)18(e)-273(the)-273(topological)-272(neighbours)-273(of)-273(the)-273(calling)-273(pr)18(ocess.)-379(If)-272(the)-273(user)-273(has)]TJ 0 -11.955 Td [(some)-248(backgr)18(ound)-248(information)-248(on)-248(the)-248(pr)18(ocesses)-248(that)-248(ar)18(e)-248(neighbours)-248(of)-248(the)-248(cur)18(-)]TJ 0 -11.955 Td [(r)18(ent)-274(one,)-281(it)-274(is)-275(possible)-274(to)-274(specify)-275(explicitly)-274(the)-274(list)-275(of)-274(adjacent)-274(pr)18(ocesses)-275(with)-274(a)]TJ 0 -11.955 Td [(call)-327(to)]TJ/F67 9.9626 Tf 30.401 0 Td [(desc%set_p_adjcncy\050list\051)]TJ/F62 9.9626 Tf 125.529 0 Td [(;)-365(this)-327(will)-327(speed)-327(u)1(p)-327(the)-327(subsequent)-327(call)-327(to)]TJ/F67 9.9626 Tf -155.93 -11.955 Td [(psb_cdasb)]TJ/F62 9.9626 Tf 47.073 0 Td [(.)]TJ 0 g 0 G - 85.819 -29.888 Td [(55)]TJ + 119.801 -116.528 Td [(76)]TJ 0 g 0 G ET endstream endobj -1122 0 obj +1377 0 obj << /Type /ObjStm /N 100 -/First 989 -/Length 12429 ->> -stream -252 0 1120 57 1116 114 1124 234 1126 352 1127 411 1128 470 1129 529 1130 588 1131 647 -1132 706 1123 765 1137 872 1133 1029 1134 1173 1135 1319 1139 1466 256 1524 1140 1581 1136 1639 -1144 1772 1141 1920 1142 2065 1146 2212 260 2271 1147 2329 1143 2387 1151 2507 1148 2655 1149 2800 -1153 2947 264 3005 1155 3062 1150 3119 1162 3253 1156 3419 1157 3566 1158 3711 1159 3853 1164 3999 -268 4058 1165 4116 1166 4174 1167 4233 1168 4292 1161 4351 1177 4498 1160 4700 1169 4847 1170 4991 -1171 5137 1172 5284 1173 5435 1174 5586 1175 5737 1179 5883 1176 5941 1184 6075 1181 6214 1186 6359 -272 6418 1187 6476 1183 6535 1195 6682 1182 6875 1188 7023 1189 7167 1190 7314 1191 7461 1192 7604 -1193 7751 1197 7896 1194 7954 1201 8088 1198 8236 1199 8383 1203 8530 1200 8589 1212 8709 1204 8902 -1205 9046 1206 9191 1207 9335 1208 9480 1209 9627 1210 9771 1214 9918 276 9976 1215 10033 1211 10091 -1217 10224 1219 10342 1216 10401 1228 10482 1220 10657 1221 10801 1222 10946 1223 11090 1224 11235 1230 11382 -% 252 0 obj +/First 984 +/Length 11356 +>> +stream +1367 0 1368 59 1364 118 1379 254 1381 372 1378 430 1387 498 1383 655 1384 799 1385 944 +1389 1091 296 1150 1390 1208 1386 1267 1396 1400 1391 1557 1393 1703 1394 1849 1398 1994 1399 2052 +1400 2110 1401 2168 1395 2226 1404 2333 1406 2451 1403 2510 1408 2578 1411 2696 1412 2823 1413 2866 +1414 3073 1415 3311 1416 3587 1410 3823 1402 3881 1407 3939 1423 4036 1419 4193 1420 4334 1421 4481 +1425 4628 300 4687 1426 4745 1422 4804 1428 4937 1430 5055 1427 5113 1435 5207 1432 5346 1437 5493 +304 5552 1438 5610 1434 5669 1442 5802 1433 5959 1439 6102 1440 6245 1444 6392 1441 6450 1446 6557 +1448 6675 308 6734 312 6792 1445 6849 1451 6982 1449 7121 1453 7268 1454 7326 1450 7384 1457 7504 +1455 7643 1459 7801 1461 7860 1456 7919 1463 8066 1465 8184 1466 8242 1467 8300 1468 8358 1469 8416 +1470 8474 1462 8532 1474 8613 1472 8752 1476 8897 316 8956 1473 9014 1479 9134 1477 9273 1481 9431 +1482 9489 1483 9547 1484 9605 1478 9663 1488 9757 1485 9905 1486 10050 1490 10196 320 10255 1491 10313 +% 1367 0 obj << -/D [1117 0 R /XYZ 99.895 716.092 null] +/D [1365 0 R /XYZ 149.705 753.953 null] >> -% 1120 0 obj +% 1368 0 obj << -/D [1117 0 R /XYZ 99.895 504.73 null] +/D [1365 0 R /XYZ 150.705 326.444 null] >> -% 1116 0 obj +% 1364 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F67 913 0 R >> +/XObject << /Im4 1362 0 R >> /ProcSet [ /PDF /Text ] >> -% 1124 0 obj +% 1379 0 obj << /Type /Page -/Contents 1125 0 R -/Resources 1123 0 R +/Contents 1380 0 R +/Resources 1378 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1121 0 R ->> -% 1126 0 obj -<< -/D [1124 0 R /XYZ 149.705 753.953 null] ->> -% 1127 0 obj -<< -/D [1124 0 R /XYZ 150.705 564.444 null] +/Parent 1382 0 R >> -% 1128 0 obj -<< -/D [1124 0 R /XYZ 150.705 504.067 null] ->> -% 1129 0 obj -<< -/D [1124 0 R /XYZ 175.611 506.876 null] ->> -% 1130 0 obj -<< -/D [1124 0 R /XYZ 175.611 494.921 null] ->> -% 1131 0 obj -<< -/D [1124 0 R /XYZ 175.611 482.966 null] ->> -% 1132 0 obj +% 1381 0 obj << -/D [1124 0 R /XYZ 175.611 471.011 null] +/D [1379 0 R /XYZ 98.895 753.953 null] >> -% 1123 0 obj +% 1378 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F59 812 0 R >> +/Font << /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1137 0 obj +% 1387 0 obj << /Type /Page -/Contents 1138 0 R -/Resources 1136 0 R +/Contents 1388 0 R +/Resources 1386 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1121 0 R -/Annots [ 1133 0 R 1134 0 R 1135 0 R ] +/Parent 1382 0 R +/Annots [ 1383 0 R 1384 0 R 1385 0 R ] >> -% 1133 0 obj +% 1383 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 416.057 444.603 428.117] +/Rect [419.358 345.485 495.412 357.545] /A << /S /GoTo /D (vdata) >> >> -% 1134 0 obj +% 1384 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [326.652 404.102 333.626 416.161] -/A << /S /GoTo /D (table.9) >> +/Rect [377.462 333.53 389.417 345.59] +/A << /S /GoTo /D (table.18) >> >> -% 1135 0 obj +% 1385 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 336.356 323.106 348.415] +/Rect [342.753 266.146 409.811 278.205] /A << /S /GoTo /D (descdata) >> >> -% 1139 0 obj +% 1389 0 obj << -/D [1137 0 R /XYZ 98.895 753.953 null] +/D [1387 0 R /XYZ 149.705 753.953 null] >> -% 256 0 obj +% 296 0 obj << -/D [1137 0 R /XYZ 99.895 716.092 null] +/D [1387 0 R /XYZ 150.705 716.092 null] >> -% 1140 0 obj +% 1390 0 obj << -/D [1137 0 R /XYZ 99.895 560.219 null] +/D [1387 0 R /XYZ 150.705 510.975 null] >> -% 1136 0 obj +% 1386 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F67 913 0 R /F93 915 0 R >> /ProcSet [ /PDF /Text ] >> -% 1144 0 obj +% 1396 0 obj << /Type /Page -/Contents 1145 0 R -/Resources 1143 0 R +/Contents 1397 0 R +/Resources 1395 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1121 0 R -/Annots [ 1141 0 R 1142 0 R ] +/Parent 1382 0 R +/Annots [ 1391 0 R 1393 0 R 1394 0 R ] >> -% 1141 0 obj +% 1391 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [306.858 340.341 384.376 352.401] -/A << /S /GoTo /D (spdata) >> +/Rect [202.52 554.876 214.475 566.936] +/A << /S /GoTo /D (table.18) >> >> -% 1142 0 obj +% 1393 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [306.858 272.595 373.916 284.655] -/A << /S /GoTo /D (descdata) >> ->> -% 1146 0 obj -<< -/D [1144 0 R /XYZ 149.705 753.953 null] ->> -% 260 0 obj -<< -/D [1144 0 R /XYZ 150.705 716.092 null] ->> -% 1147 0 obj -<< -/D [1144 0 R /XYZ 150.705 517.78 null] ->> -% 1143 0 obj -<< -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 1151 0 obj -<< -/Type /Page -/Contents 1152 0 R -/Resources 1150 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1121 0 R -/Annots [ 1148 0 R 1149 0 R ] +/Rect [407.408 325.46 414.381 339.127] +/A << /S /GoTo /D (figure.4) >> >> -% 1148 0 obj +% 1394 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 340.341 333.567 352.401] -/A << /S /GoTo /D (spdata) >> +/Rect [309.226 301.825 316.2 313.885] +/A << /S /GoTo /D (figure.3) >> >> -% 1149 0 obj +% 1398 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 272.595 323.106 284.655] -/A << /S /GoTo /D (descdata) >> +/D [1396 0 R /XYZ 98.895 753.953 null] >> -% 1153 0 obj +% 1399 0 obj << -/D [1151 0 R /XYZ 98.895 753.953 null] +/D [1396 0 R /XYZ 99.895 464.818 null] >> -% 264 0 obj +% 1400 0 obj << -/D [1151 0 R /XYZ 99.895 716.092 null] +/D [1396 0 R /XYZ 99.895 430.343 null] >> -% 1155 0 obj +% 1401 0 obj << -/D [1151 0 R /XYZ 99.895 517.78 null] +/D [1396 0 R /XYZ 99.895 386.508 null] >> -% 1150 0 obj +% 1395 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F96 1154 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1162 0 obj +% 1404 0 obj << /Type /Page -/Contents 1163 0 R -/Resources 1161 0 R +/Contents 1405 0 R +/Resources 1403 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1121 0 R -/Annots [ 1156 0 R 1157 0 R 1158 0 R 1159 0 R ] +/Parent 1382 0 R >> -% 1156 0 obj +% 1406 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [428.968 277.323 440.924 289.383] -/A << /S /GoTo /D (table.12) >> +/D [1404 0 R /XYZ 149.705 753.953 null] >> -% 1157 0 obj +% 1403 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [306.858 208.877 384.376 220.936] -/A << /S /GoTo /D (spdata) >> +/Font << /F62 667 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1158 0 obj +% 1408 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 140.43 495.412 152.49] -/A << /S /GoTo /D (vdata) >> +/Type /Page +/Contents 1409 0 R +/Resources 1407 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1382 0 R >> -% 1159 0 obj +% 1411 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [379.43 128.475 391.385 140.535] -/A << /S /GoTo /D (table.12) >> +/Producer (GPL Ghostscript 9.22) +/CreationDate (D:20180323100658Z00'00') +/ModDate (D:20180323100658Z00'00') >> -% 1164 0 obj +% 1412 0 obj << -/D [1162 0 R /XYZ 149.705 753.953 null] +/Type /ExtGState +/OPM 1 >> -% 268 0 obj +% 1413 0 obj << -/D [1162 0 R /XYZ 150.705 716.092 null] +/BaseFont /XYUGDR+Times-Roman +/FontDescriptor 1415 0 R +/Type /Font +/FirstChar 48 +/LastChar 57 +/Widths [ 500 500 500 500 500 500 500 500 500 500] +/Encoding /WinAnsiEncoding +/Subtype /Type1 >> -% 1165 0 obj +% 1414 0 obj << -/D [1162 0 R /XYZ 290.728 674.17 null] +/BaseFont /XISTAL+Times-Bold +/FontDescriptor 1416 0 R +/Type /Font +/FirstChar 48 +/LastChar 80 +/Widths [ 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 611] +/Encoding /WinAnsiEncoding +/Subtype /Type1 >> -% 1166 0 obj +% 1415 0 obj << -/D [1162 0 R /XYZ 287.931 654.041 null] +/Type /FontDescriptor +/FontName /XYUGDR+Times-Roman +/FontBBox [ 0 -14 476 688] +/Flags 65568 +/Ascent 688 +/CapHeight 688 +/Descent -14 +/ItalicAngle 0 +/StemV 71 +/MissingWidth 250 +/CharSet (/eight/five/four/nine/one/seven/six/three/two/zero) +/FontFile3 1417 0 R >> -% 1167 0 obj +% 1416 0 obj +<< +/Type /FontDescriptor +/FontName /XISTAL+Times-Bold +/FontBBox [ 0 -13 600 688] +/Flags 65568 +/Ascent 688 +/CapHeight 676 +/Descent -13 +/ItalicAngle 0 +/StemV 90 +/MissingWidth 250 +/CharSet (/P/one/zero) +/FontFile3 1418 0 R +>> +% 1410 0 obj << -/D [1162 0 R /XYZ 287.193 633.911 null] +/D [1408 0 R /XYZ 98.895 753.953 null] >> -% 1168 0 obj +% 1402 0 obj << -/D [1162 0 R /XYZ 150.705 447.252 null] +/D [1408 0 R /XYZ 99.895 282.918 null] >> -% 1161 0 obj +% 1407 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F60 1027 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F62 667 0 R >> +/XObject << /Im5 1392 0 R >> /ProcSet [ /PDF /Text ] >> -% 1177 0 obj +% 1423 0 obj << /Type /Page -/Contents 1178 0 R -/Resources 1176 0 R +/Contents 1424 0 R +/Resources 1422 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1180 0 R -/Annots [ 1160 0 R 1169 0 R 1170 0 R 1171 0 R 1172 0 R 1173 0 R 1174 0 R 1175 0 R ] +/Parent 1382 0 R +/Annots [ 1419 0 R 1420 0 R 1421 0 R ] >> -% 1160 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [378.159 654.503 390.114 666.562] -/A << /S /GoTo /D (table.12) >> ->> -% 1169 0 obj +% 1419 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 588.085 444.603 600.145] +/Rect [419.358 298.8 495.412 310.86] /A << /S /GoTo /D (vdata) >> >> -% 1170 0 obj +% 1420 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [328.746 576.13 340.701 588.189] -/A << /S /GoTo /D (table.12) >> +/Rect [255.331 289.495 267.287 298.905] +/A << /S /GoTo /D (table.19) >> >> -% 1171 0 obj +% 1421 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 497.757 323.106 509.817] +/Rect [342.753 218.115 409.811 230.175] /A << /S /GoTo /D (descdata) >> >> -% 1172 0 obj +% 1425 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [338.139 460.563 345.113 472.623] -/A << /S /GoTo /D (equation.4.1) >> +/D [1423 0 R /XYZ 149.705 753.953 null] >> -% 1173 0 obj +% 300 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [336.486 445.951 343.459 458.011] -/A << /S /GoTo /D (equation.4.2) >> +/D [1423 0 R /XYZ 150.705 716.092 null] >> -% 1174 0 obj +% 1426 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [337.034 431.339 344.007 443.399] -/A << /S /GoTo /D (equation.4.3) >> +/D [1423 0 R /XYZ 150.705 460.417 null] >> -% 1175 0 obj +% 1422 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [202.52 189.579 214.475 201.639] -/A << /S /GoTo /D (table.12) >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F93 915 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1179 0 obj +% 1428 0 obj +<< +/Type /Page +/Contents 1429 0 R +/Resources 1427 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1431 0 R +>> +% 1430 0 obj << -/D [1177 0 R /XYZ 98.895 753.953 null] +/D [1428 0 R /XYZ 98.895 753.953 null] >> -% 1176 0 obj +% 1427 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F60 1027 0 R /F52 585 0 R /F59 812 0 R /F85 814 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1184 0 obj +% 1435 0 obj << /Type /Page -/Contents 1185 0 R -/Resources 1183 0 R +/Contents 1436 0 R +/Resources 1434 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1180 0 R -/Annots [ 1181 0 R ] +/Parent 1431 0 R +/Annots [ 1432 0 R ] >> -% 1181 0 obj +% 1432 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [428.968 116.52 440.924 128.58] -/A << /S /GoTo /D (table.13) >> +/Rect [342.753 211.064 409.811 223.124] +/A << /S /GoTo /D (descdata) >> >> -% 1186 0 obj +% 1437 0 obj << -/D [1184 0 R /XYZ 149.705 753.953 null] +/D [1435 0 R /XYZ 149.705 753.953 null] >> -% 272 0 obj +% 304 0 obj << -/D [1184 0 R /XYZ 150.705 716.092 null] +/D [1435 0 R /XYZ 150.705 716.092 null] >> -% 1187 0 obj +% 1438 0 obj << -/D [1184 0 R /XYZ 150.705 268.704 null] +/D [1435 0 R /XYZ 150.705 449.977 null] >> -% 1183 0 obj +% 1434 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F60 1027 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F93 915 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1195 0 obj +% 1442 0 obj << /Type /Page -/Contents 1196 0 R -/Resources 1194 0 R +/Contents 1443 0 R +/Resources 1441 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1180 0 R -/Annots [ 1182 0 R 1188 0 R 1189 0 R 1190 0 R 1191 0 R 1192 0 R 1193 0 R ] +/Parent 1431 0 R +/Annots [ 1433 0 R 1439 0 R 1440 0 R ] >> -% 1182 0 obj +% 1433 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [305.144 654.503 312.117 666.562] -/A << /S /GoTo /D (section.3) >> +/Rect [287.51 642.547 363.564 654.607] +/A << /S /GoTo /D (vdata) >> >> -% 1188 0 obj +% 1439 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 586.032 444.603 598.092] +/Rect [123.806 542.921 199.86 554.981] /A << /S /GoTo /D (vdata) >> >> -% 1189 0 obj +% 1440 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [328.621 574.077 340.576 586.136] -/A << /S /GoTo /D (table.13) >> +/Rect [421.516 542.921 433.471 554.981] +/A << /S /GoTo /D (table.20) >> >> -% 1190 0 obj +% 1444 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [378.159 493.651 390.114 505.711] -/A << /S /GoTo /D (table.13) >> +/D [1442 0 R /XYZ 98.895 753.953 null] >> -% 1191 0 obj +% 1441 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 425.181 444.603 437.24] -/A << /S /GoTo /D (vdata) >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1192 0 obj +% 1446 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [328.746 413.225 340.701 425.285] -/A << /S /GoTo /D (table.13) >> +/Type /Page +/Contents 1447 0 R +/Resources 1445 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1431 0 R >> -% 1193 0 obj +% 1448 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 332.8 323.106 344.859] -/A << /S /GoTo /D (descdata) >> +/D [1446 0 R /XYZ 149.705 753.953 null] >> -% 1197 0 obj +% 308 0 obj +<< +/D [1446 0 R /XYZ 150.705 716.092 null] +>> +% 312 0 obj << -/D [1195 0 R /XYZ 98.895 753.953 null] +/D [1446 0 R /XYZ 150.705 691.48 null] >> -% 1194 0 obj +% 1445 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F59 812 0 R /F60 1027 0 R /F85 814 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R /F93 915 0 R >> /ProcSet [ /PDF /Text ] >> -% 1201 0 obj +% 1451 0 obj << /Type /Page -/Contents 1202 0 R -/Resources 1200 0 R +/Contents 1452 0 R +/Resources 1450 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1180 0 R -/Annots [ 1198 0 R 1199 0 R ] +/Parent 1431 0 R +/Annots [ 1449 0 R ] >> -% 1198 0 obj +% 1449 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [211.646 410.079 223.601 419.489] -/A << /S /GoTo /D (table.13) >> +/Rect [291.943 324.687 359.001 336.746] +/A << /S /GoTo /D (descdata) >> >> -% 1199 0 obj +% 1453 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [253.329 228.102 265.284 240.161] -/A << /S /GoTo /D (table.13) >> +/D [1451 0 R /XYZ 98.895 753.953 null] >> -% 1203 0 obj +% 1454 0 obj << -/D [1201 0 R /XYZ 149.705 753.953 null] +/D [1451 0 R /XYZ 99.895 234.157 null] >> -% 1200 0 obj +% 1450 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F52 585 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F60 666 0 R /F93 915 0 R >> /ProcSet [ /PDF /Text ] >> -% 1212 0 obj +% 1457 0 obj << /Type /Page -/Contents 1213 0 R -/Resources 1211 0 R +/Contents 1458 0 R +/Resources 1456 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1180 0 R -/Annots [ 1204 0 R 1205 0 R 1206 0 R 1207 0 R 1208 0 R 1209 0 R 1210 0 R ] ->> -% 1204 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [263.331 417.772 339.385 429.832] -/A << /S /GoTo /D (vdata) >> ->> -% 1205 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [224.557 405.817 231.53 417.877] -/A << /S /GoTo /D (table.2) >> +/Parent 1431 0 R +/Annots [ 1455 0 R ] >> -% 1206 0 obj +% 1455 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [263.331 338.393 339.385 350.453] -/A << /S /GoTo /D (vdata) >> +/Rect [405.298 215.702 427.216 227.166] +/A << /S /GoTo /D (subsubsection.2.3.1) >> >> -% 1207 0 obj +% 1459 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [224.557 326.438 231.53 338.498] -/A << /S /GoTo /D (table.2) >> +/D [1457 0 R /XYZ 149.705 753.953 null] >> -% 1208 0 obj +% 1461 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 259.015 323.106 271.074] -/A << /S /GoTo /D (descdata) >> +/D [1457 0 R /XYZ 150.705 133.283 null] >> -% 1209 0 obj +% 1456 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [263.331 171.988 339.385 184.048] -/A << /S /GoTo /D (vdata) >> +/Font << /F67 913 0 R /F62 667 0 R /F59 665 0 R /F91 914 0 R /F60 666 0 R /F93 915 0 R /F69 1460 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1210 0 obj +% 1463 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [242.868 160.033 254.823 172.093] -/A << /S /GoTo /D (table.14) >> +/Type /Page +/Contents 1464 0 R +/Resources 1462 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1471 0 R >> -% 1214 0 obj +% 1465 0 obj << -/D [1212 0 R /XYZ 98.895 753.953 null] +/D [1463 0 R /XYZ 98.895 753.953 null] >> -% 276 0 obj +% 1466 0 obj << -/D [1212 0 R /XYZ 99.895 716.092 null] +/D [1463 0 R /XYZ 99.895 716.092 null] >> -% 1215 0 obj +% 1467 0 obj << -/D [1212 0 R /XYZ 99.895 560.161 null] +/D [1463 0 R /XYZ 99.895 687.379 null] >> -% 1211 0 obj +% 1468 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/D [1463 0 R /XYZ 99.895 667.454 null] >> -% 1217 0 obj +% 1469 0 obj << -/Type /Page -/Contents 1218 0 R -/Resources 1216 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1180 0 R +/D [1463 0 R /XYZ 99.895 626.268 null] >> -% 1219 0 obj +% 1470 0 obj << -/D [1217 0 R /XYZ 149.705 753.953 null] +/D [1463 0 R /XYZ 99.895 567.828 null] >> -% 1216 0 obj +% 1462 0 obj << -/Font << /F54 586 0 R /F51 584 0 R >> +/Font << /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1228 0 obj +% 1474 0 obj << /Type /Page -/Contents 1229 0 R -/Resources 1227 0 R +/Contents 1475 0 R +/Resources 1473 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1232 0 R -/Annots [ 1220 0 R 1221 0 R 1222 0 R 1223 0 R 1224 0 R ] +/Parent 1471 0 R +/Annots [ 1472 0 R ] >> -% 1220 0 obj +% 1472 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [263.331 417.772 339.385 429.832] -/A << /S /GoTo /D (vdata) >> +/Rect [342.753 116.52 409.811 128.58] +/A << /S /GoTo /D (descdata) >> >> -% 1221 0 obj +% 1476 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [224.557 405.817 231.53 417.877] -/A << /S /GoTo /D (table.2) >> +/D [1474 0 R /XYZ 149.705 753.953 null] >> -% 1222 0 obj +% 316 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [263.331 338.393 339.385 350.453] -/A << /S /GoTo /D (vdata) >> +/D [1474 0 R /XYZ 150.705 716.092 null] >> -% 1223 0 obj +% 1473 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [224.557 326.438 231.53 338.498] -/A << /S /GoTo /D (table.2) >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1224 0 obj +% 1479 0 obj +<< +/Type /Page +/Contents 1480 0 R +/Resources 1478 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1471 0 R +/Annots [ 1477 0 R ] +>> +% 1477 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 259.015 323.106 271.074] -/A << /S /GoTo /D (descdata) >> ->> -% 1230 0 obj -<< -/D [1228 0 R /XYZ 98.895 753.953 null] +/Rect [278.165 401.451 300.083 413.511] +/A << /S /GoTo /D (subsubsection.2.3.1) >> >> - -endstream -endobj -1236 0 obj +% 1481 0 obj << -/Length 1288 +/D [1479 0 R /XYZ 98.895 753.953 null] >> -stream -0 g 0 G -0 g 0 G -BT -/F54 9.9626 Tf 175.611 706.129 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ -ET -q -1 0 0 1 331.456 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 334.594 706.129 Td [(T)]TJ -ET -q -1 0 0 1 340.452 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 343.59 706.129 Td [(vect)]TJ -ET -q -1 0 0 1 365.139 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 368.277 706.129 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 24.453 0 Td [(containing)-354(numbers)-355(of)]TJ -217.119 -11.955 Td [(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(14)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -19.926 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -0 g 0 G - 141.968 -535.99 Td [(56)]TJ -0 g 0 G -ET - -endstream -endobj -1245 0 obj +% 1482 0 obj << -/Length 7234 +/D [1479 0 R /XYZ 99.895 496.698 null] >> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(4.16)-1000(psb)]TJ -ET -q -1 0 0 1 153.407 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 156.993 706.129 Td [(geinv)-250(\227)-250(Entrywise)-250(Inversion)]TJ/F54 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(entrywise)-250(inverse)-250(of)-250(a)-250(vector)]TJ/F52 9.9626 Tf 252.097 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(and)-250(puts)-250(it)-250(into)]TJ/F52 9.9626 Tf 69.951 0 Td [(y)]TJ/F54 9.9626 Tf -184.401 -18.334 Td [(/)]TJ/F83 10.3811 Tf 9.054 0 Td [(\040)]TJ/F54 9.9626 Tf 13.272 0 Td [(1)-13(/)]TJ/F52 9.9626 Tf 11.562 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.089 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(.)]TJ/F59 9.9626 Tf -181.059 -18.334 Td [(psb_geinv\050x,)-525(y,)-525(desc_a,)-525(info,)-525([flag\051)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -ET -q -1 0 0 1 183.343 637.562 cm -[]0 d 0 J 0.398 w 0 0 m 176.815 0 l S -Q -BT -/F54 9.9626 Tf 189.446 628.995 Td [(/)-12(,)]TJ/F52 9.9626 Tf 11.437 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(,)]TJ/F52 9.9626 Tf 5.106 0 Td [(y)]TJ/F51 9.9626 Tf 99.042 0 Td [(Function)]TJ -ET -q -1 0 0 1 183.343 625.209 cm -[]0 d 0 J 0.398 w 0 0 m 176.815 0 l S -Q -BT -/F54 9.9626 Tf 189.321 616.641 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 326.555 616.84 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.544 616.641 Td [(geinv)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 326.555 604.885 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.544 604.686 Td [(geinv)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 326.555 592.93 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.544 592.731 Td [(geinv)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ -ET -q -1 0 0 1 326.555 580.975 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 329.544 580.775 Td [(geinv)]TJ -ET -q -1 0 0 1 183.343 576.99 cm -[]0 d 0 J 0.398 w 0 0 m 176.815 0 l S -Q -0 g 0 G -BT -/F54 9.9626 Tf 227.467 548.611 Td [(T)92(able)-250(16:)-310(Data)-250(types)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -/F51 9.9626 Tf -127.572 -29.451 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -18.492 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -18.491 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(vector)]TJ/F52 9.9626 Tf 174.06 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -164.321 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ -ET -q -1 0 0 1 280.646 434.555 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 283.785 434.356 Td [(T)]TJ -ET -q -1 0 0 1 289.642 434.555 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 292.781 434.356 Td [(vect)]TJ -ET -q -1 0 0 1 314.33 434.555 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 317.468 434.356 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(2)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -18.492 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 404.108 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 403.909 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 132.243 0 Td [(psb)]TJ -ET -q -1 0 0 1 273.363 356.288 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 276.501 356.088 Td [(desc)]TJ -ET -q -1 0 0 1 298.05 356.288 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 301.189 356.088 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -222.215 -18.491 Td [(\003ag)]TJ -0 g 0 G -/F54 9.9626 Tf 21.589 0 Td [(check)-278(if)-279(any)-278(of)-278(the)]TJ/F52 9.9626 Tf 84.227 0 Td [(x)]TJ/F85 10.3811 Tf 5.329 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)-340(=)]TJ/F54 9.9626 Tf 19.049 0 Td [(0,)-285(and)-279(in)-278(case)-279(r)18(eturns)-278(err)18(or)-278(halting)-279(the)-278(compu-)]TJ -112.58 -11.955 Td [(tation.)]TJ 0 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 40.677 0 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -108.97 -11.955 Td [(Speci\002ed)-250(as:)-310(the)-250(logical)-250(value)]TJ/F59 9.9626 Tf 132.133 0 Td [(flag)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(.true.)]TJ -0 g 0 G -/F51 9.9626 Tf -157.04 -18.492 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -18.491 Td [(y)]TJ -0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F52 9.9626 Tf 160.849 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(.)]TJ -151.669 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-354(as:)-519(an)-355(object)-354(of)-355(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 139.526 0 Td [(psb)]TJ -ET -q -1 0 0 1 280.646 205.171 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 283.785 204.972 Td [(T)]TJ -ET -q -1 0 0 1 289.642 205.171 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 292.781 204.972 Td [(vect)]TJ -ET -q -1 0 0 1 314.33 205.171 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 317.468 204.972 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 24.452 0 Td [(containing)-354(numbers)-355(of)]TJ -217.118 -11.955 Td [(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(16)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -18.492 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -0 g 0 G - 141.968 -36.266 Td [(57)]TJ -0 g 0 G -ET - -endstream -endobj -1251 0 obj +% 1483 0 obj << -/Length 623 +/D [1479 0 R /XYZ 99.895 474.179 null] >> -stream -0 g 0 G -0 g 0 G -BT -/F51 14.3462 Tf 150.705 706.042 Td [(5)-1000(Communication)-250(routines)]TJ/F54 9.9626 Tf 0 -22.702 Td [(The)-303(r)18(outines)-302(in)-303(this)-303(chapter)-302(implement)-303(various)-303(global)-302(communication)-303(opera-)]TJ 0 -11.955 Td [(tors)-271(on)-271(vectors)-271(associated)-271(with)-271(a)-272(discr)18(etization)-271(mesh.)-373(For)-271(auxiliary)-271(communi-)]TJ 0 -11.955 Td [(cation)-250(r)18(outines)-250(not)-250(tied)-250(to)-250(a)-250(discr)18(etization)-250(space)-250(see)]TJ -0 0 1 rg 0 0 1 RG - [-250(6)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G - 166.874 -568.992 Td [(58)]TJ -0 g 0 G -ET - -endstream -endobj -1259 0 obj +% 1484 0 obj << -/Length 6340 +/D [1479 0 R /XYZ 99.895 430.343 null] >> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(5.1)-1000(psb)]TJ -ET -q -1 0 0 1 147.429 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 151.016 706.129 Td [(halo)-250(\227)-250(Halo)-250(Data)-250(Communication)]TJ/F54 9.9626 Tf -51.121 -19.15 Td [(These)-250(subr)18(outines)-250(gathers)-250(the)-250(values)-250(of)-250(the)-250(halo)-250(elements:)]TJ/F52 9.9626 Tf 158.568 -25.014 Td [(x)]TJ/F83 10.3811 Tf 8.097 0 Td [(\040)]TJ/F52 9.9626 Tf 13.567 0 Td [(x)]TJ/F54 9.9626 Tf -180.232 -22.11 Td [(wher)18(e:)]TJ -0 g 0 G -/F52 9.9626 Tf 0.294 -20.212 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 10.187 0 Td [(is)-250(a)-250(global)-250(dense)-250(submatrix.)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -ET -q -1 0 0 1 179.582 596.326 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F60 9.9626 Tf 185.685 587.758 Td [(a)]TJ/F54 9.9626 Tf 5.384 0 Td [(,)]TJ/F52 9.9626 Tf 5.276 0 Td [(x)]TJ/F51 9.9626 Tf 110.13 0 Td [(Subroutine)]TJ -ET -q -1 0 0 1 179.582 583.972 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F54 9.9626 Tf 185.56 575.404 Td [(Integer)-8983(psb)]TJ -ET -q -1 0 0 1 322.794 575.603 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 325.783 575.404 Td [(halo)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 322.794 563.648 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 325.783 563.449 Td [(halo)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 322.794 551.693 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 325.783 551.494 Td [(halo)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 322.794 539.738 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 325.783 539.539 Td [(halo)]TJ -140.223 -11.956 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ -ET -q -1 0 0 1 322.794 527.783 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q +% 1478 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1488 0 obj +<< +/Type /Page +/Contents 1489 0 R +/Resources 1487 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1471 0 R +/Annots [ 1485 0 R 1486 0 R ] +>> +% 1485 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 573.77 409.811 585.83] +/A << /S /GoTo /D (descdata) >> +>> +% 1486 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 416.361 409.811 428.42] +/A << /S /GoTo /D (descdata) >> +>> +% 1490 0 obj +<< +/D [1488 0 R /XYZ 149.705 753.953 null] +>> +% 320 0 obj +<< +/D [1488 0 R /XYZ 150.705 716.092 null] +>> +% 1491 0 obj +<< +/D [1488 0 R /XYZ 150.705 326.302 null] +>> + +endstream +endobj +1497 0 obj +<< +/Length 3168 +>> +stream +0 g 0 G +0 g 0 G BT -/F54 9.9626 Tf 325.783 527.583 Td [(halo)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(6.4)-1000(psb)]TJ ET q -1 0 0 1 179.582 523.798 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 227.467 495.419 Td [(T)92(able)-250(17:)-310(Data)-250(types)]TJ -0 g 0 G -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -127.572 -24.102 Td [(call)]TJ +/F59 11.9552 Tf 151.016 706.129 Td [(cdcpy)-250(\227)-250(Copies)-250(a)-250(communication)-250(descriptor)]TJ 0 g 0 G - [-525(psb_halo\050x,)-525(desc_a,)-525(info\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ -0 g 0 G - [-525(psb_halo\050x,)-525(desc_a,)-525(info,)-525(work,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(data)]TJ -0 g 0 G - [(\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.301 Td [(T)90(ype:)]TJ +/F67 9.9626 Tf -51.121 -18.964 Td [(call)-525(psb_cdcpy\050desc_in,)-525(desc_out,)-525(info\051)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.308 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - 0 -20.309 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 89.688 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -79.949 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.956 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ + 0 -19.925 Td [(desc)]TJ ET q -1 0 0 1 385.864 348.823 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 120.408 625.596 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 389.002 348.623 Td [(T)]TJ +/F59 9.9626 Tf 123.397 625.397 Td [(in)]TJ +0 g 0 G +/F62 9.9626 Tf 14.386 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -12.981 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 394.86 348.823 cm +1 0 0 1 309.258 577.775 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 397.998 348.623 Td [(vect)]TJ +/F67 9.9626 Tf 312.397 577.576 Td [(desc)]TJ ET q -1 0 0 1 419.547 348.823 cm +1 0 0 1 333.945 577.775 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 422.685 348.623 Td [(type)]TJ +/F67 9.9626 Tf 337.084 577.576 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf -297.883 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(17)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G - [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.308 Td [(desc)]TJ + 0 -19.925 Td [(desc)]TJ ET q -1 0 0 1 120.408 316.559 cm +1 0 0 1 120.408 535.932 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 316.36 Td [(a)]TJ +/F59 9.9626 Tf 123.397 535.733 Td [(out)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 19.925 0 Td [(the)-250(communication)-250(descriptor)-250(copy)111(.)]TJ -18.52 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 309.258 268.738 cm +1 0 0 1 309.258 488.112 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 312.397 268.539 Td [(desc)]TJ +/F67 9.9626 Tf 312.397 487.912 Td [(desc)]TJ ET q -1 0 0 1 333.945 268.738 cm +1 0 0 1 333.945 488.112 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 337.084 268.539 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F67 9.9626 Tf 337.084 487.912 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -20.308 Td [(work)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(the)-250(work)-250(array)111(.)]TJ -3.875 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(of)-250(the)-250(same)-250(type)-250(of)]TJ/F52 9.9626 Tf 218.454 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ +/F59 9.9626 Tf -258.11 -19.925 Td [(info)]TJ 0 g 0 G -/F51 9.9626 Tf -248.566 -20.309 Td [(data)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G -/F54 9.9626 Tf 24.349 0 Td [(index)-250(list)-250(selector)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Speci\002ed)-190(as:)-280(an)-190(integer)74(.)-290(V)92(alues:)]TJ/F59 9.9626 Tf 136.507 0 Td [(psb_comm_halo_)]TJ/F54 9.9626 Tf 73.224 0 Td [(,)]TJ/F59 9.9626 Tf 2.491 0 Td [(psb_comm_mov_)]TJ/F54 9.9626 Tf 67.995 0 Td [(,)]TJ/F59 9.9626 Tf 4.503 0 Td [(psb_comm_ext_)]TJ/F54 9.9626 Tf 67.994 0 Td [(,)]TJ -352.714 -11.955 Td [(default:)]TJ/F59 9.9626 Tf 39.042 0 Td [(psb_comm_halo_)]TJ/F54 9.9626 Tf 73.225 0 Td [(.)-634(Chooses)-358(the)-358(index)-358(list)-358(on)-357(which)-358(to)-358(base)-358(the)]TJ -112.267 -11.955 Td [(data)-250(exchange.)]TJ -0 g 0 G - 141.968 -29.888 Td [(59)]TJ + 141.968 -329.728 Td [(77)]TJ 0 g 0 G ET endstream endobj -1266 0 obj +1502 0 obj << -/Length 3039 +/Length 2167 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(x)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(6.5)-1000(psb)]TJ +ET +q +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 201.825 706.129 Td [(cdfree)-250(\227)-250(Frees)-250(a)-250(communication)-250(descriptor)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(global)-250(dense)-250(r)18(esult)-250(matrix)]TJ/F52 9.9626 Tf 117.085 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -107.346 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Returned)-285(as:)-381(a)-285(rank)-285(one)-286(or)-285(two)-285(array)-285(containing)-285(numbers)-286(of)-285(type)-285(speci-)]TJ 0 -11.955 Td [(\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(17)]TJ 0 g 0 G - [(.)]TJ +/F67 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_cdfree\050desc_a,)-525(info\051)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.926 Td [(info)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(the)-250(local)-250(portion)-250(of)-250(r)18(esult)-250(submatrix)]TJ/F52 9.9626 Tf 160.68 0 Td [(y)]TJ/F54 9.9626 Tf 5.106 0 Td [(.)]TJ -164.68 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value)-250(that)-250(contains)-250(an)-250(err)18(or)-250(code.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G + 0 -19.925 Td [(desc)]TJ ET -1 0 0 1 210.511 336.406 cm q -.45 0 0 .45 0 0 cm -q -1 0 0 1 0 0 cm -/Im4 Do -Q +1 0 0 1 171.218 625.596 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q -0 g 0 G -1 0 0 1 -210.511 -336.406 cm BT -/F54 9.9626 Tf 240.086 304.526 Td [(Figur)18(e)-250(3:)-310(Sample)-250(discr)18(etization)-250(mesh.)]TJ -0 g 0 G +/F59 9.9626 Tf 174.207 625.397 Td [(a)]TJ 0 g 0 G -/F51 11.9552 Tf -89.381 -23.91 Td [(Usage)-325(Example)]TJ/F54 9.9626 Tf 87.482 0 Td [(Consider)-325(the)-325(discr)18(etization)-324(mesh)-325(depicted)-325(in)-325(\002g.)]TJ +/F62 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)-250(to)-250(be)-250(fr)18(eed.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG - [-325(3)]TJ -0 g 0 G - [(,)-343(parti-)]TJ -87.482 -11.956 Td [(tioned)-219(among)-220(two)-219(pr)18(ocesses)-220(as)-219(shown)-220(b)1(y)-220(the)-219(dashed)-220(line;)-229(the)-220(data)-219(distribution)]TJ 0 -11.955 Td [(is)-343(such)-342(that)-343(each)-343(pr)18(ocess)-343(will)-342(own)-343(32)-343(entries)-343(in)-342(the)-343(index)-343(space,)-366(with)-342(a)-343(halo)]TJ 0 -11.955 Td [(made)-355(of)-355(8)-355(entries)-355(place)1(d)-355(at)-355(local)-355(indices)-355(33)-355(thr)18(ough)-355(40.)-624(If)-355(pr)18(ocess)-355(0)-355(assigns)]TJ 0 -11.955 Td [(an)-280(initial)-280(value)-280(of)-281(1)-280(to)-280(its)-280(entries)-280(in)-280(the)]TJ/F52 9.9626 Tf 173.857 0 Td [(x)]TJ/F54 9.9626 Tf 7.997 0 Td [(vector)74(,)-288(and)-280(pr)18(ocess)-280(1)-280(assigns)-280(a)-280(value)]TJ -181.854 -11.955 Td [(of)-314(2,)-329(then)-313(after)-314(a)-314(c)1(a)-1(l)1(l)-314(to)]TJ/F59 9.9626 Tf 106.994 0 Td [(psb_halo)]TJ/F54 9.9626 Tf 44.966 0 Td [(the)-314(conten)1(ts)-314(of)-314(the)-313(local)-314(vectors)-313(will)-314(be)-313(the)]TJ -151.96 -11.955 Td [(following:)]TJ -0 g 0 G - 166.874 -118.447 Td [(60)]TJ -0 g 0 G +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET - -endstream -endobj -1262 0 obj -<< -/Type /XObject -/Subtype /Form -/FormType 1 -/PTEX.FileName (./figures/try8x8.pdf) -/PTEX.PageNumber 1 -/PTEX.InfoDict 1269 0 R -/BBox [0 0 498 439] -/Resources << -/ProcSet [ /PDF /Text ] -/ExtGState << -/R7 1270 0 R ->>/Font << /R8 1271 0 R/R10 1272 0 R>> ->> -/Length 3349 -/Filter /FlateDecode ->> -stream -xœ]›½Ž$¹„ý~ŠötkìÿªXtÒj½ÃY}Ð-q’¡×WWfDFr±Æ$ɬªo›1d%ç÷gù¨ÏrÿÃÏ×o¿Ìç¯ÿ}Œãù¿‡=¿üù1Žùœs<³hÔñü÷ã×^Ù½ŽÔ}ì®c©¿õÅþVzê_=ú©þ>⩽¦ÇŽí'ž»!ïˆw È;\)ònÒBBZÃ!-$äݤ…5=ÖOÈztRZHLk€Óâ•/©ÇDµX=&¬·œÖcâZ ¼× b!r+“ÈÙ@¶xå €ì1‘­d‰ì-Gö˜ÈÖ²Ç5ÙGù¬|‡|7ˆ|Ç+_@d‹ùnÙâ@¶-ä»Ed‹k ²¹×И…D¶-^ù {LdkÙc"{Ë‘=&²µ€ìqÍ@ö‘@>Cqò)ÍY¼òD>³ì¬Eä3 Ï[@>³ô¬Eä3‹ÏiŠy´Ÿ…aeMò³xå èf-ËÏZô³–åç-8ZËò³=­eù9M€>åyÝœÅFê@l#mÄH?Ki³h¤œy¤ž1ÒŽ–FêÙb¤®+Ô‘ ¬ü|cîÇÌ3FŒ9FŒ™#7³Fê#Æ#ÆŒcÖH1ÇÈ!€•ŸïÌ¥‰ùŽÅì#ÎÌgƈ1ÇH=cÄ™9âÌ>âÌ1RDàÌ9°òó¹+˜-fŒsŒ3GnfÔ3FŒ9FŒ#Ƭ‘"cŽ‘C+?ߘëÁlq0cĘcĘ9r3k¤ž1bÌ1bÌ1fsŒXùùÎ<¤A‹Å<¤Áqæ! j¤ž1âÌ#k#Î<²922Á!€•ŸoÌ—$xe^à•õwI~WVß%ñ]Y{—¤weå]Þ•tw…ì®MuIt›æ’ä6Å%ÁmzKrÛԖĶi-I-+MBK½ð9 Ë1>úó(çÇ,Ým;V¥¯Ç(ó£ñ7P?#¿Óz_þq"ûýmó»ÜÁëw¿›`ùÁòz|ÛíÌÚÑìwM,¤…fÍ~F~§Ã:Z?kF³ßE±Öš5í©ž, Í]ËÌ`ÍŒf¿ cí/4kÞÈï4R •¹¡U|ö$´»é3Ðü.ßvsò°ohSaAB»›†æù~éhíèÍ~ëÇŽFhÖ´§z~° dáµ2š-Eb‡%4kÞÈï´EG«kmh…Ÿ}éÚÝt€E4ÜåÛn5öM¶|Š=aBëò;Ýhc“­¯bß(4k:@Èwù¶¹‹Eï/õ}3^<1sÁ„˜Ýatε©Û³>øµI`AÌ&GR€0wªÆIØ¿ÿüúOQu}ýçN5h=u÷ |ù§¨z|ù·õœÅîOïžó¶û××cdg¶ÌóôXñ½°ø³y‰ìÛ¶Ö -;ãÍï,_0 Ž 7k‚3ãŠM·à¼¿Dz,íÙ·qž<à¬åõ|Ád8¬Ðlåã…ëͺâM༿Dz¬áçpŸ?,pw SpeŸV.Å<î®cB`jgý%Òc±æp0©È>ðùÃÔxó»åõ|Á$8®¹,vŸÂõf_ñNFpÞ_"=Ve/,ÐÖWøù»­în`\òµmqeqÏ‚0‹÷H ®CHåàÆ&3°x×pÖ€‘œ-¯§,„[áú‰ù˜›&ÔÀd.¹œŒfÅì…e1,jÙ’µ¤07®Æ©Ø•@!Lqu aî\ƒFT7¡ fâJî–÷±ÃÝŽ+ÜM/»Þ·×üÀ"Îw·ÈZh!öùA¾™¿ÂóùÄý­ß˜?´Cœõá¾/¬ƒÏûAúÈ~—ö•ÒïüúÉi™Y“âˆì 1 ÃÄ<qVÖÆ¢4"›,YIsCk1#›0B3¡õ¤‹¹¡ð§ºÙ -U1š\o{¯Wµçlµ…ëé}ùëÑJ“ë­´\ZŽ!¶Ybþín¨جòþÖ_µ«Oâã;¼ª'®7wÓ›þàs×[i¸´"óØ]Œù·»©|Þ_µµOæÃûºªÝ'®7wS"øÜõVZ .-ÊÏÌW9p=òYÕþV<‰ï檶 ä뜸ùÌõVZ .­ËïŸÚÿŠ'óÁõ¦6¨¸Þ]/ÊœÁçý‡Ö†ªp.¾Êy ëÏú§¶ÀâI|t½©=*ù:ç…®>ë?´6T5|t1䜺îoýS»`ñ$>ºÞÔ6×»ëEi9ø¼ÿÐÚP•‹«+󕘸ø¬j#,žÌ7¤ºø:ç…®¾N}0ÿÐ qÖ‡»[”̓Ïû§öÂâI|—ä®ç×ONËÌê˜Gd±J³0|Ìó'aem,J#²gÙ\O…Ÿõ¹¡µ˜‘M¡‹™ÐzÒÅÜÐFøSÝl…ª˜ M®·Õ}›v¬½•p=Uí_^K¸žWƒ‘_´FCl³ÄüÛÝpÁf•÷·þ¦±x+¹M;V\﵇ª¢žçý%%ŠÖh»‹1ÿv7H>ïoÚ‹'ó¡jÛ´cÅõ^€¨ªLàyÞ_Re¢h†xf¾Êy€ë‘Ïú›vÄâI|¬Ð6íXÉ×9/p=òYI剢5šÇîbÌ?8p=Þßú›vÄâI|¬Æ6íXq½—"ªjxž÷—T£(Z£!^™¯Ä<¸ë‘Ïú›vÄâÉ|Cú`á|ó×#_§>˜_´FCœõa7ÁçýM;bñ$¾Kòˆúƒ_?9-3«cR‘]b•f!} ù‹“°²6¥Ùmd×Ëe×–ö¬ñÕ‹oÞöÅã÷n&´žt17´þT7[¡*fBK®—ÎÁx ›ªZèàÏ]-r½SkC”yñÔŒÏòÍÝN.xëŸÚ‹'ñÑõ¦v¬¸WU.ð<ôkm¨S1࣋!ßÜ-5Ÿ÷OíˆÅ“ùàzS;V\²«Šxúµ6Ô òÁÅÀW9t½©ò¦øÊ>¿t½©+ù:ç…®>ï×ÚP§]ÀGCþÁy ëMU0ü¹ÈŸ×îzéÜŠÇp±© -†¸úµ6ÔÉ–O¹¬ÌWbàzSE 0¬àË®—Ψxܳ>P…Uƒ|úˆò¬Öhˆ³>ÜÝâð^ðyÿÔŽX<‰ï’<ÂÇfÔ1â@aÐMŠC5Ú¢"mZ½!qVÖÆ¢4"{^›ëéŠÏúÜÐZÌÈ&ŒÐÅLh=ébnh#ü©n¶BUÌ„&×]Õ ÝÅFS5Cg_QUÍðӂȯZ£!ö²{U5'"ýÈCS5 žßÛæzÖæI¿®+®÷ó'MÕ <Ïû«Ö†:%>A©ªfè\dðy׎X<™§úºv¬¸Þ¡4U3ð<ï¯ZêD ùp¥ªš¡â³þ®±xOðuíXÉ×9/<ŠRUÍÀ‰¿|p+q¥ªš¡³¨Áçý];bñ$>žÖëÚ±âz?ŽÒTÍÀó¼¿jm¨“~/žú[™¯Ä<¸ë‘Ïú»vÄâÉ|Cú€‹‘¯s^x(¥ªšá]ú€[!ÎúðS)MÕ ÞPÌïms=k_’} ×ONËÌê˜Gd×X¥YgSjT3âÔr°-J#²{Ë®ç_ŤŒ¹¡µ˜‘M¡‹™ÐzÒÅÜÐFøSÝl…ª˜ M®÷Ó[ÎÏŸŸåùËãý¤†¿õ8šÇ÷‰<ÿÓ”ÿüúøýQ=Æ×oÏ?~}üøåzÖöüúχÿeK}ŽãýÜò±ž_{üôCýô¹Ï£üÐ>}¥”:~ü<ðóÄωŸ×§Ÿ¿þåíÝíýŸšÇóë/Îñé뿟ߟvñvÿ®Ý¾k×ïÚeoë»öõ]{Þí·¯??·~¢ûCÞŸ#²ÚaYïá÷…–aXŽí)Ãüã—÷'W¯û³êíãýŸ¼žkà®3Ô{οþõÝ*ŸþðøÓ×Çßßÿþ{Ü -endstream -endobj -1275 0 obj -<< -/Filter /FlateDecode -/Subtype /Type1C -/Length 13073 ->> -stream -xœºwxWö?laÏŒ˜ r‘G¶5h†ôB'ZBïL·1`pø˶$K–eK²%«Yr•-˽w Lï%„ ”$$$¤m²›Æî{½ûýÈ–ßû<ïûýãõ<~4£¹ºsçÜs>çs -ÏÇo”dz%!)&cÒ攤}ÉÞ뉚ç7Ê#ñ-VyRŸ/Å<ÌŸâ1¾ÅcüNŽc~G¿8ŽfüÇÑA>~<Þ¬5›íonݼý­ &.II•§'ÄÅKÇOŸ:mÆøhùøÝ¿4&#!.yüëÜIVÌ¡”Ô¤˜déú„¤èÌŒñ/ž<~sL\æ¡}éÿúøïDÿÿ¦æùsïæ³Ý§•À‹åý<*Åw¬ïjßM¾}~´_¤_¬_;ÆÃæbë±d,ëÆð`|<þ -¾—ãýø)üþ þÿ'‘G˜;?€Ì·ò«ø®ÑüÑ£•£¿'7ÇÉÈûà=°,Ù ¨­c|Æ?æ1³ÆèÆÜ;nìkcç]4vÍØð±ÛÆÆ-ûpì+ˆ¤ -ä‚|ApQð‘àgÁßÿôçû ü—û¯ößà¿Í¿Ø¿Ä¿É¿Ó¿×ÿI€0`I@x€2   0 $àTÀÕ€Û¾ ôd_ |#p]`Z`f 5°.ðbàõÀ? šô~Ðò ¸ Â ª Ú æ Ž ž þ ÓA烾 ú6è7¡¿P('|S8[ø¾p±p¥p¿0Gh– -Â&a‡°GxEø¡ð¡ð ! ö ‡³Á¯O^¼*8|=øqð“à?ÿüwʇR¯Ss¨ÔJjµ‹ÚO¤”TUBÕPõT'u’ºI=¦þLý&%"Eþ¢0ÑK¢·DSE³DóEËD«EE[D¢ƒ"¹¨HdÙEu¢ÑIÑ%Ñ ÑmÑ}ÑW¢ŸE#!£CD!lÈÄi!³C„, Y²1$2$>D’b ) q†4„´†t‡9r!äÇ!C~ù-…ú…Ž„ -Sðˆ—¢ÒÑ+F~*þŠÅhÃb8™øÂP©®œÃo€J)®ž«RÏ2ðÑd˜Ž·›¬ÖVÒ8äܪêéü&¼¾‚´Êð»úŠ\óþÈÆåàY“R)Ôz%-ÕV ±p;qFߦldz3÷Ön¤'-ÚµVZ›ÝØXç®·mF;k(5:ìâ–ÎúÃ'[’·2ë 4i½<o ?3[•pP à?žÏ¦P8‡^AQ“:§œÛÌ®½ôôÀ×4Ü ÇAFý%ùÉ–ËìGkgõN ãüŽ—'ö²çvuLB@¼Q½8©êÍt{Ð`‚;èã'Põ‹p¢'œÛÙ,\8.³0O­ 5 -«CÁ -»ªbcé}{ÉÙøCò}ÛÅó†Â¡ßFæÎ3û›bø¥¹¶¬tñª»O[w -v2R¼Åcõ¸pm­©¢ÊI—×jk¹‰”ýƒ4äýè÷Ï·Ü@¡wØ—/î:/>¤ïÚ±#²¸¦#±*­z#§ÈZ<{)2mbиï߀A0èÇgœv…Nù³¹)Ô—' £V.<Ûx8‹M9“sûcñŸŽ_»Ã€%dMù$ p‰`¼ßiÉ©ìÃnâØp}E}²ñÐ*ņÕêœ]E|%<à$vÙò*Œ'ùPNü°ç¢݇kV0­y»,!o»2LAXìp¦¤µT¹‹E&¸1/wg@žïó7j©KvYr7Š†%¡PØtîÔw⣹}imÌ`̬æÉ4j“á÷ -ÊòM‹É|ÍB_Ûk‰ù6u…ñ.î$.˜a>ºŠÁ™øH„ç ÕZî8 /í´3k_œŒÙ¯›`àƒB(Áòð½¾HGë4f[ëJmô{›·/ ïÜÿe{é Ö“]“qH¼'1eÇòøþ_¥LšZ‰—Ù­æ -èår½ÞÄ€¸¶ÊÑif¬eŦr¶uɈaÎh~F/Á´%-&ŒïäçO-äÖy¹–˜nÎ-7>àÃm|f]ýº¹þ´5 tyÆSŽÊs9ÝáP¬aÑ:bòðÖÏÏn>y„9r²öÚc1$bžìLÏÎN—•åÔg3-%%¿"£äÐAñÛ›–ÎKÉ(¯U3y®ÂÆa1Œ!Ú9³®b<ᵈžÿ|yzˆa¸KÇ#_Þ›†¨BþJ|¾¹Àbìå{’óÝÊʇV> p$xý KÀaš‡]ÃhÊc*IŸ—¥fTé©Yt|J÷QÖJ˜® õÃу|p¯z (E ÑgªÎ1ǣݨ.$¾v8Ošä$:@§èó³I˜U“ƒÇ*+ûŠX¸º0W"Ñ’8Ó% ß×£{žM½òNÜÂùì’eû'¼!Füþé0K?yÿ ýëÓ¾»wÙO>éÿî™øYì·‹î2÷Në{‰F?ŽQp œÈ[`8œˆ&¢-h3š„&£ o½ßys³òøÇ߈A¶d“Äl3›îJÀR@yëÈ{ØHðwäEÒ…äĽjU!wßš[a¼Ç÷8ÎœG¨SÔ#¿×ÁùØÈU\‡æcxäÕÕ@ÂÌ?7R„gækféù2àù.¥’Æ'¤"ŸB>*Ã=K=Åœ# D6Z€5ã5¿ô¶A‡iøȪ‘¿bJÏU¢.ÅÄ%ÊZ5 '¿ Š7Kµ 9#k½z·¤iÎ’?ý×JVP@ÂQ*éÎldžΟ:¶…5%P=íIqqÉÉ1ñÉm}½ím½,ðµ‹`®Ï6í³¨Ë}ap-Ç£ V½Eo) -sh,:½8?_§fdJ˜ìÄ÷XÕî;&Ám¡m-0úµTß0„9Ñ^1`,/ÌåÆæªr"˜g6Qh> NÎ߬ K*ÀrSe©©Œî.Í`G²㶢¢¥y|ð¤ì?¯S-vçVpýûv×3†Õ 5ŠRÂU™Ÿ›_˜¯Éc9 ~ K{Š›¾µ•ž²ð]5([áÂOÙòÍSùÓS _ÂòË eâªrG-Sê?fêVôï…£( -Cs±]’e tFÔÈñ<­Ía·Øí¥, €Ó0Á{¸ø©ËyÎʯEÛdD±\—#Ž]„F½ŠøLŒÂoW:/3.$“—+•ËX•E¼ -ù‹à¨>qM¹­Çë3¸ß+¬U^˜W“qzE…²"‡;”Õ™¡h5Ê[>#!-Â&‡%."ÁœWâ;í'®3P7`°$Wµ„@Í"A éYå¦à›«Œ—ÚhDM}Û]±».†­N­Tv§.FãCûð ?–ÓӠЋÙê“}lNcnÅ¡z~âªmkyÔ1½gÅÂiE{:“ÕI¥^R_üHüÉkwN¸yó&P¯áPˆrX_•ï -;žÓ¸‹FÄd4½…‚ïLûí“‹=wŽ°%1Žl§”¯(/Ï·Ó›Ùbg¿Äz ­¢À ¡÷'÷}¢`g¸(Ä ¦wÄ -GCvð‹Û” n¶JêH¯{Ÿ/°pÈ#'nÛ2¿#F‚BgEç*Òiu¾µJÆVeçYtb¦<5®Szúèáê¶6¶¾¾´²ó\í¡B>éï¼næ»P†œ8Y©•j‹‹óÓÙ‚L$B½ˆ ˜ª9¿¥R\ç*ëg*]8(îó|ÙÇûýWÎyjìãܳ® ×Ø.÷=ýIüMòÃðóÌG+¯IhÔ Ãif‰q±Z½Pχ›=7¨Ž2Ç‘¯nä$ 3·ÎD>bDFÏšÉ$ïؤžAïW—v²àeÌÕÒñùåÇYÏ BoŽÌIÈ[¥ãôßa+.¥ûJ5Ñ,šJtƤ–ï!Y ½Œ‚ïÍ€¾úÝ'ºÙ5„µ¾qC®PZ­Z“Çd+Ò4Éô¬­ŸrêwòÞýsÑÛXÐË"ŸXJ²±E<ØÑrübûÁi ŸŠØA[5š™ãÀëœj#[ W¨™ÍsößÀrk5eâ²J{ c¯F‘Üc…&㧈:¶8tÅK[f¨Õ1ú0™Ëskòˆì¼’ -‡Ùæîd?„¼[hV!/Qæ‹só´™L$n•÷óQn?§à\ü’ÑfXÆwA7-ÎJ 7ñÑ\h†“ðßÌéåÓkL¡ø66b”ãÀEfå¼M‚zYŸÝn2•0à0i+.6ß•Õ¼Ò2¥`íО«Cƒug®2 Ø wzh7ÏC<ŸF©q”7rdÿ¤<Í*gIiµj"NZÕ`·7–5±GžaP÷¿U´æ(ŲÜ8&G;\¹xTZÛ×qì0ÔÏ0ÿo(wÉvÎÒ;dÄ]™Æ´ˆþã€v™ -œ ‚‰€„ösÙdÜ´£$ÃÂÔêË:«Þ¦·…™óšR½5,¹ùP»Ñ¯Sd»t´žûËmÚßÇv¨Éj:X¨ Õi±ùoEoÊë,E%6³Éfe._:Ü÷ñU¾nõì¦ô–u1¹á^U±™0³ÓÉ¡eC©b/;âO,(JÏËf4:UŠVjìu -¶A®*Í¡¥rEfâ€ôüÐ᪦¶·³þÖ#1ðúó9®ŒDKJÕBZðݧr -0ñ6š{(Û¨ObáJâvµóÇÄ/Q¶–Z³›þÓ5§¯]?-+ÇRÏÑ$‰0à+¯fªkP–¢¯”ü!0LÖe¶€µš`r'$zqÝY{ÖÌpû'€ob.YÁdNmÞÐNÅä¨ÔRZ•]R*g3*0ḊŒŒ’LZ81E"×Q^íl„8YUØD_€A¸ŽZ³™žÎ!GºpH­ýø-ñæ¾BQè£7ž;Ý2Üͺ+Û\øè £n:ºûOѧÛcVYX #Éò³¸€&l©Äó澞7á= -%kÕ²}FnÄäPæ"ä&lŸY]{] “› (B°kíj¥Xž§KàF÷)Æ^k«)7fÖ§gdffhl -»Œqï°flÇ™”Ö}5|‡LfÓÐR…<-Å©l«(³”6²Îk¤gËÒ3Ü\ çv76f»¥,'®‹›/yBÁθ¸ÕJ¶ ÷"o‡»p-d%ðÊC_xÆRÓ Ç„8Â+WÀlÎMí´èKßñŸ¶Õš†9ò-Z­¸@«W3r˜ü‡·çvã8ÙÓØÐÝ™Y«Õ:Æ 7é]‰£«¥ç|? èrg¡ÖÉ6äa O¢×/Ý>•Ñµa½DÅgHÀšè̃\¤q{óO?ß¾ú»›‘Y°÷Ó#¢WÒ+wœºÕæ¨kè`UmÚº½¶¢Î-­ŒOÏÔçƱ€* ÷ªc8n·Þ8Gº$—蠟-\ŸT#oiuÖtX‹íÆÖXŽ•]憚£tcmÊÎÚsl‡¤bÃÚ™qàØÊd —,$#¬Û$î…Á’Ø­±¬@AÂK25Æ,–î*ñYYk|¼,e[®e¯9‡D?cŽ%TbZZbbsZgGssggZó!?RÝÙö6ºZ[/óÑßÏP¦Ø’Øò8¾‰“ŒQ,_›40|¥¥Žb>?G} òÞS¨æpáþk\¸oj·XZM|8îÿ;ÞXÇ áÛä2 Øÿ²`a„Ïÿ+^íÙå–­¦Õ3½xSDÂÃ.BjÂÀú=Êìýâ §£ïß?5xýXVëò“LêZêpjUö!ñA©4>:µýDWEC ö;vQI8Q”$p8AÀ©$Тä‹ðz<8`k¦…Ÿ'Àóhøµ–GâhªÅvÁ•Ónsž£K)Dã†+°!øµμ|»–.*4 -ÙW·b ]r‚$²Hà,kV:Ó‰£SS£åyÖêL¦J–gË£³•9Y‰íò³Ð÷óKàhGMC«øDdïúuRWÄ3Mrl¨Ã]ÙK÷ÔÉ’bÓ2—IYAÖ{$ðØŸ_šºYgÕš9Ÿ09R+y:bÇ•ú‹¤>³µ½¾¾­MZŸÌ -ŽŸmâ~Þ™>•‰yä‰&n¶¨Ú™el}4½+*}ã&|$™z#Â( -½‹fr1Åø.šwrÇl8 î€;¸«¹h+§¦(ÍE³Ñv´ÎD³¹Øb7à]¸FqCf¡(nH#ùÂ"ÏKn)¢ìƒáÀ¶†¹(¸ͨ5X¿èêøÒÆi{TÔÖð£{€'ŽZž Ñ/:dW…–Y1³½Ää Ûì¹›Yt÷ü‘}íõdžÅO\E~±)ºœh¦@ª)Ê£³4¥µEly¿Ây¸Â!øµ›÷ø¬{æ úȯŀø¸ºj R%ß ³ŸI5è…h2Ujdb@Í—€ç¡T½;UŸ¶ÉÌŽ¼CÌÒ¦eæiJZf(3·<ô†+ ™TÊJJe^äljm¥¦†®sV7ÝÙ2Á¦´]qlRt^¬n<%¯H†<¿æµB.Öžü憓Ȃ‰g?µ"!¯pQ÷†å k·p¶ºø=¯Ãñ°ÙeÕÁ , O)Ì÷æ1ŒÅµûY×e€j¿Ð{üÞÉh4#ÜFíZ¸$rso;#ÈV$drŽ&e;yf(± $A¡¤·JHPÉÙû/`äµ›ž¸‘¸U„ÀE,Êqâ. øJ˜è$¶Z5¥gÅžŸ8v•oЩuaà Ù~¿Žˆ»rk22reà°„ pR|}ÛÑ÷£ÔR)“%ÓÈrv7©B‡ëÁMÞ7¤'&Q#q€bþwâ‘@?’{0@‰=ŸöƒÊÒÊ7HçQgH¸·…Û®ó Ñw–Qk¡ñ:!ym•'η - Fì¬Á)[+nÉcÑ–G¨Ç>?ö)Øõ˜ÅNC–Èi -õ!:+ËVª`OL—Û£èÝß°ÛL@'LGÜ?wze;üHhG‰ˆÐFÜxûÅØpvˆ®¯(Hu±àÊì"œ(rÿÜé†3(OI.I§wïÎ8Â&íÕ(|‡f‘¦øœ|N¹Æ_CZüþ®¼ùO%R¼Ø -¼;ʪaêÖÆÂúá‰Nð{±Ûóí0–<ó…ïŠ8\â|”ÃÏQ_e®§¿¼°iæ»Û¶Ì“ ™X/Ä2£RÉ PQ¡ÆMyTPk?ôÝ9nÐüîWHóBÕ‚—Ƹ°ß®ÊÌ/L1†¡±ø“/±«à)Ïï%9áÎlim¨oÍ#SY çTLZÎ@òbŽ[N—,žÈEøÇ@wQuÑ`_ëvë+èúj¸_1pø}¡çàŽˆ¸ƒ;ØD€N่Ūq×ý–¦|¹|ÉÜ-áÛoßgRiJ2p‰VCq¯³ÝbÀB1|…ÄA9•Š»àÎv8ßͼ»_Àõ_ø‚nˆ{æCÎByCžw†8ÿ}ǃµˆý|ü³+͹yn@Á·Hè(øˆ,ª^Ï?¿'AQ4¦Å}Ö[5¸˜ -Âx±¼â’¹Áêg7Üzòú±÷â®g¹Ç„tƒ“0„Ì¡îÕyº®Úd=ÌáQº"Gª3éÌ:Öª^±ÉœWaºVA€¯AâÒÎë`Éé)h{aÉq±mÉà_鋂İíòO^8Û tf'1Ç’Wþ‰~€Z»ˆÇ=ö¦~öŽaó é|ùˆ/ü€ó¸c®ž\@y5|$ŽÁI5@ÉÝp­ }äÑ|ä Ð<3 Šk<Ó8‰]ò¹®¥¹ÐM?ô p .§<<o„Ç;*£߮×`½-à2v]w¯ø>?H;€üëY5߇„•LØ´Uó3Ù¯ @å1}$$sëŽ*Ž†Ž7êô™,uëõè/|Iô<âyÄ?"pð¾v®î…KZx@QîæT ^¿7Ýßò·x2–]½744´µHSXAC]{{zc+(5•Óàù«ÜdÃôcàCNgÆÌwgÍž3ušÐ'؇òù„ø„ú„ùˆ}}‚|€·ªäç³ÃçSÅ+æyF½2Êéëç»ß·• ´Å÷= ÷xðÛ'¾ž(¡rm±›ò´‘œÚàð=¸{úýÀý‡âŸç>xeËöÌý±Lb‚2Q±´NúçÃ=·9Û<ë½ÝSfMgÑr´ S{Ä„@Ë©Ô·W‚àÇbî?‚“àcêrƒ!¿½.í_.~7|Í¢”l{c,ãJVØ´T©ÊŠ?šyûþ£æÁaöÄ`Ó¥ÅgrŽ§t0Yõ²Š½µ|á—·N4õ¾öüä{‰qLRŠ2=kƒ³0ôÞác×é›C{×Ä)ãÓÓØ$îΊ >÷ZnøÆ Ïûî xìáüÂï=‡`õšgÓ}8² [»ÑE£÷dkcÖ²-pôƒ>pZ3E£ê«“KP u`ý²w¶þ§œv´ºlu]Gi-и=wïñ<[9O1‚ãÑ5ƒÖV^0…Á$íùû‡ñÏ_öJ£õ¹ 5èÂýe÷`ÊG[î À£‚úù\÷à-ñ㥗_çBœ•SW×&?›ÂŸ"Þì¬äåâ·®„8óàæo†ÑØoù«ÔÝíqáâÕQQ«ì¸ô䃺ÁËÃŒðÁÉÔôKqÇ/ˆ/¼~ëèžåëÓö¯ˆbZ¨9êñkåµç{9“‚~•çkoäÿ~ÿ*Òb8g*DLÑHpg.ô9¤zð,f!ßñhb^†Ï©ìÂü¼|Fš£Š£D|ÁÅê+uͬ»¦©¼þêØLô˽`óó f^ã°òG_h>›Ú²/'.–݃4ZŒFu!þ‰pfOßéŒKôé£5=½ì¥óýNJᾩð%Â¥Q_CŠA1{ÆÏœ¹çoðL8G=a½zø<€Ãª„'¾Á6'>_Q>ld9:P`F+§b*Â9Ðh/¥ëJ•‰ìÈN°V¡Z`ä TUžme0ª -ï'ƒ“Âç±Á+ÕdµÚ,k‰Õj3fÀê¶Y­î¢Ì:f,Oà5ó½>Gx8ï,ïþ¨M£®Žºé;Å7Ê7Ú·Ø÷¾ïcßïüÄ~‰~R?›ßE¿ë~ëÂNcÿÄq|4.Ä_Çâ…„¡& -‰ßùß¿0zúè„Ñé£?ýýè¿ÛÈDò6ù|Jþ Ð`&X*Á1pÜCY}#úE„DÿB†CÆ…¼òzÈ„ç^sGJþ@ïÌ6¿ÊG Ä -]Œ"‰‰H]sh!½tWã‰TVÞ©í».†wOJ>õ&šzeÄ£?Mè-úlœyŸôÜ`” †U„ùYyÕg¥|ð|[-eÑ”ÅßCfÏ¡Ù8:0¢^ƒ^WçDÂdpW-‘]ÑVa©²U²Çáh :ñÁ XEª=M%NWl322è Fr9² - ÈzÔŠ8›Z±7y ½yoÛÅdVÞ¯í4\ãëxÁš¶ôN~oÒîºíôÎHyB ºtŽ1wfBâOŸ\†>GôN¤–w÷]¥¯6¦Ì¨dÁƒ–V_ÏAO(50ŸÈRYìÙl¦+KK±¥Ñ“-²¶9æÃlZFV¶:¯¨0,+S—§g–çÉf–lO;+Þö`ÏŸr†éï¨éì_‹\deU‚m5g”V‹Ý]=-¦¨·Ö`¦û;[:Ø®VgÏ)q›¾AÕÉ íˆ˜Òv?Ô¦¿éÊÛ´à¦onj(ufmð{êrkÏñŽfER5S_µ_ž¿{]ô?2% „ó°u8ò¯ºq8ö7(xüäû|'ü…rü¡ÞQ`^ÀGï‰R£Yop ¼þ‡Öo‡N2'^o¹E_8%‹ëgÛ“«ÓšÖóð‡eâÙ:yn­É±Øå¬3*ÒEsaç+è ´÷­–…6²ËoþGÑ_Á€žžÃ,ÜC˜¯–W\´ò'ų È•xgu~N¡QW¤eßFmˆ€%˜®ª°ªLìrÚš'ŠVÍ…5yÄ‘1´"Õd´iø­;«c•âézÅ!&]™%SѺ|oÑÒ.—[sè ™,ý`GúÑ[ÇÏÃ×γp²gbegyY£% dJÞ&ïsÏv*fr\–.GMç+-6%[³k§#’ž3ûЖuì²u1SÐ(1Ú ÃÐ8GI¹ [ºxeÒlzm¸»/»åä¡o Ÿ®úžƒó×.žÉ:p˜éLt¦Ô®æ¢I_eÓ–zK°(´ )[0%á0aæŠ -SÝXš³‡EJ°(_³¢ˆò'¾¨%ŸªÅç˜UU†{PHš?©¬xláŒè”ŒøR_®1KŒ Ôy³¸Á¾ž—áTjYD\ÆfzÊÚO!ñûùÛŸõVjbÊØ’L,ÙáRÔÓÍuµ –˜µkGv\4»sOÊŠybD=š }®îèeZj[Û/óõa×±i$hùw@ò©¾Ray‰¾!F( C døѪ‚ÈÃìÈ:ÂðR®bŽž/ƒêZb¾UYgøïG›jøJsN•á2fGNµ–”ê5åŒÒY¡¯¤5­]Yî„})™Kv²ßqqlxfœ*\–CXªJMtÓ aXÃvnm\½âÍë¿+g%Ï|¹ˆ?RâM‰µH®‹=‘1’zgžw!MÉ­òI*õ„¢0Äà°ÞóæD?EˆÄ\¸ãã*ç5328ªy SzÆ`™£ æ9ñµ–¼2ã¾ç¢Ð¼#'6o©",§„0•W˜tg™*’I#Œ[Uªu…œ¦÷Ãôö>XÙËó¼üßç=. -¾4r5›øÀДŸ)Öåéòd‘\¯ÎÛ«ã+¢ÖH쳦•;Åõ —2­g1¸MÄ Ëµùkó†P™ÇN Á÷° |•*G~þüPšÕVÈê-E5b˜_#>6Ôf;Þç×ÁÜ,\;/[ù²|¡o0•ØÝôW8|­I5µ‚9Ò)ø¼ƒ¡;HJOOLnJoëhnlïHkNbÓé¬vV—•^¹ÙÞyûÓFç‡Þ‚_¡Œ8a,×g‹ãÞæ؇ ˜@|PíºhbÌ¥Å&ëBƒ3$pðÉÅ\U¥o`‹Dð9À_¿Cn¢×$´¨zQ)°gÔBN‡©ŽÔ;ëú®nÓ[ÍF]b±”TF_šÿg¶Èj,Ö•6uÕÔu ðÛz›WŠmz›^—-ÏR0Æ¢CIÉiZ]6¿xr¨6¼lsž7ÙëĽªÚ´T•,.ˤ« -g$$Ï_žNí—ŒÇz×DU®¢ÑVŽÅ½‚ö¬q®iÞɺ⫳ÄLDlèIüäc±×ŽçÞ¢á6.î} ü õ+ïQ¸£ûù{×PÝ«+ç‰Ñæ¹h4 -Ý]{b/“ÞC%ÜPŸÿZ Ã?‡†\”íb@q삵ÿ­ª=¡¼U3ÓB>Ú6{ÏcMü«|&ƒ}ÄÿZnÃáStB¯Ãl¼£Ì1ðÍÿ^eƒø:!ˆ!G"‰eFYɧ|¹­>DYqø.z«ÌµèµbÞÁä+`tÏΨlJcQÜçÀ—;ZZ*Ía5(\QAÔ•iÚB­Zâhögâ¿ÕDZ ÌS¼ "ÔÂF xîõø)2üŒ±¦ C¬)Tk” 'é,ô&LÇòªuMãÄp¥ÈQ˜!Δ&mÙÁ eh&÷¦OÊJ?+ᔬOF<З«Í‹ɼ¼÷½ÒéyÑœRn¼Ç‡ˆæº¿¾Œ=˜vy†X™«K62ú¼b£š•ÁKµÄv“ªlêOH-¡p3ÜxëƒjÇ-[X-rà# ·N"ÿ`’>•ÞÖv“m!¾¶;O™¼Uð”&2•Øg‘—Ë™¤Úœ‹ô@­ò 82íû«Ä“㤶–êíÆù0‚x}nÎÆ}v3ð"¡·Ä«Ó ¶ä„¥¾(â[ÍÅ%ôQGÁ^v$‘0,ÑlÖò•ð“Ød+(7p?×ǴצO6Å-cÑ.°4?o³žOx«xÃC°×Íûۯ𯾵"ØQ‹/,QWïða4¼€¶;HÏFb—mCà6îddY- ¶KRS^7`^5µ&=+5Ö™ì*¶D™kÍ£S¤™éI­™}=í•-Ílcƒ»¿ý!À»¡-wìeö2Sh›$W/WÅ0sQÒ,˜†ÉO­å⺊ò#ã¨A;¸ÓØ­/.>‡$a@FÎjß{ùdã™.&·;.ûÉÖ?‘@wpj«¥~˜½Ñ~øÊ-ña•·áHÚ”YQÓåt–9«ùBOskÍ‘~ñ…¸³;˜Æ½moÓ»÷Q–ºRSýÕíóæn˜¦P›léì?[c™ÉE Ÿýzcë;³Ömž–‘k¶'³‚ÿTf¼€øE9ÎÛ,€·`…Vk•²­&Ý[¼:í"RMGRz‹W+ø*ÔŸzÒÝ~ÖVn¼eñ:¢ÅX¥‘‹Uú\E -G¨P2Á}˜¬½ ¶Z\ï*ëcª\èœo0œÒ6dY^<6™ýNMZj–"á ù"\Ø«7ºžü$†[9·ÿ*ZÃPpÌc4–SíUó@þhôW6Aê˜ÓÜð"ÛÙÖPØI?îÙ’ÄzYÿ6IJœ7J|ûoãOP”ãCzk¾yAKáâ)œ ál)N•pß,ç¾ùÒî²zí+¡Ìßaà+€ÚcV;ŒÍ|ø#¡² r &%J›ª¬VºÄ¬ËdQa”j´û ü NxsÆg ÕÞœqªDJŽý)jˆJ ”âÿñ©Ï,~Vü îìÂn&Ú•mFA&>:(Ç_ýËðÏtåÞØrd?¥ÎJSéMZ¦G“SF§fJS÷ &]‚>WïÀ@ÖƒæOJ½ xÖœÀj< ¸å÷ÊšGvî‹×nÞ¼é ´( &i:=‚+AŸ|3Ÿ+m(‰‚z¾Ð¤.3ÞàÃi„Pv°·As„†à[è_ƒËž¾ ŒS"$_‹ÁŽ"¯A„“èœ7¹>…”‹h9ðL¡ã·:“—–³5xd1ô çfm&Î}ÅÆ©’¯ÄÀâ2WšKùÕ÷K¯´‰ëe.i†L.UTMs½Í€m6Wö Ýý™ƒ=œ°xÏàð3_xÞ¢à0ŠÃAƒ´µ­¡´H€ç\Š¿(6Ψõ6 ÌÄFŠ«IxÙEL“UÝ6²ÀT^ΩSc•*óPZz¢Vo²ªY›2ßœOgæäd¥5(:ï|p÷Ñ™”öZ¥ÑÂTR%ðÀ]‹œ‚ŽƒG»Îö1ÙÕX*0“ph"ÿ(*¥lÛ¿ðŽXG~ÌAyìgÒ³h ±ù@Á gPìçP‘uý=,ÙùåØI yÜ,ÞPäêŸÍ=‘G¢¹Þ´<œûǧÙ{ÍH¸‘ŒÄ!œŽïßA -à= -®B°ÓÛ]9â¥n¯·á6Yâm³1—ü§,PKÊ8ïKüËûþ;oŸõ76þOCáÓ¿ÀJÐ~…ÚÝ‘ÝÔ&nk¨ëê«ÏŒ=¤â8'?“ŽU¶\gA“»º£K|cKßä”\CA"£ËÖ -è\mI¹‘Uáè ViÔ[Œ)HuáÖܪóbøùâ'‹a«‰E5„q9ãÓ½%Ý•‘}£³[!÷ W~` d±Æwó½Ô ÜôêU-þmùÉéOŽïY#d'@5áåý˜£ÉâtŠ;ÔÕÒ,•<-×–Q’ÅO8Õyk½PÙß,nTÔdÄ+¢_Ñ1úyà¸oÕIóï’„À£*‚!žQ0„7À0™´Öÿ 8´JMáv(RÐ÷*÷s†³Á/žÏ¦vã&RpÄÅ!ÈHÜHÜͧ«FâɃ¿]Wž$Ÿ’‚Žòg]Qõïæ¡ yú½%N»©”Ô­®¡ÿ U…··OÀû›ÄëC’¿ð\uƒÏÏnÞ_~å¶ÏCðÀ ›*¤é%™4€Éó¾÷vÜýÉÜ¢%‰كœ‹l0^2”«›ÃFŒ~ 996®=¹¯§­½§/¹ ¼`ðÍ/üvà Ípć0µ´e4:GÕ*ð­Ùîó,ÕÈ-¥%¥|€ 4×TtÐuuúü$5¥”…Û‡+ëJ\Þf§o´Ë[q¼æÁ¯ñ€ˆC +ðÓE€Ï'Ãl>Ž„hà,@Ñ[hZßBÁAñ¾ÿ¸Mrïë òeJ}­×xsi@ŒB¿`Ð_ ðö‡„³À£‚)×y`mq»GÜÚÚúz’Û½bøáÙòMà‚æ| Ët„Zû€®ˆ)Њ ùÙŽ›èuµNÒÁŽc°©pÊM”SWáo3„©°r#€«½å] a×5ÄéÊÍ¿+Wy{ê?q*>;^›Z9ëÚÕ —ºyÞƇŒ¬pºˆ‹€'óÑ‚ó\©h"Õ²¿k¶px”’¦ÑتTŒÛÛRà)ú§°oǤ°„p!ÈUK¹ÍQìÿ,…KŸ -endstream -endobj -1276 0 obj -<< -/Filter /FlateDecode -/Subtype /Type1C -/Length 11578 ->> -stream -xœzwxTÕÚ/CØ…½’I™Ùf³÷F&X*ˆˆ€ô -dÒë¤L’I&½Ìd&½·I2“BHB „Б*Š"¢¢Ç‚¢~õ¨krV<÷® úï»Ï½ß÷™Ì“é{­w½ëWÞwI&M™qxâð«Ã˜t¶t‰t·t¿4Iš*Í‘vJû¤¤Ÿ9Nv”9.s\åxÄÑß1Â1ɱƱÉñ¼ãÇGŽß:N38-rZéôšÓ:§ÍN¾NÉNeN§>§!§»N÷œ8}éôÓÿržê,u~ÆYtžã¼Ðyµó^gogµs–ss›s¯ó€óMçÏsþ§ í¹¼ä²Þ個K˜‹Ê%Î%É%եĥÅeÈå¬Ë—.w\î¹|íò™Df/s–Í’-‘mùËÔ²Y¦¬Tf–õÉÎÊ®ÊnÊÞ—}"ûRöDö£ìWÙ?åùT¹TÎÉÈWÈ7Ê÷È˽åAòpy´<^ž"7È«ä-ò>ù¨ü–üùòoä?Ê“³–fØéìv1»†ÝÄîc°¾lÍjØ46‡5°El9[Ï6³Ýìö2ûûýšý‘ý……5c"{'·>’wKNYŸE«¬ŸR}Ј C7«#Âiš×¹­ •"̦ ¤ßÑb3|E&!Ò/M2Ð0‰ê©,…+ΡéÁ=Æ? µs€õ6¦Têh8Ž<¢©³¹p*NRÁÁêÔpîX’霿A»£©þŒb8•A…Ô’÷G£]5#W„­Ô@µ¹àV1 Æ6ZX¯Ô*è|@DÑÖ&<4¤?­|!%ùHútô²P» -bË›•ÅæÆ BucÉÎ×*+`±ú°ha49 «ÈŒV¤êÒÓUÂ&´=ÜA¨Ïåå•(ª+Š-ùB0£ühª)·(·(=_?´ æ´Gg¦%¸‡U!Ñ\zFI©!ÏhÈ …†ÂE[Œ)P¥<Úr¹³­¼±Ahh2õu= ¬ÏN«=UVÖžGîn»<Mvç”gÄ(´ZeèëBr@F¬6 u »>á3ãD™¢¦¬°'_0#m4Õ›cÐ×o1ä3ò36¡µÓ¢o|KÄdW—+jËŠ[ ð§TÑTsvAVË8œ™f&¡3¼ñ3ºA4‘ð%ëœêî’’–Âé~ ²pN,•”••­å²Ò…±<$¦ †{yËŽ[ú=~ò¨B~aQž=N'D W-dcyQQ)WX¢Ë)SÚFRÛ¸Ç7®ýžr`Þ)qS[„¥SÑÑjéiiNkÔå Õ^4€Ö0ÉÌB3Ðì'ÏÁiÐõ»_àLøìÂ_+BÙ+õûÑJô¢Ú}Ïnõ98®¯½.‚±]c+Y©"ßÉ®ÉW¨µ!QÞ²C;=LÊ-Ôu…Ó“›r**Õ%ý‚Å@XЗ*ªS×o -‚ÛÐçÓжPkKÓÊu÷hx°…‚¾è6}ÈñmÖØ¢‚êü"¡¬¨¦ÐÂ}Óí=k[’_Hœ•”¹VOƒZ"?ÍXù:’-ÝÁ^ñNñ¥9ÊMñÖa¯×w¶œU -‘ÝɉïÒá(¯l.*)©áJÊs3Åijݙƒœÿ÷Ï¡Ë}Ïû[OˆhÞûĪ¾XËÅõS=ïœ>ïRè ¬ 7m¥Á0ßÖu2ìx (5.âÁ¾¤7x`fƒ -´Õ= -«ÚM f”¤|ÒŸÌ Ï4:øøG%ã–îÿÎøåÄÑšêŒÔ"±0=/ÏGqÈPÛÃj6÷^ß|f š1ïYôáÿíÐÿœ  Å›;¯2{ÇñȃsÑsÐÿ8ÉÜsK„2Êø~yÅûx×^®¬ºU*XPˆŠz?Ó·A®Pègë:¨HSmÒÞ3âx8¥ß­ÑnÈ u%\ÊFaé š˜@è´ez:œ„ë<¢yœ¢2Ñ,¢™´|×\gê‘v|þ4•UFe|¶ðÆK8ãà"FQº“5­Ú„üÕT›¾&NçO£áèo¢" šà èc§¡HÊ ‰ÐÓjèx]wMCÛŸßjÕWÇéið -ÓK?ãÒúåkÇÍù?”m¶ÆYg±ç‘+%kÛÌÈz×ó²Ëך\Wô¥hdïù3²E! ˆgà]`U²Æ²¢.¢î3Caƒâ¸º1"2.6L›W'”(‹ƒ ÃiÐØh®hâºj#÷‰(›Ò+µšà\:¦P`yAj¹îm5SÐ1gÔ£ºÔAbü2ymd-£CЩ€7Sºµ©i+0~ƒXE¬6=P'dëµúd=~a Ó_Žs"DcêÓ‹Ö¯ÐÞhªC_oPÒãÅø’ q!z|É -|ŸÈÀÏaû#ÚB”&3ÓMj¤¤†C¦2JÜíõ͆EÓ¾ƒÂ­oÌu׋§›P¬º‚j7«U‰ñšq«á\"©-§¬TQ[[ajMè‚šÉm¸22 h±æ€2Ô—KSÃS<éSm©,ª®n¥«5Hôb@Rž vöÁ“=HÀ¹ðu¸ÔVÀ‹¬ÞlÊ«æ¾8_fÏ·µ_¸©¸|aw»`ò *Þ¡OÕ&r4·:Q·¦ ‹ø5üÔ²„·š§Luø«'š³RkÄ–ø]Šyþǧjò Aº‘ÀöŒà¨cŠ=÷ƒ  \ýÅèö ¼o×ð8¿räl3Õ|âDKK»(ÝÈxk™ ýƒcÈãÙµ9…¹tAn~V¶"--!5U@r$'¬ëÑ®hꄾ:Áà7±¾šøЉ•0SG‹sŠôÃ4$©&ówHAÔ©ŠR2)ÉYz!jÍTjv~EºˆÜ`Ãßm®»Q‚ÿ<Àî†ÏÚÙõÕ -SyE½PjB1j²#£8mIàTÔ=-ü Â?)9!™ËÌ.,‹)Ca^A>¦³Æð UÀ±“ÁÃÃ=7Íä­Äi -‹» xŸÃk\_¥OÌ -O‹äßÞu²í/ì1ðIµ`<¦²O`ÎO(‡'[[Ož ÇïÖߨ­6ôÑ°€ú›ÛÅWwyDìöÀ÷y]ÖtJ~€Ëa|Ó®ÊÞ%a(| -ÐC"tFI"º¢"od×'ŽÒãi”Wdì>ê¬Ù&SEq෺„÷ª7—+^P/ ñ<‚ŽÄlå<µ•§E@é®V×u2¹WjêÏézº«®'Gu5ñº=ôx#Š°6ú1¤t//5I¬³ ”=ê–àÃ)Ãk»Ôb|{fGÎæ‘YŸ$ ‡öÐÝ!ž¦ýrXù<šfü”><ÓþÍM•ÁÕ,dòk{Ïr­uI¡%bA|~”á D×^ãÕ¸€îÌ¡#m¡§ ó EB¯Uù¼á.œ—”îJµmˆÀÈÉY8>gØÁB«‹¦.›‡¸M槼ŀ®ó1§¹ó—›ºÅî~ÓÛwð™eÿHA¯ÄáHŒ,Ë"óƒ¶+Ðê„}Ž% ÿ -·Õ\Á¹ºÛZ:¤€_§RhåV"Ž*;^WXÂÕk#DE6$§nãÔ0‡·%á ¸Ào`îóÀzµ+€(cà‹ŸÝ€S¡üåwÑ«±9y¹‡E¿)ÆšÒ¼rîÑо¹‹}ÜÖû¥Tõ‹`´¼©Ôl¢eÖ‹ià”âbÈÙ=íB“·géznzšIÙ¯O†.Zâí¶A©­ê­mSêI™õog®\~ôÈÊœ´p&/è‰VÀÈSY¼q±ª!‚€N|K±|Ú²G᧊RjRŠLqBXsFAx>™®ÕÆ+‚{ãÞèÉa Ýá/îÕ¤—Vä -„rêxtc2Ful¿ÿµ«Ã]˜$ÃBƒý[Ã{ñy -GŠd€o ÓÛÞÞÓzÂ_Ag!%6ÁbêzöñÄ.ázØΓë87ÿ¤QVÚ >dÆœL. ¯6öÁs]ë›úÁÕzƒì‡D ™¾€Ï1š„25§V©Ëµmñ"œµ„°~þ6HãSüž »{GHôáx:%<8+‚Û²½k4\ ?ŸüÞO -è ~nè¸Ó\“¡ÍÑeåf -Y©ºŒL…¦4©"G^î~ŠàU^Úi“#UuH€ø%<W'1¶Y†4US–œª8]½ŽZuFùsK}~y».*ní8ýr´&7=\ÈÑfè29mVQ…˜ˆq®…´dæØÁ>ØÇ¢DÃRß1¦hÇ/K`v`Α¥}’_ óÏ`A€ß²QŠ¦Žmb½}<;‚;;ƒ:½Ei¿’!` ©ÛKQÆ}I‰ k˜ØmvÔ%zW -å{Xƒ‡!*XžÇ^1ÖÆŽ€É?1àÌØÔLÀÞ&ù8+°ˆÀÎ@Ù#(Ûx¨tý·ð6…U'”M(¬–Vï­â;(„Fæ¼®ú/4Ùi)Z9#€®”¾³¢¾¹6ë2 s¶ûäð°²ÃÌ붸´¦¢Rh45U4r£æ]è08ÝÊá¡žž! ¸pê€Gx¸'½Èr—;±—‚ÕP…ð?¼æŽ/ Q‡éèl5 0‘Q…)&CG³“ÛjÁ{ß›4(Žnñûùz€MQ¨)‡ -Ó4vE`éKå.ö™ú,bms±¹p„–FjMF¼æU]o[óÚ#hÖ—dF)ÐEÊ Ô&Ú€:x(•ž‡OúŸÆãVöàùœo9Û*ÔŸ(鹪îÑ^‘b\`Ʊe -€Ò|—òp ¼?2‰À5,æ)¾´šÍ­Oñ…Œ‰‰ŠhŠiiijji‰iŠAÑBºz ëéïïåqÒètw÷i`â¥?3¥%˜õ ê뛹J@þˆù'ඊ - iŽ:ÑÞÜÜ~BÕ,‚ŒÍè©)8ùÚà'@y‘ïí>ÑÞÓÚî'¶ÁÂ"›*p1Q®“V±ç‰ä¯ãkÙS-µB¤Ò–Ôè+4 °ÕS¹"H0“½ ÕÜ© (/-S3NÀiâáéúDò€ÚtQ]4œI4‹2&Ç®¾Vƒýƒ*/ÛöϱI@¬èúEé÷pë)8딜汣à Êa@ Ä6-ãEl½“%;º€!ARyyZ —_dÌ/‰ -Ç݈‰}˜Ð:±ÖkÀx ÌäÝ¢–γ\_S¨hîǃìŸ$èRÜ>uøp°Ïö¨BÛ|ñŒµÙ -(i|½Õ§zU“ ÀˆìŸIò1’m+(,,,((Î/.üÚÞ~ ®ØRb©(+-(¬°wxÚÙ7É<é;Él‰I2,¹+“gOž79rÅäÎÉßÚM±[n·Ù.ÒNm÷Ñ”W¦lšR5¥nÊ}b1L\ ®ïŸ?“!äWÔkTUHÕQŸÐ›èP:—®¢ÿ˜:yê SÓ§~Â,f¶01Œ†IgŒÌ s H€ LžÀ:À5pÓ^jÏÚO·çíØ/³ßn¿ßþ°½—}®}½}“ýÇö_8Lr f9¸9¨RÚ>vøÌá±Ãw¿;ŒK'IIéé*éZé^©Fš*Í•öJ‡¥W¥w¥Hÿp´wtr|ÅñUÇ×}3›O;žs¼êxÇñsÇ_œœ^rÚáäéádp:îÔã4êtÓé§Oþæô›Ó¿œ§9Ïs^îü†ófg/ç`çXçTçRç&çÎ=ÎÎÃÎç?tþÑ…p™á2Óe¹Ëz—Ý.‡]¼]]’]Š]Z]κÜuùÂåw-“ËfÈž“=/[#Û-ó”EÉ2dY™¬ZÖ ë•–Ý½#»'ûJö›œ”?#Vþ¼ü%ù+òuò=òCò ¹Zž&Ï’åeòZy£¼MÞ%‘ß’$$ÿ^>ÆNb¥ì3ìLv»}™]ƾÉîb²Al›Áæ²¥l-{œígÏ°çÙ«ìmö=öûû»ëdW©«Âu®ë‹®Ë]_wÝàºÕu·«›ëWo×`×XWk¶k‘k•«Åõ¤ë°ëˆëE×kc{þ,I?Ènˆ{s¢j9ß[<çuÎà”ñAUå'FŒ¼UÔݬr­ñM}A-h8ü¶—xæfÂûÜï6vˆ§{š?üVq“Ò\èÌ>–àÍù…×uʼn '2:rîÒ°Ìú,i8¤‡î 9jÚÇ!§óÑ\4÷Ó…Ð铳íOn‰˜ÜײÐ>¿®ï,7\„ìŒb¬ÈkêyH‰uí<4úâÛ<|óƒÞh>Þ€ž¢>˜…“o"䇎íÄ -œFÔ,¨¼`è Hé"°†ÝdÓÒˆÜÜ\}6—œYR+Â*êóµ£HŠˆµßð1G÷÷µš;«²+µ5BzYnQ‘¢¡¥æ¤þd‹ÐVy!·:-T¡ÍMÊIÐô=š¿"R;s+ÊæêÒÁ|Áh«vˆtKEõëLéåÑ] §!½ì­“ˆìZ]q©¢­¢t´@° au*«,½ö0 ÿqZ+ wZ£àÚñ(¢…„G­?Vö56tæOI¼‹c¹[Bò²»³Ù·óÙOË0Á]&‘#Ü@¼Dâl .‘pÊR+iaר«ôçik°Æ²rCWRœ›] –¦åé ÏÓÈ -Ë.ŠöUŸ ºÍAúÁïpÙ½äwbÛDSR|Y4çZÓ‘"¶¥çëJ2èÊäâ(¥bÏ[»½:ð u:¢783G¯ÏÁõÀ*{BF®àÁ#^_ÿ§ÈOnKx*òÇ\ š 'ƒ×¨CChÿcûÖp;m:kèDš~n›êÏ*êvn½ÆÊèÝ“S7dÐ*ë} -lÌ3Y]\>âe­'ÍÛyÙéÅŒìÁ«<ø¬ÂM8ÚŸJPbNÿÏ> Ñ”ùhÊÄ>Úbÿ )½öESƒ™%éï4Æ;#Ý+kÅ,3€­{·š¨üâò²2dðpÖ—è±³f©ÙÙKÞx]\¿Ñwá<¢{^y´VØpÿ«ÀŸ¸Ÿ¿ê~ÿ}ñþýžo~UüêÿxÝû‡o,î~–Cß»°p?\„oûá>¸-BûÑ^ôzíZ°æÄíMÂæ;¾Vxm!šÕ‡MbilA‚J‘Ñ?Œ¡Ò{ìÒÑ‘Ð IòëÜуe/ÐpëxºŽ¼ «NèáäÒ jk+yz*2Ùm؇¦h%附IWÈý2©N‰æÂâZú®œ:õq~qk"ŸaryEÙËðŒ™Ý™ŸZ©¿AÃ=Ôhë@GIi¦¦\ˆ¬¯No溛[{†‚:ŽRFm:*Êî~ŠöDS§²ËS&:6i©Þ™8põ 0`L -³Ø(Ò­Úô:Òê8F5ã T:F´’g xºÎÓU: ݆q—?ˆië -H¬/Á ö,"‰’¸‚”tETBª· R˦t2Ú˜Y¬éBeÖŸ¦EÈgütÜÖ°`?Ýt5Tšb©ÀÈzsi~eI£ØóñLg?CžZL‚»le<”˜p±Þ7©ÈCMa„.« Þ¾ZÑ0(~Ú{ññ -Hnú~f§Z–h\_˜`˜VNéÏÖÔàÌ4¡ûj39¢«MЦS)ŸÜÍ9‘™¾ñÓWø¸-™¯˜ñåOý„ØÓWR¯sðE«†ý9ëâkh¦‰û_›“% Ù®sŠö?„¢μôð§"Aú -?ÑæXË'¦Çƒe¼©ë/ð&/ËÜÀÈ6üUÐÑøÚ“-ÙÜÇ1¼6NglUh88 ÿ]ˆ.‚ÙÇ(…œ(6«à]ê*J£€—)«+"¡Š¬5” -å%õE­Ü{ç‚ßê[ܪÖ/W,R/ûïµi˜Œ´t¬Ìª ©…ÚjtÅ*wC·“–Jð–=¥¢fåF+4Ù¡¡ -Cá ¦ŒïWWß+xÊ#­Ó,ÊJ; t¹‰5^E¾+¶, -¬Á®¶QU¯Ž§_‹ôõ;¨8V—p3F,ú^E~–SfÜGÏ¢†ÇnÞîS?,x]Šç†Û†,BlU¶N¯ÏÈ™žÆ\ØÛ9uÉFwz<œÒ¹'§àU¶¾KõBo"’ Ý¢ŽŒ <Ý4opÛ*°CH£~ïS"²\l†ŸG‘ÉhŠ_À, ã©îŠÒ _žO‰hÚ#«Â*7ÓfxhÛ^ÐCÐÀÿÕçm -êZ;ÌüUÅ‘µ5{ç`A½Óí~Â.ö‹ÐL¤6ê ‹55¥-B RW9qm;áËèâ4ôŠ­ ®†_›Â¶ðù—h¸™‚ -ØÒs¿¼|¨hº íS›¨ •&+QƒÍº+üÂB®)Í(Õ}@Ãä ƒµô'íƒ×>R@zÑm´z¢]ÿNvU’q½­]¯ß‘ë™Cç¦ét©"Ô[?b›j+:ux¡IÛS”Q1‚::8sÆ–²K¶Dïß*®Ûî¿MQ`ƒ¤@3 <ˆüþpÇMáÖ¶5í³¹‚@ö÷kh&“ƒÛÐÔ™G=[‡"ÅÐ Éïþª€N¦†/ -µ}œJ©‹U'ëÊ3S²s³²Ò„ô¤œ´TÌ7kTäÎe¾ž[÷Ó‘£èµuÀ÷î¾ãŸö ªrB©Ö$Çq©µ}"¼`ë_)d\xûöAe¸VUs£{ÍÖ®š†Âbºç|us­¢8§ ###G›$èõ:]®y¡•Ór(ŒJ«°»§ïŒÜþ›âî±á]‡‚CÀÓ½­«VÛööÈðêpÝ–@>þjëYÄh²ótQ"$ -Ê Å±»ò - Œ­šãqä¤ÿ𙞓ÃglÕÀþú¦­¦{SëKÈÂ$»S­'ÄNúÀ#h!| ݧÀ #âÐkh-V`xÝÑóXÍÀNr t„ÄEéƶîÐQî››¿`©ê|ø›ÍÑ1!Áõq]Yb_¦A_©¥«‹#<s=·­÷¬í‰Z®V (ÎÄ´(£CyVh:bÛ7ÖÃG¤tÜàI?\cÿ¬¨š¨¨¹åÒ@§@ç`¤ƒið{ÛÒÐjæÌeqØBæàRï=¿ãH@øÆáG -mÙ_¨&òK - ùœyÂn߇ÎÔÄ’1àò"@ýÕ[Á€§E’bKÑ9Zúú¸{¡Yä±?jþ¦Á†âòÖ&Ü:h²¾ZÀ£¿f·%ñà SÀ5Áøê§Ý¡Ò‰îPéŸÝ¡¬¤t]&•cë}eMg‡ë•/¾˜päHBד'uÝgDàþ˜ÖçØ>&F¨çW0î¿´òQ2õÊzâõÕ=©ÿKWßL=íûK­snß¾mج´Ä­£ŽKåÜTW¡²-W¯t´G+«D`s21;ù¿Úä{¡œx@¡^(‹qƒþ«ˆ?ìHðC_j;‘“’¥Ïà|BºßOÑä:6# r{B˜›’Èy©GqèÅ.ö6û ºC”Æk‹Ë -ó‹‹Ê„²êlŽG™ƒýUª€ÝWŽ¶ZmL¤:`"Ò2 ?¯žì…5½.ð,ô_Ä2®’ý:dïzj²Ì¸<Þ,†‰V²›¿z»;mÇ\†Ò‹² Þôÿ›à+5@;L p6È3IÀ+L»©ÎâãáK| ÐùÃçy`3XôæC[—*Þê?zm¤ãĹ¡°ÞÙYyú,AŸ«ËÓq©¥}ý÷GEðSl(ãþž°ÛÃc¼x[3a_b)ÄÉq“ô!vSÆS’ÞPœP©?GÃD˜…)ÝÁDíÛÙ¥o)pÞØÈC¹Jô­#€•€ Ùõ¥¯ç8x³!¿¦¤AŒ?ÞœÞÊõ¶4o©ŠˆÎJÂÛs[BxLt]U Íùp6¯«|ëɧmyœ&`aÿ”6 t®¡ö¶­pø­Šz'פÁ„‚|Qô¥ €1oÚâ›Ybºž´ ZWD¶jcµŒ¿QÆe©aœ)“ü³r|š?n MÀ”±Ä†žh9Z†Qâ \ŽVÀCø¶®À{?[…ˆàd yµ[µ­K0y²¦­EGV+ß„CÝî×|3Hð4ÚÎî2jêuØ#?K=cÑžæ ñðp>œùÆdb  õHœº½nM«l%:Œ¡H°€Qûz¬æ!F\„D©Bþ,ŸhjÅ”5)Ï £h‡³Ë PvÜ¥>' [ -åìe@<˜åщP.‡î8”+ð͇r9|Lœh02à+Æêcb=È]s‰:€Ö·‘…ßœùãû”ƒ=¢þ{¢\ä{ÀÓÎx}sV='›Ñ„¤€n¶Îa;˜/?¯½Ždñ9q…Œøt]—QfÑ‹ C¨ -Š,x‹C;à N 2¾‡ák¸¾Æçܶj·PÒ£¼ídÖ±Cõ¡~Š M\THrÓÈ`AM}« e­s¶ŒXnS?¼=Ž¿t›G[G¼>¼wîÜ=0aKá6“änogm7³ÐÚ£¸x¤oë[îž»|ð°Þ=ŸÞþÁ㋆:îw ðþý€ûî≯¯Š²ŽñgCXUtrf4§Œ7ÛjèÚH2 )=-ŽKO.(‹Í{Ž”ÛúÂ(€B¯A"¾)³²Jaª©hÀŸýÀæþø ÜÅþ¿èy°†æõ4€/ÃÅp6¾¿Œ×îþë@S½ °;`õ¨«ØÒ#ÞÑ‘jn‹÷Íäˆ:0ª={ºaÓ™x“ P·]ú†¥Q ‡9­:Ó7gwÚü:@î‰×I„Áÿ<ˆ/›(ƒ‰Ë©-J»i€Vgó¶îkn5†'yCô9)"¸*×àû×ìÆüŽ¹â£‰ÖPœó;¾yë‘ð@ni—áºnøj¯D«Ô*›š-øäZ×pKBJ• Á¤ãÖD*Р 1¿Â¨¨b÷)ÓÛï`±¿fÁåX[!þnÿ3 ïwre`¦ -ØÎ}àÜ%àr€u=˜8«’€qtK~‡Î6‰jÆ[È…8Cžu;2vä#$ Ñ³”º ©4gÞ3Œí0#@¿ìÌÊ -ÒOWk'T²_Ž–Pài³ì¤®¦$1R¸(Ïlç?Àÿñå¼£ -endstream -endobj -1279 0 obj -<< -/Length 3048 ->> -stream -0 g 0 G +q +1 0 0 1 360.068 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 363.206 577.576 Td [(desc)]TJ +ET +q +1 0 0 1 384.755 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 387.893 577.576 Td [(type)]TJ 0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G +/F59 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G -BT -/F54 8.9664 Tf 209.77 645.656 Td [(Pr)18(ocess)-250(0)-7729(Pr)18(ocess)-250(1)]TJ -31.696 -10.959 Td [(I)-1333(GLOB\050I\051)-1334(X\050I\051)-4663(I)-1333(GLOB\050I\051)-1333(X\050I\051)]TJ -1.462 -10.959 Td [(1)-4607(1)-1754(1.0)-4500(1)-4107(33)-1753(2.0)]TJ 0 -10.959 Td [(2)-4607(2)-1754(1.0)-4500(2)-4107(34)-1753(2.0)]TJ 0 -10.959 Td [(3)-4607(3)-1754(1.0)-4500(3)-4107(35)-1753(2.0)]TJ 0 -10.959 Td [(4)-4607(4)-1754(1.0)-4500(4)-4107(36)-1753(2.0)]TJ 0 -10.959 Td [(5)-4607(5)-1754(1.0)-4500(5)-4107(37)-1753(2.0)]TJ 0 -10.959 Td [(6)-4607(6)-1754(1.0)-4500(6)-4107(38)-1753(2.0)]TJ 0 -10.959 Td [(7)-4607(7)-1754(1.0)-4500(7)-4107(39)-1753(2.0)]TJ 0 -10.958 Td [(8)-4607(8)-1754(1.0)-4500(8)-4107(40)-1753(2.0)]TJ 0 -10.959 Td [(9)-4607(9)-1754(1.0)-4500(9)-4107(41)-1753(2.0)]TJ -4.483 -10.959 Td [(10)-4107(10)-1754(1.0)-4000(10)-4107(42)-1753(2.0)]TJ 0 -10.959 Td [(11)-4107(11)-1754(1.0)-4000(11)-4107(43)-1753(2.0)]TJ 0 -10.959 Td [(12)-4107(12)-1754(1.0)-4000(12)-4107(44)-1753(2.0)]TJ 0 -10.959 Td [(13)-4107(13)-1754(1.0)-4000(13)-4107(45)-1753(2.0)]TJ 0 -10.959 Td [(14)-4107(14)-1754(1.0)-4000(14)-4107(46)-1753(2.0)]TJ 0 -10.959 Td [(15)-4107(15)-1754(1.0)-4000(15)-4107(47)-1753(2.0)]TJ 0 -10.959 Td [(16)-4107(16)-1754(1.0)-4000(16)-4107(48)-1753(2.0)]TJ 0 -10.959 Td [(17)-4107(17)-1754(1.0)-4000(17)-4107(49)-1753(2.0)]TJ 0 -10.958 Td [(18)-4107(18)-1754(1.0)-4000(18)-4107(50)-1753(2.0)]TJ 0 -10.959 Td [(19)-4107(19)-1754(1.0)-4000(19)-4107(51)-1753(2.0)]TJ 0 -10.959 Td [(20)-4107(20)-1754(1.0)-4000(20)-4107(52)-1753(2.0)]TJ 0 -10.959 Td [(21)-4107(21)-1754(1.0)-4000(21)-4107(53)-1753(2.0)]TJ 0 -10.959 Td [(22)-4107(22)-1754(1.0)-4000(22)-4107(54)-1753(2.0)]TJ 0 -10.959 Td [(23)-4107(23)-1754(1.0)-4000(23)-4107(55)-1753(2.0)]TJ 0 -10.959 Td [(24)-4107(24)-1754(1.0)-4000(24)-4107(56)-1753(2.0)]TJ 0 -10.959 Td [(25)-4107(25)-1754(1.0)-4000(25)-4107(57)-1753(2.0)]TJ 0 -10.959 Td [(26)-4107(26)-1754(1.0)-4000(26)-4107(58)-1753(2.0)]TJ 0 -10.959 Td [(27)-4107(27)-1754(1.0)-4000(27)-4107(59)-1753(2.0)]TJ 0 -10.958 Td [(28)-4107(28)-1754(1.0)-4000(28)-4107(60)-1753(2.0)]TJ 0 -10.959 Td [(29)-4107(29)-1754(1.0)-4000(29)-4107(61)-1753(2.0)]TJ 0 -10.959 Td [(30)-4107(30)-1754(1.0)-4000(30)-4107(62)-1753(2.0)]TJ 0 -10.959 Td [(31)-4107(31)-1754(1.0)-4000(31)-4107(63)-1753(2.0)]TJ 0 -10.959 Td [(32)-4107(32)-1754(1.0)-4000(32)-4107(64)-1753(2.0)]TJ 0 -10.959 Td [(33)-4107(33)-1754(2.0)-4000(33)-4107(25)-1753(1.0)]TJ 0 -10.959 Td [(34)-4107(34)-1754(2.0)-4000(34)-4107(26)-1753(1.0)]TJ 0 -10.959 Td [(35)-4107(35)-1754(2.0)-4000(35)-4107(27)-1753(1.0)]TJ 0 -10.959 Td [(36)-4107(36)-1754(2.0)-4000(36)-4107(28)-1753(1.0)]TJ 0 -10.959 Td [(37)-4107(37)-1754(2.0)-4000(37)-4107(29)-1753(1.0)]TJ 0 -10.958 Td [(38)-4107(38)-1754(2.0)-4000(38)-4107(30)-1753(1.0)]TJ 0 -10.959 Td [(39)-4107(39)-1754(2.0)-4000(39)-4107(31)-1753(1.0)]TJ 0 -10.959 Td [(40)-4107(40)-1754(2.0)-4000(40)-4107(32)-1753(1.0)]TJ + 0 -19.925 Td [(info)]TJ 0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G -/F54 9.9626 Tf 94.641 -105.903 Td [(61)]TJ + 141.968 -397.474 Td [(78)]TJ 0 g 0 G ET endstream endobj -1287 0 obj +1508 0 obj << -/Length 7519 +/Length 5710 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(5.2)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(6.6)-1000(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm +1 0 0 1 147.429 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(ovrl)-250(\227)-250(Overlap)-250(Update)]TJ/F54 9.9626 Tf -51.12 -18.964 Td [(These)-250(subr)18(outines)-250(applies)-250(an)-250(overlap)-250(operator)-250(to)-250(the)-250(input)-250(vector:)]TJ/F52 9.9626 Tf 154.518 -23.824 Td [(x)]TJ/F83 10.3811 Tf 8.097 0 Td [(\040)]TJ/F52 9.9626 Tf 13.497 0 Td [(Q)-42(x)]TJ/F54 9.9626 Tf -176.112 -21.014 Td [(wher)18(e:)]TJ +/F59 11.9552 Tf 151.016 706.129 Td [(cdbldext)-190(\227)-190(Build)-190(an)-190(extended)-190(communication)-190(descrip-)]TJ -24.221 -13.948 Td [(tor)]TJ 0 g 0 G -/F52 9.9626 Tf 0.294 -19.203 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 10.186 0 Td [(is)-250(the)-250(global)-250(dense)-250(submatrix)]TJ/F52 9.9626 Tf 131.351 0 Td [(x)]TJ +/F67 9.9626 Tf -26.9 -19.693 Td [(call)-525(psb_cdbldext\050a,desc_a,nl,desc_out,)-525(info,)-525(extype\051)]TJ/F62 9.9626 Tf 14.944 -23.422 Td [(This)-379(subr)18(outine)-379(builds)-379(an)-379(extended)-379(communication)-379(descriptor)74(,)-411(based)-379(on)]TJ -14.944 -11.955 Td [(the)-428(input)-428(descriptor)]TJ/F67 9.9626 Tf 95.499 0 Td [(desc_a)]TJ/F62 9.9626 Tf 35.646 0 Td [(and)-428(on)-428(the)-428(stencil)-428(speci\002ed)-428(thr)18(ough)-428(the)-427(input)]TJ -131.145 -11.955 Td [(sparse)-250(matrix)]TJ/F67 9.9626 Tf 62.107 0 Td [(a)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ 0 g 0 G - -141.607 -19.564 Td [(Q)]TJ +/F59 9.9626 Tf -67.337 -21.054 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 12.857 0 Td [(is)-250(the)-250(overlap)-250(operator;)-250(it)-250(is)-250(the)-250(composition)-250(of)-250(two)-250(operators)]TJ/F52 9.9626 Tf 271.931 0 Td [(P)]TJ/F52 7.5716 Tf 5.424 -1.494 Td [(a)]TJ/F54 9.9626 Tf 6.445 1.494 Td [(and)]TJ/F52 9.9626 Tf 19.681 0 Td [(P)]TJ/F52 7.5716 Tf 6.405 3.616 Td [(T)]TJ/F54 9.9626 Tf 5.401 -3.616 Td [(.)]TJ +/F59 9.9626 Tf -29.828 -21.429 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G + 0 -21.43 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(A)-250(sparse)-250(matrix)-250(Scope:)]TJ/F59 9.9626 Tf 100.691 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -107.326 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(type.)]TJ 0 g 0 G +/F59 9.9626 Tf -24.907 -21.429 Td [(desc)]TJ ET q -1 0 0 1 230.392 581.71 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +1 0 0 1 120.408 504.147 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F52 9.9626 Tf 236.663 573.142 Td [(x)]TJ/F51 9.9626 Tf 120.622 0 Td [(Subroutine)]TJ +/F59 9.9626 Tf 123.397 503.948 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 230.392 569.356 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +1 0 0 1 309.258 456.326 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 236.369 560.788 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F67 9.9626 Tf 312.397 456.127 Td [(Tspmat)]TJ ET q -1 0 0 1 373.603 560.988 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 344.406 456.326 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 376.592 560.788 Td [(ovrl)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F67 9.9626 Tf 347.544 456.127 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -268.571 -21.43 Td [(nl)]TJ +0 g 0 G +/F62 9.9626 Tf 14.386 0 Td [(the)-250(number)-250(of)-250(additional)-250(layers)-250(desir)18(ed.)]TJ 10.521 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F60 9.9626 Tf 131.102 0 Td [(n)-25(l)]TJ/F91 10.3811 Tf 11.873 0 Td [(\025)]TJ/F62 9.9626 Tf 10.961 0 Td [(0.)]TJ +0 g 0 G +/F59 9.9626 Tf -178.843 -21.43 Td [(extype)]TJ +0 g 0 G +/F62 9.9626 Tf 34.869 0 Td [(the)-250(kind)-250(of)-250(estension)-250(r)18(equir)18(ed.)]TJ -9.962 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 40.677 0 Td [(.)]TJ -64.677 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-345(as:)-501(an)-345(integer)-346(value)]TJ/F67 9.9626 Tf 136.676 0 Td [(psb_ovt_xhal_)]TJ/F62 9.9626 Tf 67.994 0 Td [(,)]TJ/F67 9.9626 Tf 6.169 0 Td [(psb_ovt_asov_)]TJ/F62 9.9626 Tf 67.994 0 Td [(,)-369(default:)]TJ/F67 9.9626 Tf -278.833 -11.955 Td [(psb_ovt_xhal_)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -23.422 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -21.43 Td [(desc)]TJ ET q -1 0 0 1 373.603 549.032 cm +1 0 0 1 120.408 261.018 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 548.833 Td [(ovrl)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F59 9.9626 Tf 123.397 260.819 Td [(out)]TJ +0 g 0 G +/F62 9.9626 Tf 19.925 0 Td [(the)-250(extended)-250(communication)-250(descriptor)74(.)]TJ -18.52 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 373.603 537.077 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 309.258 213.198 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 376.592 536.878 Td [(ovrl)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F67 9.9626 Tf 312.397 212.998 Td [(desc)]TJ ET q -1 0 0 1 373.603 525.122 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 333.945 213.198 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 376.592 524.923 Td [(ovrl)]TJ -ET -q -1 0 0 1 230.392 521.137 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q +/F67 9.9626 Tf 337.084 212.998 Td [(type)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 278.277 492.758 Td [(T)92(able)-250(18:)-310(Data)-250(types)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G +/F59 9.9626 Tf -258.11 -21.429 Td [(info)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -127.572 -23.549 Td [(call)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -23.422 Td [(Notes)]TJ 0 g 0 G - [-525(psb_ovrl\050x,)-525(desc_a,)-525(info\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ +/F62 9.9626 Tf 166.875 -29.888 Td [(79)]TJ 0 g 0 G - [-525(psb_ovrl\050x,)-525(desc_a,)-525(info,)-525(update)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +ET + +endstream +endobj +1513 0 obj +<< +/Length 1484 +>> +stream 0 g 0 G - [(update_type,)-525(work)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ 0 g 0 G - [(work\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.014 Td [(T)90(ype:)]TJ +BT +/F62 9.9626 Tf 163.158 706.129 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ + [-500(Specifying)]TJ/F67 9.9626 Tf 61.745 0 Td [(psb_ovt_xhal_)]TJ/F62 9.9626 Tf 70.881 0 Td [(for)-290(the)]TJ/F67 9.9626 Tf 32.282 0 Td [(extype)]TJ/F62 9.9626 Tf 34.269 0 Td [(ar)18(gument)-290(the)-289(user)-290(will)-290(obtain)]TJ -186.724 -11.955 Td [(a)-400(descriptor)-400(for)-400(a)-400(domain)-400(partition)-400(in)-400(which)-400(the)-400(additional)-400(layers)-400(ar)18(e)]TJ 0 -11.955 Td [(fetched)-222(as)-221(part)-222(of)-221(an)-222(\050extended\051)-221(halo;)-232(however)-221(the)-222(index-to-pr)18(ocess)-221(map-)]TJ 0 -11.956 Td [(ping)-250(is)-250(identical)-250(to)-250(that)-250(of)-250(the)-250(base)-250(descriptor;)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.564 Td [(On)-250(Entry)]TJ + -12.453 -19.925 Td [(2.)]TJ 0 g 0 G + [-500(Specifying)]TJ/F67 9.9626 Tf 61.745 0 Td [(psb_ovt_asov_)]TJ/F62 9.9626 Tf 70.881 0 Td [(for)-290(the)]TJ/F67 9.9626 Tf 32.282 0 Td [(extype)]TJ/F62 9.9626 Tf 34.269 0 Td [(ar)18(gument)-290(the)-289(user)-290(will)-290(obtain)]TJ -186.724 -11.955 Td [(a)-330(descriptor)-331(with)-330(an)-330(overlapped)-331(decomposition:)-470(the)-331(additional)-330(layer)-330(is)]TJ 0 -11.955 Td [(aggr)18(egated)-326(to)-326(the)-326(local)-326(subdomain)-326(\050and)-326(thus)-326(is)-325(an)-326(overlap\051,)-345(and)-326(a)-326(new)]TJ 0 -11.955 Td [(halo)-250(extending)-250(beyond)-250(the)-250(last)-250(additional)-250(layer)-250(is)-250(formed.)]TJ 0 g 0 G - 0 -19.564 Td [(x)]TJ + 141.968 -524.035 Td [(80)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 89.687 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -79.948 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-207(or)-208(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.742 0 Td [(psb)]TJ -ET -q -1 0 0 1 436.673 349.49 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 439.811 349.291 Td [(T)]TJ ET -q -1 0 0 1 445.669 349.49 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q + +endstream +endobj +1521 0 obj +<< +/Length 5699 +>> +stream +0 g 0 G +0 g 0 G BT -/F59 9.9626 Tf 448.807 349.291 Td [(vect)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(6.7)-1000(psb)]TJ ET q -1 0 0 1 470.356 349.49 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 473.495 349.291 Td [(type)]TJ +/F59 11.9552 Tf 151.016 706.129 Td [(spall)-250(\227)-250(Allocates)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G -/F54 9.9626 Tf -297.884 -11.955 Td [(containing)-250(numbers)-250(of)-250(type)-250(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(18)]TJ 0 g 0 G - [(.)]TJ +/F67 9.9626 Tf -51.121 -19.277 Td [(call)-525(psb_spall\050a,)-525(desc_a,)-525(info)-525([,)-525(nnz,)-525(dupl,)-525(bldmode]\051)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.564 Td [(desc)]TJ +/F59 9.9626 Tf 0 -22.403 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -20.571 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -20.572 Td [(desc)]TJ ET q -1 0 0 1 171.218 317.971 cm +1 0 0 1 120.408 623.505 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 317.772 Td [(a)]TJ +/F59 9.9626 Tf 123.397 623.306 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 360.068 270.151 cm +1 0 0 1 309.258 575.684 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 363.206 269.951 Td [(desc)]TJ +/F67 9.9626 Tf 312.397 575.485 Td [(desc)]TJ ET q -1 0 0 1 384.755 270.151 cm +1 0 0 1 333.945 575.684 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 387.893 269.951 Td [(type)]TJ +/F67 9.9626 Tf 337.084 575.485 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -258.11 -20.572 Td [(nnz)]TJ +0 g 0 G +/F62 9.9626 Tf 22.137 0 Td [(An)-230(estimate)-230(of)-230(the)-230(number)-230(of)-231(nonzer)18(oes)-230(in)-230(the)-230(local)-230(part)-230(of)-230(the)-230(assembled)]TJ 2.77 -11.955 Td [(matrix.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -20.572 Td [(dupl)]TJ +0 g 0 G +/F62 9.9626 Tf 26.561 0 Td [(How)-250(to)-250(handle)-250(duplicate)-250(coef)18(\002cients.)]TJ -1.654 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-243(as:)-306(integer)74(,)-244(possible)-243(values:)]TJ/F67 9.9626 Tf 164.941 0 Td [(psb_dupl_ovwrt_)]TJ/F62 9.9626 Tf 78.455 0 Td [(,)]TJ/F67 9.9626 Tf 4.923 0 Td [(psb_dupl_add_)]TJ/F62 9.9626 Tf 67.995 0 Td [(,)]TJ/F67 9.9626 Tf -316.314 -11.955 Td [(psb_dupl_err_)]TJ/F62 9.9626 Tf 67.994 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F59 9.9626 Tf -92.901 -20.572 Td [(bldmode)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -19.564 Td [(update)]TJ +/F62 9.9626 Tf 45.938 0 Td [(Whether)-372(to)-372(kee)1(p)-372(track)-372(of)-372(matrix)-372(entries)-371(that)-372(do)-372(not)-372(belong)-371(to)-372(the)]TJ -21.031 -11.955 Td [(curr)18(ent)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-190(as:)-280(an)-190(integer)-190(value)]TJ/F67 9.9626 Tf 128.287 0 Td [(psb_matbld_noremote_)]TJ/F62 9.9626 Tf 104.607 0 Td [(,)]TJ/F67 9.9626 Tf 4.503 0 Td [(psb_matbld_remote_)]TJ/F62 9.9626 Tf 94.146 0 Td [(.)]TJ -331.543 -11.955 Td [(Default:)]TJ/F67 9.9626 Tf 38.515 0 Td [(psb_matbld_noremote_)]TJ/F62 9.9626 Tf 104.607 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 36.523 0 Td [(Update)-250(operator)74(.)]TJ +/F59 9.9626 Tf -168.029 -22.402 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -11.617 -31.519 Td [(update)-250(=)-250(psb)]TJ +0 g 0 G + 0 -20.572 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(to)-250(be)-250(allocated.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf -28.343 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf -24 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 235.367 219.067 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 309.258 231.892 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 238.356 218.868 Td [(none)]TJ +/F67 9.9626 Tf 312.397 231.692 Td [(Tspmat)]TJ ET q -1 0 0 1 261.648 219.067 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 344.406 231.892 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 269.619 218.868 Td [(Do)-250(nothing;)]TJ +/F67 9.9626 Tf 347.544 231.692 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -268.571 -20.571 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -22.564 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.454 -20.41 Td [(1.)]TJ +0 g 0 G + [-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(sparse)-250(matrix)-250(is)-250(in)-250(the)-250(build)-250(state.)]TJ +0 g 0 G + 154.421 -29.888 Td [(81)]TJ 0 g 0 G -/F51 9.9626 Tf -94.008 -15.579 Td [(update)-250(=)-250(psb)]TJ ET -q -1 0 0 1 235.367 203.488 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + +endstream +endobj +1526 0 obj +<< +/Length 1141 +>> +stream +0 g 0 G +0 g 0 G +0 g 0 G BT -/F51 9.9626 Tf 238.356 203.289 Td [(add)]TJ +/F62 9.9626 Tf 163.158 706.129 Td [(2.)]TJ +0 g 0 G + [-500(The)-250(descriptor)-250(may)-250(be)-250(in)-250(either)-250(the)-250(build)-250(or)-250(assembled)-250(state.)]TJ +0 g 0 G + 0 -19.925 Td [(3.)]TJ +0 g 0 G + [-500(Pr)18(oviding)-219(a)-219(good)-219(estimate)-218(for)-219(the)-219(number)-219(of)-219(nonzer)18(oes)]TJ/F60 9.9626 Tf 255.761 0 Td [(n)-25(n)-25(z)]TJ/F62 9.9626 Tf 18.305 0 Td [(in)-219(the)-219(assem-)]TJ -261.613 -11.956 Td [(bled)-295(matri)1(x)-295(may)-294(substantially)-295(impr)18(ove)-294(performance)-295(in)-294(the)-295(matrix)-294(build)]TJ 0 -11.955 Td [(phase,)-370(as)-346(it)-346(will)-345(r)18(educe)-346(or)-346(eliminate)-346(the)-346(need)-346(for)-345(\050potentially)-346(multiple\051)]TJ 0 -11.955 Td [(data)-250(r)18(eallocations;)]TJ +0 g 0 G + -12.453 -19.925 Td [(4.)]TJ +0 g 0 G + [-500(Using)]TJ/F67 9.9626 Tf 41.798 0 Td [(psb_matbld_remote_)]TJ/F62 9.9626 Tf 97.28 0 Td [(is)-315(likel)1(y)-315(to)-315(cause)-314(a)-315(r)8(untime)-314(over)18(head)-315(at)-314(as-)]TJ -126.625 -11.955 Td [(sembly)-250(time;)]TJ +0 g 0 G + 141.968 -528.02 Td [(82)]TJ +0 g 0 G ET -q -1 0 0 1 256.109 203.488 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + +endstream +endobj +1534 0 obj +<< +/Length 5375 +>> +stream 0 g 0 G -BT -/F54 9.9626 Tf 264.079 203.289 Td [(Sum)-250(overlap)-250(entries,)-250(i.e.)-310(apply)]TJ/F52 9.9626 Tf 137.239 0 Td [(P)]TJ/F52 7.5716 Tf 6.405 3.617 Td [(T)]TJ/F54 9.9626 Tf 5.4 -3.617 Td [(;)]TJ 0 g 0 G -/F51 9.9626 Tf -237.512 -15.579 Td [(update)-250(=)-250(psb)]TJ +BT +/F59 11.9552 Tf 99.895 706.129 Td [(6.8)-1000(psb)]TJ ET q -1 0 0 1 235.367 187.91 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 238.356 187.71 Td [(avg)]TJ +/F59 11.9552 Tf 151.016 706.129 Td [(spins)-233(\227)-233(Insert)-233(a)-233(set)-233(of)-234(coef)18(\002cients)-233(into)-233(a)-233(sparse)-233(matrix)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -51.121 -20.373 Td [(call)-525(psb_spins\050nz,)-525(ia,)-525(ja,)-525(val,)-525(a,)-525(desc_a,)-525(info)-525([,local]\051)]TJ 0 -11.956 Td [(call)-525(psb_spins\050nr,)-525(irw,)-525(irp,)-525(ja,)-525(val,)-525(a,)-525(desc_a,)-525(info)-525([,local]\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -24.099 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -22.835 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -22.834 Td [(nz)]TJ +0 g 0 G +/F62 9.9626 Tf 16.05 0 Td [(the)-250(number)-250(of)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 8.857 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.834 Td [(nr)]TJ +0 g 0 G +/F62 9.9626 Tf 14.944 0 Td [(the)-250(number)-250(of)-250(r)18(ows)-250(to)-250(be)-250(inserted.)]TJ 9.963 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.834 Td [(irw)]TJ +0 g 0 G +/F62 9.9626 Tf 20.473 0 Td [(the)-250(\002rst)-250(r)18(ow)-250(to)-250(be)-250(inserted.)]TJ 4.434 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.834 Td [(ia)]TJ +0 g 0 G +/F62 9.9626 Tf 13.281 0 Td [(the)-250(r)18(ow)-250(indices)-250(of)-250(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 11.626 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(size)]TJ/F60 9.9626 Tf 160.8 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 10.336 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -196.043 -22.834 Td [(irp)]TJ +0 g 0 G +/F62 9.9626 Tf 18.262 0 Td [(the)-250(r)18(ow)-250(pointers)-250(of)-250(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 6.645 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(size)]TJ/F60 9.9626 Tf 160.8 0 Td [(n)-15(r)]TJ/F93 10.3811 Tf 11.85 0 Td [(+)]TJ/F62 9.9626 Tf 10.131 0 Td [(1.)]TJ +0 g 0 G +/F59 9.9626 Tf -207.688 -22.835 Td [(ja)]TJ +0 g 0 G +/F62 9.9626 Tf 13.28 0 Td [(the)-250(column)-250(indices)-250(of)-250(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 11.627 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(size)]TJ/F60 9.9626 Tf 160.8 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 10.336 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -196.043 -22.835 Td [(val)]TJ +0 g 0 G +/F62 9.9626 Tf 18.82 0 Td [(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 6.087 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-315(as:)-439(an)-314(array)-315(of)-315(size)]TJ/F60 9.9626 Tf 131.853 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 10.337 0 Td [(.)-504(Must)-314(be)-315(of)-315(the)-314(same)-315(type)-315(and)-314(kind)-315(of)]TJ -142.19 -11.956 Td [(the)-250(coef)18(\002cients)-250(of)-250(the)-250(sparse)-250(matrix)]TJ/F60 9.9626 Tf 157.901 0 Td [(a)]TJ/F62 9.9626 Tf 4.548 0 Td [(.)]TJ +0 g 0 G + -20.481 -29.887 Td [(83)]TJ +0 g 0 G ET -q -1 0 0 1 255.013 187.91 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + +endstream +endobj +1540 0 obj +<< +/Length 6861 +>> +stream +0 g 0 G +0 g 0 G 0 g 0 G BT -/F54 9.9626 Tf 262.983 187.71 Td [(A)92(verage)-250(overlap)-250(entries,)-250(i.e.)-310(apply)]TJ/F52 9.9626 Tf 153.667 0 Td [(P)]TJ/F52 7.5716 Tf 5.424 -1.494 Td [(a)]TJ/F52 9.9626 Tf 4.278 1.494 Td [(P)]TJ/F52 7.5716 Tf 6.405 3.617 Td [(T)]TJ/F54 9.9626 Tf 5.401 -3.617 Td [(;)]TJ -262.547 -19.564 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F52 9.9626 Tf 38.64 0 Td [(u)-80(p)-25(d)-40(a)-25(t)-25(e)]TJ +/F59 9.9626 Tf 150.705 706.129 Td [(desc)]TJ ET q -1 0 0 1 244.034 144.435 cm +1 0 0 1 171.218 706.328 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F52 9.9626 Tf 247.147 144.236 Td [(t)-25(y)-80(p)-25(e)]TJ/F85 10.3811 Tf 21.467 0 Td [(=)]TJ/F52 9.9626 Tf 11.634 0 Td [(p)-25(s)-25(b)]TJ +/F59 9.9626 Tf 174.207 706.129 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.381 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 136.328 0 Td [(psb)]TJ ET q -1 0 0 1 294.938 144.435 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 328.257 658.507 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F52 9.9626 Tf 298.201 144.236 Td [(a)-25(v)-47(g)]TJ +/F67 9.9626 Tf 331.395 658.308 Td [(desc)]TJ ET q -1 0 0 1 314.026 144.435 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 352.944 658.507 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 175.611 132.281 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(integer)-250(variable.)]TJ +/F67 9.9626 Tf 356.083 658.308 Td [(type)]TJ 0 g 0 G - 141.968 -29.888 Td [(62)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -ET - -endstream -endobj -1296 0 obj -<< -/Length 5447 ->> -stream +/F59 9.9626 Tf -226.299 -33.398 Td [(local)]TJ +0 g 0 G +/F62 9.9626 Tf 26.56 0 Td [(Whether)-207(the)-207(entries)-207(in)-207(the)-208(indices)-207(vectors)]TJ/F67 9.9626 Tf 181.487 0 Td [(ia)]TJ/F62 9.9626 Tf 10.46 0 Td [(,)]TJ/F67 9.9626 Tf 4.64 0 Td [(ja)]TJ/F62 9.9626 Tf 12.524 0 Td [(ar)18(e)-207(alr)18(eady)-207(in)-207(local)-208(num-)]TJ -210.765 -11.956 Td [(bering.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -62.187 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value;)-250(default:)]TJ/F67 9.9626 Tf 162.678 0 Td [(.false.)]TJ/F62 9.9626 Tf 36.613 0 Td [(.)]TJ 0 g 0 G +/F59 9.9626 Tf -224.197 -23.056 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G + 0 -21.444 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(the)-250(matrix)-250(into)-250(which)-250(coef)18(\002cients)-250(will)-250(be)-250(inserted.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf -24 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 360.068 484.968 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 363.206 484.768 Td [(Tspmat)]TJ +ET +q +1 0 0 1 395.216 484.968 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F51 9.9626 Tf 99.895 706.129 Td [(work)]TJ +/F67 9.9626 Tf 398.354 484.768 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(the)-250(work)-250(array)111(.)]TJ -3.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(one)-250(dimensional)-250(array)-250(of)-250(the)-250(same)-250(type)-250(of)]TJ/F52 9.9626 Tf 252.794 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -282.907 -19.925 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -268.57 -21.443 Td [(desc)]TJ +ET +q +1 0 0 1 171.218 463.524 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 174.207 463.325 Td [(a)]TJ 0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.381 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 136.328 0 Td [(psb)]TJ +ET +q +1 0 0 1 328.257 415.704 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 331.395 415.504 Td [(desc)]TJ +ET +q +1 0 0 1 352.944 415.704 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 356.083 415.504 Td [(type)]TJ 0 g 0 G - 0 -19.925 Td [(x)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(global)-250(dense)-250(r)18(esult)-250(matrix)]TJ/F52 9.9626 Tf 117.084 0 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(.)]TJ -107.346 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-379(as:)-568(an)-379(array)-379(of)-379(rank)-379(one)-379(or)-379(two)-379(containing)-379(numbers)-379(of)-379(type)]TJ 0 -11.955 Td [(speci\002ed)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(18)]TJ +/F59 9.9626 Tf -226.299 -33.398 Td [(info)]TJ 0 g 0 G - [(.)]TJ +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.906 -23.436 Td [(Notes)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.926 Td [(info)]TJ +/F62 9.9626 Tf 12.453 -21.064 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ + [-500(On)-312(entry)-312(to)-312(this)-312(r)18(out)1(ine)-312(the)-312(descriptor)-312(may)-312(be)-312(in)-312(either)-312(the)-311(build)-312(or)-312(as-)]TJ 12.453 -11.955 Td [(sembled)-250(state.)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ + -12.453 -21.443 Td [(2.)]TJ 0 g 0 G - [-500(If)-241(ther)18(e)-240(is)-241(no)-241(overlap)-240(in)-241(the)-241(data)-240(distribution)-241(associated)-241(with)-240(the)-241(descrip-)]TJ 12.453 -11.955 Td [(tor)74(,)-250(no)-250(operations)-250(ar)18(e)-250(performed;)]TJ + [-500(On)-314(entry)-315(to)-314(this)-315(r)18(ou)1(tine)-315(the)-314(sparse)-315(matrix)-314(may)-314(be)-315(in)-314(either)-314(the)-315(build)-314(or)]TJ 12.453 -11.955 Td [(update)-250(state.)]TJ 0 g 0 G - -12.453 -19.926 Td [(2.)]TJ + -12.453 -21.444 Td [(3.)]TJ 0 g 0 G - [-500(The)-284(operator)]TJ/F52 9.9626 Tf 72.855 0 Td [(P)]TJ/F52 7.5716 Tf 6.405 3.617 Td [(T)]TJ/F54 9.9626 Tf 8.232 -3.617 Td [(performs)-284(the)-284(r)18(eduction)-285(sum)-284(of)-284(overlap)-284(elements;)-302(it)-284(is)-284(a)]TJ -75.039 -11.955 Td [(\223pr)18(olongation\224)-265(operator)]TJ/F52 9.9626 Tf 110.535 0 Td [(P)]TJ/F52 7.5716 Tf 6.405 3.616 Td [(T)]TJ/F54 9.9626 Tf 8.044 -3.616 Td [(that)-265(r)18(eplicates)-266(ov)1(erlap)-266(elements,)-269(accounting)]TJ -124.984 -11.955 Td [(for)-250(the)-250(physical)-250(r)18(eplication)-250(of)-250(data;)]TJ + [-500(If)-263(the)-263(descriptor)-263(is)-262(in)-263(the)-263(build)-263(state,)-266(then)-263(the)-263(sparse)-263(matrix)-262(must)-263(also)-263(be)]TJ 12.453 -11.955 Td [(in)-212(the)-212(build)-213(state;)-224(the)-213(action)-212(of)-212(the)-212(r)18(outine)-212(is)-213(to)-212(\050implicitly\051)-212(call)]TJ/F67 9.9626 Tf 271.732 0 Td [(psb_cdins)]TJ/F62 9.9626 Tf -271.732 -11.955 Td [(to)-259(add)-259(entries)-259(to)-259(the)-259(sparsity)-259(pattern;)-263(each)-259(sparse)-259(matrix)-259(entry)-259(implicitly)]TJ 0 -11.955 Td [(de\002nes)-288(a)-288(graph)-288(edge,)-297(that)-288(is)-288(passed)-288(to)-288(the)-288(descriptor)-288(r)18(outine)-288(for)-288(the)-288(ap-)]TJ 0 -11.955 Td [(pr)18(opriate)-250(pr)18(ocessing;)]TJ 0 g 0 G - -12.453 -19.925 Td [(3.)]TJ + -12.453 -21.444 Td [(4.)]TJ 0 g 0 G - [-500(The)-190(operator)]TJ/F52 9.9626 Tf 70.978 0 Td [(P)]TJ/F52 7.5716 Tf 5.423 -1.495 Td [(a)]TJ/F54 9.9626 Tf 5.848 1.495 Td [(performs)-190(a)-190(scaling)-190(on)-190(the)-190(overlap)-190(elements)-190(by)-190(the)-190(amount)]TJ -69.796 -11.956 Td [(of)-325(r)18(eplication;)-363(thus,)-343(when)-325(combined)-325(with)-325(the)-325(r)18(eduction)-325(operator)74(,)-344(it)-325(im-)]TJ 0 -11.955 Td [(plements)-250(the)-250(average)-250(of)-250(r)18(eplicated)-250(elements)-250(over)-250(all)-250(of)-250(their)-250(instances.)]TJ/F51 11.9552 Tf -24.907 -19.925 Td [(Example)-320(of)-320(use)]TJ/F54 9.9626 Tf 87.879 0 Td [(Consider)-320(the)-320(discr)18(etization)-320(mesh)-320(depicted)-320(in)-320(\002g.)]TJ -0 0 1 rg 0 0 1 RG - [-320(4)]TJ + [-500(The)-250(input)-250(data)-250(can)-250(be)-250(passed)-250(in)-250(either)-250(COO)-250(or)-250(CSR)-250(formats;)]TJ 0 g 0 G - [(,)-337(parti-)]TJ -87.879 -11.955 Td [(tioned)-262(among)-262(two)-263(pr)18(ocesse)1(s)-263(as)-262(shown)-262(by)-262(the)-262(dashed)-263(li)1(nes,)-266(with)-262(an)-262(overlap)-262(of)]TJ 0 -11.955 Td [(1)-261(extr)1(a)-261(layer)-260(with)-261(r)18(espect)-260(to)-261(the)-260(partition)-261(of)-260(\002g.)]TJ -0 0 1 rg 0 0 1 RG - [-261(3)]TJ + 0 -21.443 Td [(5.)]TJ 0 g 0 G - [(;)-265(the)-261(data)-260(distribution)-261(is)-260(such)]TJ 0 -11.956 Td [(that)-267(each)-268(pr)18(ocess)-267(will)-267(own)-267(40)-268(entries)-267(in)-267(the)-267(index)-268(space,)-271(with)-267(an)-268(overlap)-267(of)-267(16)]TJ 0 -11.955 Td [(entries)-249(placed)-248(at)-249(local)-249(i)1(ndices)-249(25)-249(thr)18(ough)-248(40;)-249(the)-249(halo)-249(will)-248(r)8(un)-249(fr)18(om)-249(local)-248(index)]TJ 0 -11.955 Td [(41)-236(thr)18(ough)-237(local)-236(index)-237(48..)-305(If)-236(pr)18(ocess)-237(0)-236(assigns)-237(an)-236(initial)-236(value)-237(of)-236(1)-236(to)-237(its)-236(entries)]TJ 0 -11.955 Td [(in)-259(the)]TJ/F52 9.9626 Tf 27.963 0 Td [(x)]TJ/F54 9.9626 Tf 7.782 0 Td [(vector)74(,)-261(and)-258(pr)18(ocess)-259(1)-259(assigns)-258(a)-259(value)-259(of)-258(2,)-261(then)-259(after)-258(a)-259(call)-259(to)]TJ/F59 9.9626 Tf 266.124 0 Td [(psb_ovrl)]TJ/F54 9.9626 Tf -301.869 -11.955 Td [(with)]TJ/F59 9.9626 Tf 22.816 0 Td [(psb_avg_)]TJ/F54 9.9626 Tf 44.404 0 Td [(and)-257(a)-257(call)-257(to)]TJ/F59 9.9626 Tf 55.983 0 Td [(psb_halo_)]TJ/F54 9.9626 Tf 49.635 0 Td [(the)-257(contents)-257(of)-257(the)-257(local)-257(vectors)-257(will)-258(b)1(e)]TJ -172.838 -11.955 Td [(the)-250(following)-250(\050showing)-250(a)-250(transition)-250(among)-250(the)-250(two)-250(subdomains\051)]TJ + [-500(In)-307(COO)-307(format)-307(the)-306(coef)18(\002cients)-307(to)-307(be)-307(inserted)-307(ar)18(e)-307(r)18(epr)18(esented)-306(by)-307(the)-307(or)18(-)]TJ 12.453 -11.955 Td [(der)18(ed)-194(triples)]TJ/F60 9.9626 Tf 57.352 0 Td [(i)-47(a)]TJ/F93 10.3811 Tf 7.911 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(,)]TJ/F60 9.9626 Tf 4.624 0 Td [(j)-40(a)]TJ/F93 10.3811 Tf 7.84 0 Td [(\050)]TJ/F60 9.9626 Tf 4.205 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 4.149 0 Td [(,)]TJ/F60 9.9626 Tf 4.276 0 Td [(v)-40(a)-25(l)]TJ/F93 10.3811 Tf 13.37 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(,)-205(for)]TJ/F60 9.9626 Tf 19.208 0 Td [(i)]TJ/F93 10.3811 Tf 5.856 0 Td [(=)]TJ/F62 9.9626 Tf 10.961 0 Td [(1,)-179(.)-192(.)-191(.)-180(,)]TJ/F60 9.9626 Tf 26.608 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 10.337 0 Td [(;)-212(these)-194(triples)-194(ar)18(e)-193(arbitrary;)]TJ 0 g 0 G - 166.875 -143.462 Td [(63)]TJ + -60.701 -29.888 Td [(84)]TJ 0 g 0 G ET endstream endobj -1304 0 obj +1549 0 obj << -/Length 3551 +/Length 4535 >> stream 0 g 0 G 0 g 0 G 0 g 0 G -0 g 0 G -0 g 0 G BT -/F54 7.9701 Tf 265.805 653.177 Td [(Pr)18(ocess)-250(0)-8396(Pr)18(ocess)-250(1)]TJ -31.163 -9.464 Td [(I)-1500(GLOB\050I\051)-1500(X\050I\051)-5163(I)-1500(GLOB\050I\051)-1500(X\050I\051)]TJ -1.299 -9.465 Td [(1)-4774(1)-1920(1.0)-5000(1)-4274(33)-1920(1.5)]TJ 0 -9.464 Td [(2)-4774(2)-1920(1.0)-5000(2)-4274(34)-1920(1.5)]TJ 0 -9.465 Td [(3)-4774(3)-1920(1.0)-5000(3)-4274(35)-1920(1.5)]TJ 0 -9.464 Td [(4)-4774(4)-1920(1.0)-5000(4)-4274(36)-1920(1.5)]TJ 0 -9.465 Td [(5)-4774(5)-1920(1.0)-5000(5)-4274(37)-1920(1.5)]TJ 0 -9.464 Td [(6)-4774(6)-1920(1.0)-5000(6)-4274(38)-1920(1.5)]TJ 0 -9.465 Td [(7)-4774(7)-1920(1.0)-5000(7)-4274(39)-1920(1.5)]TJ 0 -9.464 Td [(8)-4774(8)-1920(1.0)-5000(8)-4274(40)-1920(1.5)]TJ 0 -9.465 Td [(9)-4774(9)-1920(1.0)-5000(9)-4274(41)-1920(2.0)]TJ -3.985 -9.464 Td [(10)-4274(10)-1920(1.0)-4500(10)-4274(42)-1920(2.0)]TJ 0 -9.465 Td [(11)-4274(11)-1920(1.0)-4500(11)-4274(43)-1920(2.0)]TJ 0 -9.464 Td [(12)-4274(12)-1920(1.0)-4500(12)-4274(44)-1920(2.0)]TJ 0 -9.465 Td [(13)-4274(13)-1920(1.0)-4500(13)-4274(45)-1920(2.0)]TJ 0 -9.464 Td [(14)-4274(14)-1920(1.0)-4500(14)-4274(46)-1920(2.0)]TJ 0 -9.465 Td [(15)-4274(15)-1920(1.0)-4500(15)-4274(47)-1920(2.0)]TJ 0 -9.464 Td [(16)-4274(16)-1920(1.0)-4500(16)-4274(48)-1920(2.0)]TJ 0 -9.465 Td [(17)-4274(17)-1920(1.0)-4500(17)-4274(49)-1920(2.0)]TJ 0 -9.464 Td [(18)-4274(18)-1920(1.0)-4500(18)-4274(50)-1920(2.0)]TJ 0 -9.465 Td [(19)-4274(19)-1920(1.0)-4500(19)-4274(51)-1920(2.0)]TJ 0 -9.464 Td [(20)-4274(20)-1920(1.0)-4500(20)-4274(52)-1920(2.0)]TJ 0 -9.465 Td [(21)-4274(21)-1920(1.0)-4500(21)-4274(53)-1920(2.0)]TJ 0 -9.464 Td [(22)-4274(22)-1920(1.0)-4500(22)-4274(54)-1920(2.0)]TJ 0 -9.465 Td [(23)-4274(23)-1920(1.0)-4500(23)-4274(55)-1920(2.0)]TJ 0 -9.464 Td [(24)-4274(24)-1920(1.0)-4500(24)-4274(56)-1920(2.0)]TJ 0 -9.465 Td [(25)-4274(25)-1920(1.5)-4500(25)-4274(57)-1920(2.0)]TJ 0 -9.464 Td [(26)-4274(26)-1920(1.5)-4500(26)-4274(58)-1920(2.0)]TJ 0 -9.465 Td [(27)-4274(27)-1920(1.5)-4500(27)-4274(59)-1920(2.0)]TJ 0 -9.464 Td [(28)-4274(28)-1920(1.5)-4500(28)-4274(60)-1920(2.0)]TJ 0 -9.465 Td [(29)-4274(29)-1920(1.5)-4500(29)-4274(61)-1920(2.0)]TJ 0 -9.464 Td [(30)-4274(30)-1920(1.5)-4500(30)-4274(62)-1920(2.0)]TJ 0 -9.465 Td [(31)-4274(31)-1920(1.5)-4500(31)-4274(63)-1920(2.0)]TJ 0 -9.464 Td [(32)-4274(32)-1920(1.5)-4500(32)-4274(64)-1920(2.0)]TJ 0 -9.465 Td [(33)-4274(33)-1920(1.5)-4500(33)-4274(25)-1920(1.5)]TJ 0 -9.464 Td [(34)-4274(34)-1920(1.5)-4500(34)-4274(26)-1920(1.5)]TJ 0 -9.465 Td [(35)-4274(35)-1920(1.5)-4500(35)-4274(27)-1920(1.5)]TJ 0 -9.464 Td [(36)-4274(36)-1920(1.5)-4500(36)-4274(28)-1920(1.5)]TJ 0 -9.465 Td [(37)-4274(37)-1920(1.5)-4500(37)-4274(29)-1920(1.5)]TJ 0 -9.464 Td [(38)-4274(38)-1920(1.5)-4500(38)-4274(30)-1920(1.5)]TJ 0 -9.465 Td [(39)-4274(39)-1920(1.5)-4500(39)-4274(31)-1920(1.5)]TJ 0 -9.464 Td [(40)-4274(40)-1920(1.5)-4500(40)-4274(32)-1920(1.5)]TJ 0 -9.465 Td [(41)-4274(41)-1920(2.0)-4500(41)-4274(17)-1920(1.0)]TJ 0 -9.464 Td [(42)-4274(42)-1920(2.0)-4500(42)-4274(18)-1920(1.0)]TJ 0 -9.465 Td [(43)-4274(43)-1920(2.0)-4500(43)-4274(19)-1920(1.0)]TJ 0 -9.464 Td [(44)-4274(44)-1920(2.0)-4500(44)-4274(20)-1920(1.0)]TJ 0 -9.465 Td [(45)-4274(45)-1920(2.0)-4500(45)-4274(21)-1920(1.0)]TJ 0 -9.464 Td [(46)-4274(46)-1920(2.0)-4500(46)-4274(22)-1920(1.0)]TJ 0 -9.465 Td [(47)-4274(47)-1920(2.0)-4500(47)-4274(23)-1920(1.0)]TJ 0 -9.464 Td [(48)-4274(48)-1920(2.0)-4500(48)-4274(24)-1920(1.0)]TJ -0 g 0 G -0 g 0 G -/F54 9.9626 Tf 88.221 -98.979 Td [(64)]TJ -0 g 0 G -ET - -endstream -endobj -1308 0 obj -<< -/Length 321 ->> -stream +/F62 9.9626 Tf 112.349 706.129 Td [(6.)]TJ 0 g 0 G + [-500(In)-272(CSR)-271(format)-272(the)-271(coef)18(\002cients)-272(to)-271(be)-272(inserted)-272(for)-271(each)-272(input)-271(r)18(ow)]TJ/F60 9.9626 Tf 294.598 0 Td [(i)]TJ/F93 10.3811 Tf 6.254 0 Td [(=)]TJ/F62 9.9626 Tf 11.36 0 Td [(1,)]TJ/F60 9.9626 Tf 9.257 0 Td [(n)-15(r)]TJ/F62 9.9626 Tf -309.016 -11.955 Td [(ar)18(e)-311(r)18(epr)18(esented)-312(by)-311(the)-311(or)18(der)18(ed)-312(triples)]TJ/F93 10.3811 Tf 171.689 0 Td [(\050)]TJ/F60 9.9626 Tf 4.205 0 Td [(i)]TJ/F93 10.3811 Tf 5.251 0 Td [(+)]TJ/F60 9.9626 Tf 10.413 0 Td [(i)-22(r)-35(w)]TJ/F91 10.3811 Tf 16.818 0 Td [(\000)]TJ/F62 9.9626 Tf 10.358 0 Td [(1)]TJ/F93 10.3811 Tf 5.106 0 Td [(\051)]TJ/F62 9.9626 Tf 4.149 0 Td [(,)]TJ/F60 9.9626 Tf 4.624 0 Td [(j)-40(a)]TJ/F93 10.3811 Tf 7.841 0 Td [(\050)]TJ/F60 9.9626 Tf 4.622 0 Td [(j)]TJ/F93 10.3811 Tf 3.019 0 Td [(\051)]TJ/F62 9.9626 Tf 4.149 0 Td [(,)]TJ/F60 9.9626 Tf 4.276 0 Td [(v)-40(a)-25(l)]TJ/F93 10.3811 Tf 13.37 0 Td [(\050)]TJ/F60 9.9626 Tf 4.622 0 Td [(j)]TJ/F93 10.3811 Tf 3.019 0 Td [(\051)]TJ/F62 9.9626 Tf 4.149 0 Td [(,)-327(for)]TJ/F60 9.9626 Tf 22.013 0 Td [(j)]TJ/F93 10.3811 Tf 6.917 0 Td [(=)]TJ/F60 9.9626 Tf -310.555 -11.955 Td [(i)-22(r)-90(p)]TJ/F93 10.3811 Tf 12.991 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(,)-179(.)-192(.)-191(.)-180(,)]TJ/F60 9.9626 Tf 21.557 0 Td [(i)-22(r)-90(p)]TJ/F93 10.3811 Tf 12.991 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 5.301 0 Td [(+)]TJ/F62 9.9626 Tf 10.407 0 Td [(1)]TJ/F93 10.3811 Tf 5.106 0 Td [(\051)]TJ/F91 10.3811 Tf 6.486 0 Td [(\000)]TJ/F62 9.9626 Tf 10.407 0 Td [(1;)-362(these)-325(triples)-324(should)-325(belong)-325(to)-324(the)-325(curr)18(ent)-325(pr)18(o-)]TJ -100.947 -11.956 Td [(cess,)-276(i.e.)]TJ/F60 9.9626 Tf 39.307 0 Td [(i)]TJ/F93 10.3811 Tf 5.103 0 Td [(+)]TJ/F60 9.9626 Tf 10.263 0 Td [(i)-22(r)-35(w)]TJ/F91 10.3811 Tf 16.669 0 Td [(\000)]TJ/F62 9.9626 Tf 10.209 0 Td [(1)-271(should)-271(be)-271(one)-271(of)-271(the)-271(local)-270(indices,)-277(but)-271(ar)18(e)-270(otherwise)]TJ -81.551 -11.955 Td [(arbitrary;)]TJ 0 g 0 G + -12.453 -19.925 Td [(7.)]TJ 0 g 0 G + [-500(Ther)18(e)-315(is)-314(no)-315(r)18(equir)18(ement)-314(that)-315(a)-315(given)-314(r)18(ow)-315(must)-315(be)-314(passed)-315(in)-315(its)-314(entir)18(ety)]TJ 12.453 -11.955 Td [(to)-298(a)-299(single)-298(call)-298(to)-299(thi)1(s)-299(r)18(outine:)-406(the)-299(buildup)-298(of)-298(a)-299(r)18(ow)-298(may)-298(be)-299(split)-298(into)-298(as)]TJ 0 -11.955 Td [(many)-250(calls)-250(as)-250(desir)18(ed)-250(\050even)-250(in)-250(the)-250(CSR)-250(format\051;)]TJ 0 g 0 G + -12.453 -19.926 Td [(8.)]TJ 0 g 0 G -1 0 0 1 104.053 292.88 cm -q -.65 0 0 .65 0 0 cm -q -1 0 0 1 0 0 cm -/Im5 Do -Q -Q + [-500(Coef)18(\002cients)-288(fr)18(om)-289(dif)18(fer)18(ent)-288(r)18(ows)-288(may)-289(also)-288(be)-288(mixed)-289(up)-288(fr)18(eely)-288(in)-289(a)-288(single)]TJ 12.453 -11.955 Td [(call,)-250(accor)18(ding)-250(to)-250(the)-250(application)-250(needs;)]TJ 0 g 0 G -1 0 0 1 -104.053 -292.88 cm -BT -/F54 9.9626 Tf 189.276 261 Td [(Figur)18(e)-250(4:)-310(Sample)-250(discr)18(etization)-250(mesh.)]TJ + -12.453 -19.925 Td [(9.)]TJ 0 g 0 G + [-500(Coef)18(\002cients)-190(fr)18(om)-190(matrix)-190(r)18(ows)-190(not)-190(owned)-190(by)-190(the)-190(calling)-190(pr)18(ocess)-190(ar)18(e)-190(tr)18(eated)]TJ 12.453 -11.955 Td [(accor)18(ding)-254(to)-254(the)-253(value)-254(of)]TJ/F67 9.9626 Tf 111.539 0 Td [(bldmode)]TJ/F62 9.9626 Tf 39.141 0 Td [(speci\002ed)-254(at)-253(allocation)-254(time;)-256(if)]TJ/F67 9.9626 Tf 131.512 0 Td [(bldmode)]TJ/F62 9.9626 Tf -282.192 -11.956 Td [(was)-300(chosen)-300(as)]TJ/F67 9.9626 Tf 66.146 0 Td [(psb_matbld_remote_)]TJ/F62 9.9626 Tf 97.136 0 Td [(the)-300(library)-300(will)-300(keep)-300(track)-301(of)-300(them,)]TJ -163.282 -11.955 Td [(otherwise)-250(they)-250(ar)18(e)-250(silently)-250(ignor)18(ed;)]TJ 0 g 0 G + -17.435 -19.925 Td [(10.)]TJ 0 g 0 G - 77.494 -170.562 Td [(65)]TJ + [-500(If)-295(the)-294(descriptor)-295(is)-295(i)1(n)-295(the)-295(assembled)-294(state,)-306(then)-295(any)-294(entries)-295(in)-295(the)-294(sparse)]TJ 17.435 -11.955 Td [(matrix)-284(that)-284(would)-284(generate)-284(additional)-284(communication)-284(r)18(equir)18(ements)-284(ar)18(e)]TJ 0 -11.955 Td [(ignor)18(ed;)]TJ 0 g 0 G -ET - -endstream -endobj -1291 0 obj -<< -/Type /XObject -/Subtype /Form -/FormType 1 -/PTEX.FileName (./figures/try8x8_ov.pdf) -/PTEX.PageNumber 1 -/PTEX.InfoDict 1310 0 R -/BBox [0 0 516 439] -/Resources << -/ProcSet [ /PDF /Text ] -/ExtGState << -/R7 1311 0 R ->>/Font << /R8 1312 0 R/R10 1313 0 R>> ->> -/Length 3413 -/Filter /FlateDecode ->> -stream -xœ…›Aä¸ …ïõ+ê8s˜^Ë’-é SvÒ·Ážj‘éÃ&‡üýØä{|ÔI°‡¦(ÚõmÑ|%ZšßŸÛ[yn÷øûúxüôµ?û×£½Ï?lîùõO³ögïíùaV+íùÇ_Í.µÐ=äÝ¥Mù÷:éß·šü³†ÿèòןZKúضÅÇÖŸ[J'¥™Ä´8ÍžùºMTÕmÂúÈiÝ&®ÀëvÉ ö™@î%o3ï‘o{æ ˆlv ß#"›È6²Ù|ˆlvÉD¶"ï×Õ@6“È6²Ù3_d·‰l# »Md9²ÛD¶Ý.È>È}òmò= òmÏ|‘Íä{Dd³ÙF@6;ï‘Í.€È6CäZ£âÌ$² €löÌÙm"ÛÈnÙGŽì6‘md·K²Ïòˆò33‡ÊÏì™/ òÈåg#"\~>òÈåg#"\~N³ [‹ò3“È6²Ù3_d·‰l# »Md9²ÛD¶Ý.È>ßòãÐœqhÌtÌÔ3͇ÆL9óÌ.‡æL€CÚ ½·Ðf4f :f š37´fj‹ƒŽƒæL9óÌ.ƒŽ™.ƒÚ ]f h³33Í™Z3µÅŒAÇŒAs¦œyfAÇL€A Í › -ÑlA7bÌ8tS!j¦¶˜qè– ‘3åÌ3»º¥BäÄ– z¨G.á*¹‡jpäªÀ‘ p¨þF.¿¡ê©ø†jo,¥—*o)¼TwKÙ¥ª[Š.ÕÜRr©â–‚Kõ–Ë-U[rCïÌÜŽöVŸç8ßúV]¾c­úz£¿íü½?ZÄ”à+ÄÌ£¾õgD_œßå6^¸û=t‹–×ãûªjn–Œf¿9±¼š ñ…ÖÑêY2šý&Å -\h6´Oõø`Ih®]¦³g4û¡ŒŽ@h6¼!PN¶õ­à»7#¡ÝCèæwù¾*”›uA«H… íšÇMGÛšÑì×?ú¡ÙÐ>ÕãƒEhÐ!3ÇÌh¶$‰¾Kh6¼!P­Ì¹ müî·º ÝC˜DÃ]¾¯jcf]ÊÀ–QÑ)&´Š2@üAZ[ÊÀÖYÑM -͆e€»|_ôŬ롾oÆ‹;2Ñ—"è¨F:çš÷#}]Ëè‰/~.%0QŒ&Gª@_©v&a}þùøwQU=þ}¥j”ž²jþ.ªÿ²®3Ûõ©ìûu¹Ë–:Ù×ÕÉîý 2çË=ÄÇÒïÅe ¥†ñ·p¡?·TòþæwžÄÇ•šÛ%ñ™v©>÷ûÊ®ôÈm—)Æßê¥.=øÜoŸ‹øàÉ|X”Ù"È• -×›€éíAð¹ßÖCˆù\«ÈW˜hùÌï ]|Ûš_®¿Ü®™¯2/7ò™ß×k5ø Ln»`1þ` p¼¿ùís<‰K-³]³p½I™ÞØŸûm•ƒøX¦½¸d›™o‹<¸Ê‘ÏüÎ0ƒ/ÉܲÜ2»æú0=Ó«&ñUÖãcEF¾–ëÃMo£‚ÏýΠúHZ—×XfB¿p}gZz®ŽÎâˆh®ÃGCüdf®ÉÒˆh²äÒH•Ñ´=2²FÔEOh5ÕE_ÐZèSYd…UÑZR½Ôçº ë§T/^†]·=O©Þ©UŸ:á»bÏâMÝüŸg÷7¿3x|ð$>ªžÛ%ñ¹ºÅk¼àsÿ©¥Ÿš^ðQÅoê/ú‚Ïýö¹ˆžÌÕ³¦*æ×»ºÅ Èàsÿ©õŸú[òAÅÀW˜ªøÌï ]|Ûš_ªžÛ5óUæ…ª>óŸZª•U ñó@ÕÃýÍoŸ‹øàI|T=³¡b~½«[¼ô >÷ŸZ ªk}±ƒ™o‹<@õÀg~g˜Á—U/µŸn×\®nñ¶Z|•õÁøS+3ع>\Ýâ…vð¹ßTYõÔqºIóë;ÓÒsutGDŸ±@33tÌã'“0smL–FD“%—FªŒ¾ í‘‘¥0¢.zB«©.ú‚ÖBŸÊ"+¬ŠžÐ¤zË{¿ª^t¯[¨žÞ§¿û¾…êùÛ@Ä­Ñ`[–«v ,«¼¿ù«º]ñ$>¾È«jHq½©›v‚ÏýEkC½TŒñ·ºi¯ øÜ_ÕòŠ'óá]UWŠëMÝ´‡|î/Zê}ù\ÅÈW˜¨ùÌ_Õ÷Š'ññõ\UkJ¾Ê¼@õÈgþ¢µ¡^í*Æøƒy€êñþæ¯j~Å“øø&®ª?Åõ¦nÚ7 ->÷­ õïÅ7z3óm‘W=ò™¿ªOækª¨ù*óÕ#_e}0¾h;ׇ©›öÄ‚ÏýUm°xßPyPÇp}gZz®ŽÎâˆè«43©cˆŸLÂ̵1Y][V½üέ¦†5½xò–Ï]Oh5ÕE_ÐZèSYd…UÑZR½´â6Tl4©^lÉ]·íMª×µ6ÔNÉ‹»&ž%Ä›ºõ)ÕÃýÍ?Ô‹'ñQõ†:V\ïꛉÁ¿Ö†ÚU ñ¦n±Ý|îêˆÅ“ù zC+®wu‹mÐàƒ_kCí*¾Â\Ýb[=øÜ?Ô‹'ñ •Gè˜_ß™–ž«£³8"ºÇ*ÍÌÐ1ŸLÂ̵1Y=Æ¢zÚð¬÷mŒ,…uÑZMuÑ´úTYaUô„&Õ[ö…›:ÖZG¨žvõ_ºP=ß-Fü®5lËãouÃYË*ïoþ¦ŽX<‰½M+®7uÓy„àsÿ®µ¡6‰Ácü­n:±|îoêˆÅ“ù°§ÛÔ±âzS7¤>÷ïZj?˜|®bä+ÌT|æoêˆÅ“ø¸}ÛÔ±’¯2/P=ò™×ÚP[¿àƒŠ1þ` z¼¿ù›:bñ$>îÔ6u¬¸ÞÔM§W‚Ïý»Ö†Úå}qÇwf¾-òàªG>ó7uÄâÉ|Mõ#_e^ z䫬ÆïZ£ÁÎõaꦓ9Áçþ¦ŽX<‰o¨<¨c¸¾3-=WGgqDô«43©cˆŸLÂ̵1YÝö¬zy?¶¥ž5½xò–Ï]Oh5ÕE_ÐZèSYd…UÑZR½tNÆm¨ØÔn†]·ÚÍðÓ3ˆïZ£Áö, ífณguj7 ?ÏUõÒ‘Ø%ñ¹º ífàóÜßµ6Ô¡ðQņv3tè)øÜ?Õ‹'óAõ¦:V\ïê6´›Ïs×ÚPçcÈÚÍÐq-ñ™ª#Oâ£êMu¬ä«Ì Uoh7Ç_jðQõºÖnŒ?˜ªÞÔn†.â繪^:Óâ6Tlj7Cà‚Ïý]kCzyñÌÌ|[äª7µ›†|YõÒñ·k®W·¡Ý òUÖã»Öh°s}¸º ífðþõÁøy®ª§“*nRÇfìfÄàë,Žˆî±J33tlÄnFI ¶ÉÒˆèy.ª§Ó)žõ¾ í‘‘¥0¢.zB«©.ú‚ÖBŸÊ"+¬ŠžÐ¤zíÐn†Û®b­i7Cg _Vµ›á§ _µFƒmYbü­n81iYåýͨ#OâãAÀC+®7uÓ©ÈàsÕÚP‡Ácü­n:7|î?Ô‹'óáÌß¡Ž×›ºé®?Œ÷Æå‡ñ¶ŽùÃxü0î÷øúz~ÙëDõ¹žDí‡E]Ó×…aXŽí!Í>øïŸ×düoõ–‘¿Ö«\¯ç­ÜýÌ•çë»ýô—w-ÿ/Iÿõv×ï!o'ÈŸÿ`[G. -endstream -endobj -1316 0 obj -<< -/Filter /FlateDecode -/Subtype /Type1C -/Length 13073 ->> -stream -xœºwxWö?laÏŒ˜ r‘G¶5h†ôB'ZBïL·1`pø˶$K–eK²%«Yr•-˽w Lï%„ ”$$$¤m²›Æî{½ûýÈ–ßû<ïûýãõ<~4£¹ºsçÜs>çs -ÏÇo”dz%!)&cÒ攤}ÉÞ뉚ç7Ê#ñ-VyRŸ/Å<ÌŸâ1¾ÅcüNŽc~G¿8ŽfüÇÑA>~<Þ¬5›íonݼý­ &.II•§'ÄÅKÇOŸ:mÆøhùøÝ¿4&#!.yüëÜIVÌ¡”Ô¤˜déú„¤èÌŒñ/ž<~sL\æ¡}éÿúøïDÿÿ¦æùsïæ³Ý§•À‹åý<*Åw¬ïjßM¾}~´_¤_¬_;ÆÃæbë±d,ëÆð`|<þ -¾—ãýø)üþ þÿ'‘G˜;?€Ì·ò«ø®ÑüÑ£•£¿'7ÇÉÈûà=°,Ù ¨­c|Æ?æ1³ÆèÆÜ;nìkcç]4vÍØð±ÛÆÆ-ûpì+ˆ¤ -ä‚|ApQð‘àgÁßÿôçû ü—û¯ößà¿Í¿Ø¿Ä¿É¿Ó¿×ÿI€0`I@x€2   0 $àTÀÕ€Û¾ ôd_ |#p]`Z`f 5°.ðbàõÀ? šô~Ðò ¸ Â ª Ú æ Ž ž þ ÓA烾 ú6è7¡¿P('|S8[ø¾p±p¥p¿0Gh– -Â&a‡°GxEø¡ð¡ð ! ö ‡³Á¯O^¼*8|=øqð“à?ÿüwʇR¯Ss¨ÔJjµ‹ÚO¤”TUBÕPõT'u’ºI=¦þLý&%"Eþ¢0ÑK¢·DSE³DóEËD«EE[D¢ƒ"¹¨HdÙEu¢ÑIÑ%Ñ ÑmÑ}ÑW¢ŸE#!£CD!lÈÄi!³C„, Y²1$2$>D’b ) q†4„´†t‡9r!äÇ!C~ù-…ú…Ž„ -Sðˆ—¢ÒÑ+F~*þŠÅhÃb8™øÂP©®œÃo€J)®ž«RÏ2ðÑd˜Ž·›¬ÖVÒ8äܪêéü&¼¾‚´Êð»úŠ\óþÈÆåàY“R)Ôz%-ÕV ±p;qFߦldz3÷Ön¤'-ÚµVZ›ÝØXç®·mF;k(5:ìâ–ÎúÃ'[’·2ë 4i½<o ?3[•pP à?žÏ¦P8‡^AQ“:§œÛÌ®½ôôÀ×4Ü ÇAFý%ùÉ–ËìGkgõN ãüŽ—'ö²çvuLB@¼Q½8©êÍt{Ð`‚;èã'Põ‹p¢'œÛÙ,\8.³0O­ 5 -«CÁ -»ªbcé}{ÉÙøCò}ÛÅó†Â¡ßFæÎ3û›bø¥¹¶¬tñª»O[w -v2R¼Åcõ¸pm­©¢ÊI—×jk¹‰”ýƒ4äýè÷Ï·Ü@¡wØ—/î:/>¤ïÚ±#²¸¦#±*­z#§ÈZ<{)2mbиï߀A0èÇgœv…Nù³¹)Ô—' £V.<Ûx8‹M9“sûcñŸŽ_»Ã€%dMù$ p‰`¼ßiÉ©ìÃnâØp}E}²ñÐ*ņÕêœ]E|%<à$vÙò*Œ'ùPNü°ç¢݇kV0­y»,!o»2LAXìp¦¤µT¹‹E&¸1/wg@žïó7j©KvYr7Š†%¡PØtîÔw⣹}imÌ`̬æÉ4j“á÷ -ÊòM‹É|ÍB_Ûk‰ù6u…ñ.î$.˜a>ºŠÁ™øH„ç ÕZî8 /í´3k_œŒÙ¯›`àƒB(Áòð½¾HGë4f[ëJmô{›·/ ïÜÿe{é Ö“]“qH¼'1eÇòøþ_¥LšZ‰—Ù­æ -èår½ÞÄ€¸¶ÊÑif¬eŦr¶uɈaÎh~F/Á´%-&ŒïäçO-äÖy¹–˜nÎ-7>àÃm|f]ýº¹þ´5 tyÆSŽÊs9ÝáP¬aÑ:bòðÖÏÏn>y„9r²öÚc1$bžìLÏÎN—•åÔg3-%%¿"£äÐAñÛ›–ÎKÉ(¯U3y®ÂÆa1Œ!Ú9³®b<ᵈžÿ|yzˆa¸KÇ#_Þ›†¨BþJ|¾¹Àbìå{’óÝÊʇV> p$xý KÀaš‡]ÃhÊc*IŸ—¥fTé©Yt|J÷QÖJ˜® õÃу|p¯z (E ÑgªÎ1ǣݨ.$¾v8Ošä$:@§èó³I˜U“ƒÇ*+ûŠX¸º0W"Ñ’8Ó% ß×£{žM½òNÜÂùì’eû'¼!Füþé0K?yÿ ýëÓ¾»wÙO>éÿî™øYì·‹î2÷Në{‰F?ŽQp œÈ[`8œˆ&¢-h3š„&£ o½ßys³òøÇ߈A¶d“Äl3›îJÀR@yëÈ{ØHðwäEÒ…äĽjU!wßš[a¼Ç÷8ÎœG¨SÔ#¿×ÁùØÈU\‡æcxäÕÕ@ÂÌ?7R„gækféù2àù.¥’Æ'¤"ŸB>*Ã=K=Åœ# D6Z€5ã5¿ô¶A‡iøȪ‘¿bJÏU¢.ÅÄ%ÊZ5 '¿ Š7Kµ 9#k½z·¤iÎ’?ý×JVP@ÂQ*éÎldžΟ:¶…5%P=íIqqÉÉ1ñÉm}½ím½,ðµ‹`®Ï6í³¨Ë}ap-Ç£ V½Eo) -sh,:½8?_§fdJ˜ìÄ÷XÕî;&Ám¡m-0úµTß0„9Ñ^1`,/ÌåÆæªr"˜g6Qh> NÎ߬ K*ÀrSe©©Œî.Í`G²㶢¢¥y|ð¤ì?¯S-vçVpýûv×3†Õ 5ŠRÂU™Ÿ›_˜¯Éc9 ~ K{Š›¾µ•ž²ð]5([áÂOÙòÍSùÓS _ÂòË eâªrG-Sê?fêVôï…£( -Cs±]’e tFÔÈñ<­Ía·Øí¥, €Ó0Á{¸ø©ËyÎʯEÛdD±\—#Ž]„F½ŠøLŒÂoW:/3.$“—+•ËX•E¼ -ù‹à¨>qM¹­Çë3¸ß+¬U^˜W“qzE…²"‡;”Õ™¡h5Ê[>#!-Â&‡%."ÁœWâ;í'®3P7`°$Wµ„@Í"A éYå¦à›«Œ—ÚhDM}Û]±».†­N­Tv§.FãCûð ?–ÓӠЋÙê“}lNcnÅ¡z~âªmkyÔ1½gÅÂiE{:“ÕI¥^R_üHüÉkwN¸yó&P¯áPˆrX_•ï -;žÓ¸‹FÄd4½…‚ïLûí“‹=wŽ°%1Žl§”¯(/Ï·Ó›Ùbg¿Äz ­¢À ¡÷'÷}¢`g¸(Ä ¦wÄ -GCvð‹Û” n¶JêH¯{Ÿ/°pÈ#'nÛ2¿#F‚BgEç*Òiu¾µJÆVeçYtb¦<5®Szúèáê¶6¶¾¾´²ó\í¡B>éï¼næ»P†œ8Y©•j‹‹óÓÙ‚L$B½ˆ ˜ª9¿¥R\ç*ëg*]8(îó|ÙÇûýWÎyjìãܳ® ×Ø.÷=ýIüMòÃðóÌG+¯IhÔ Ãif‰q±Z½Pχ›=7¨Ž2Ç‘¯nä$ 3·ÎD>bDFÏšÉ$ïؤžAïW—v²àeÌÕÒñùåÇYÏ BoŽÌIÈ[¥ãôßa+.¥ûJ5Ñ,šJtƤ–ï!Y ½Œ‚ïÍ€¾úÝ'ºÙ5„µ¾qC®PZ­Z“Çd+Ò4Éô¬­ŸrêwòÞýsÑÛXÐË"ŸXJ²±E<ØÑrübûÁi ŸŠØA[5š™ãÀëœj#[ W¨™ÍsößÀrk5eâ²J{ c¯F‘Üc…&㧈:¶8tÅK[f¨Õ1ú0™Ëskòˆì¼’ -‡Ùæîd?„¼[hV!/Qæ‹só´™L$n•÷óQn?§à\ü’ÑfXÆwA7-ÎJ 7ñÑ\h†“ðßÌéåÓkL¡ø66b”ãÀEfå¼M‚zYŸÝn2•0à0i+.6ß•Õ¼Ò2¥`íО«Cƒug®2 Ø wzh7ÏC<ŸF©q”7rdÿ¤<Í*gIiµj"NZÕ`·7–5±GžaP÷¿U´æ(ŲÜ8&G;\¹xTZÛ×qì0ÔÏ0ÿo(wÉvÎÒ;dÄ]™Æ´ˆþã€v™ -œ ‚‰€„ösÙdÜ´£$ÃÂÔêË:«Þ¦·…™óšR½5,¹ùP»Ñ¯Sd»t´žûËmÚßÇv¨Éj:X¨ Õi±ùoEoÊë,E%6³Éfe._:Ü÷ñU¾nõì¦ô–u1¹á^U±™0³ÓÉ¡eC©b/;âO,(JÏËf4:UŠVjìu -¶A®*Í¡¥rEfâ€ôüÐ᪦¶·³þÖ#1ðúó9®ŒDKJÕBZðݧr -0ñ6š{(Û¨ObáJâvµóÇÄ/Q¶–Z³›þÓ5§¯]?-+ÇRÏÑ$‰0à+¯fªkP–¢¯”ü!0LÖe¶€µš`r'$zqÝY{ÖÌpû'€ob.YÁdNmÞÐNÅä¨ÔRZ•]R*g3*0ḊŒŒ’LZ81E"×Q^íl„8YUØD_€A¸ŽZ³™žÎ!GºpH­ýø-ñæ¾BQè£7ž;Ý2Üͺ+Û\øè £n:ºûOѧÛcVYX #Éò³¸€&l©Äó澞7á= -%kÕ²}FnÄäPæ"ä&lŸY]{] “› (B°kíj¥Xž§KàF÷)Æ^k«)7fÖ§gdffhl -»Œqï°flÇ™”Ö}5|‡LfÓÐR…<-Å©l«(³”6²Îk¤gËÒ3Ü\ çv76f»¥,'®‹›/yBÁθ¸ÕJ¶ ÷"o‡»p-d%ðÊC_xÆRÓ Ç„8Â+WÀlÎMí´èKßñŸ¶Õš†9ò-Z­¸@«W3r˜ü‡·çvã8ÙÓØÐÝ™Y«Õ:Æ 7é]‰£«¥ç|? èrg¡ÖÉ6äa O¢×/Ý>•Ñµa½DÅgHÀšè̃\¤q{óO?ß¾ú»›‘Y°÷Ó#¢WÒ+wœºÕæ¨kè`UmÚº½¶¢Î-­ŒOÏÔçƱ€* ÷ªc8n·Þ8Gº$—蠟-\ŸT#oiuÖtX‹íÆÖXŽ•]憚£tcmÊÎÚsl‡¤bÃÚ™qàØÊd —,$#¬Û$î…Á’Ø­±¬@AÂK25Æ,–î*ñYYk|¼,e[®e¯9‡D?cŽ%TbZZbbsZgGssggZó!?RÝÙö6ºZ[/óÑßÏP¦Ø’Øò8¾‰“ŒQ,_›40|¥¥Žb>?G} òÞS¨æpáþk\¸oj·XZM|8îÿ;ÞXÇ áÛä2 Øÿ²`a„Ïÿ+^íÙå–­¦Õ3½xSDÂÃ.BjÂÀú=Êìýâ §£ïß?5xýXVëò“LêZêpjUö!ñA©4>:µýDWEC ö;vQI8Q”$p8AÀ©$Тä‹ðz<8`k¦…Ÿ'Àóhøµ–GâhªÅvÁ•Ónsž£K)Dã†+°!øµμ|»–.*4 -ÙW·b ]r‚$²Hà,kV:Ó‰£SS£åyÖêL¦J–gË£³•9Y‰íò³Ð÷óKàhGMC«øDdïúuRWÄ3Mrl¨Ã]ÙK÷ÔÉ’bÓ2—IYAÖ{$ðØŸ_šºYgÕš9Ÿ09R+y:bÇ•ú‹¤>³µ½¾¾­MZŸÌ -ŽŸmâ~Þ™>•‰yä‰&n¶¨Ú™el}4½+*}ã&|$™z#Â( -½‹fr1Åø.šwrÇl8 î€;¸«¹h+§¦(ÍE³Ñv´ÎD³¹Øb7à]¸FqCf¡(nH#ùÂ"ÏKn)¢ìƒáÀ¶†¹(¸ͨ5X¿èêøÒÆi{TÔÖð£{€'ŽZž Ñ/:dW…–Y1³½Ää Ûì¹›Yt÷ü‘}íõdžÅO\E~±)ºœh¦@ª)Ê£³4¥µEly¿Ây¸Â!øµ›÷ø¬{æ úȯŀø¸ºj R%ß ³ŸI5è…h2Ujdb@Í—€ç¡T½;UŸ¶ÉÌŽ¼CÌÒ¦eæiJZf(3·<ô†+ ™TÊJJe^äljm¥¦†®sV7ÝÙ2Á¦´]qlRt^¬n<%¯H†<¿æµB.Öžü憓Ȃ‰g?µ"!¯pQ÷†å k·p¶ºø=¯Ãñ°ÙeÕÁ , O)Ì÷æ1ŒÅµûY×e€j¿Ð{üÞÉh4#ÜFíZ¸$rso;#ÈV$drŽ&e;yf(± $A¡¤·JHPÉÙû/`äµ›ž¸‘¸U„ÀE,Êqâ. øJ˜è$¶Z5¥gÅžŸ8v•oЩuaà Ù~¿Žˆ»rk22reà°„ pR|}ÛÑ÷£ÔR)“%ÓÈrv7©B‡ëÁMÞ7¤'&Q#q€bþwâ‘@?’{0@‰=ŸöƒÊÒÊ7HçQgH¸·…Û®ó Ñw–Qk¡ñ:!ym•'η - Fì¬Á)[+nÉcÑ–G¨Ç>?ö)Øõ˜ÅNC–Èi -õ!:+ËVª`OL—Û£èÝß°ÛL@'LGÜ?wze;üHhG‰ˆÐFÜxûÅØpvˆ®¯(Hu±àÊì"œ(rÿÜé†3(OI.I§wïÎ8Â&íÕ(|‡f‘¦øœ|N¹Æ_CZüþ®¼ùO%R¼Ø -¼;ʪaêÖÆÂúá‰Nð{±Ûóí0–<ó…ïŠ8\â|”ÃÏQ_e®§¿¼°iæ»Û¶Ì“ ™X/Ä2£RÉ PQ¡ÆMyTPk?ôÝ9nÐüîWHóBÕ‚—Ƹ°ß®ÊÌ/L1†¡±ø“/±«à)Ïï%9áÎlim¨oÍ#SY çTLZÎ@òbŽ[N—,žÈEøÇ@wQuÑ`_ëvë+èúj¸_1pø}¡çàŽˆ¸ƒ;ØD€N่Ūq×ý–¦|¹|ÉÜ-áÛoßgRiJ2p‰VCq¯³ÝbÀB1|…ÄA9•Š»àÎv8ßͼ»_Àõ_ø‚nˆ{æCÎByCžw†8ÿ}ǃµˆý|ü³+͹yn@Á·Hè(øˆ,ª^Ï?¿'AQ4¦Å}Ö[5¸˜ -Âx±¼â’¹Áêg7Üzòú±÷â®g¹Ç„tƒ“0„Ì¡îÕyº®Úd=ÌáQº"Gª3éÌ:Öª^±ÉœWaºVA€¯AâÒÎë`Éé)h{aÉq±mÉà_鋂İíòO^8Û tf'1Ç’Wþ‰~€Z»ˆÇ=ö¦~öŽaó é|ùˆ/ü€ó¸c®ž\@y5|$ŽÁI5@ÉÝp­ }äÑ|ä Ð<3 Šk<Ó8‰]ò¹®¥¹ÐM?ô p .§<<o„Ç;*£߮×`½-à2v]w¯ø>?H;€üëY5߇„•LØ´Uó3Ù¯ @å1}$$sëŽ*Ž†Ž7êô™,uëõè/|Iô<âyÄ?"pð¾v®î…KZx@QîæT ^¿7Ýßò·x2–]½744´µHSXAC]{{zc+(5•Óàù«ÜdÃôcàCNgÆÌwgÍž3ušÐ'؇òù„ø„ú„ùˆ}}‚|€·ªäç³ÃçSÅ+æyF½2Êéëç»ß·• ´Å÷= ÷xðÛ'¾ž(¡rm±›ò´‘œÚàð=¸{úýÀý‡âŸç>xeËöÌý±Lb‚2Q±´NúçÃ=·9Û<ë½ÝSfMgÑr´ S{Ä„@Ë©Ô·W‚àÇbî?‚“àcêrƒ!¿½.í_.~7|Í¢”l{c,ãJVØ´T©ÊŠ?šyûþ£æÁaöÄ`Ó¥ÅgrŽ§t0Yõ²Š½µ|á—·N4õ¾öüä{‰qLRŠ2=kƒ³0ôÞác×é›C{×Ä)ãÓÓØ$îΊ >÷ZnøÆ Ïûî xìáüÂï=‡`õšgÓ}8² [»ÑE£÷dkcÖ²-pôƒ>pZ3E£ê«“KP u`ý²w¶þ§œv´ºlu]Gi-и=wïñ<[9O1‚ãÑ5ƒÖV^0…Á$íùû‡ñÏ_öJ£õ¹ 5èÂýe÷`ÊG[î À£‚úù\÷à-ñ㥗_çBœ•SW×&?›ÂŸ"Þì¬äåâ·®„8óàæo†ÑØoù«ÔÝíqáâÕQQ«ì¸ô䃺ÁËÃŒðÁÉÔôKqÇ/ˆ/¼~ëèžåëÓö¯ˆbZ¨9êñkåµç{9“‚~•çkoäÿ~ÿ*Òb8g*DLÑHpg.ô9¤zð,f!ßñhb^†Ï©ìÂü¼|Fš£Š£D|ÁÅê+uͬ»¦©¼þêØLô˽`óó f^ã°òG_h>›Ú²/'.–݃4ZŒFu!þ‰pfOßéŒKôé£5=½ì¥óýNJᾩð%Â¥Q_CŠA1{ÆÏœ¹çoðL8G=a½zø<€Ãª„'¾Á6'>_Q>ld9:P`F+§b*Â9Ðh/¥ëJ•‰ìÈN°V¡Z`ä TUžme0ª -ï'ƒ“Âç±Á+ÕdµÚ,k‰Õj3fÀê¶Y­î¢Ì:f,Oà5ó½>Gx8ï,ïþ¨M£®Žºé;Å7Ê7Ú·Ø÷¾ïcßïüÄ~‰~R?›ßE¿ë~ëÂNcÿÄq|4.Ä_Çâ…„¡& -‰ßùß¿0zúè„Ñé£?ýýè¿ÛÈDò6ù|Jþ Ð`&X*Á1pÜCY}#úE„DÿB†CÆ…¼òzÈ„ç^sGJþ@ïÌ6¿ÊG Ä -]Œ"‰‰H]sh!½tWã‰TVÞ©í».†wOJ>õ&šzeÄ£?Mè-úlœyŸôÜ`” †U„ùYyÕg¥|ð|[-eÑ”ÅßCfÏ¡Ù8:0¢^ƒ^WçDÂdpW-‘]ÑVa©²U²Çáh :ñÁ XEª=M%NWl322è Fr9² - ÈzÔŠ8›Z±7y ½yoÛÅdVÞ¯í4\ãëxÁš¶ôN~oÒîºíôÎHyB ºtŽ1wfBâOŸ\†>GôN¤–w÷]¥¯6¦Ì¨dÁƒ–V_ÏAO(50ŸÈRYìÙl¦+KK±¥Ñ“-²¶9æÃlZFV¶:¯¨0,+S—§g–çÉf–lO;+Þö`ÏŸr†éï¨éì_‹\deU‚m5g”V‹Ý]=-¦¨·Ö`¦û;[:Ø®VgÏ)q›¾AÕÉ íˆ˜Òv?Ô¦¿éÊÛ´à¦onj(ufmð{êrkÏñŽfER5S_µ_ž¿{]ô?2% „ó°u8ò¯ºq8ö7(xüäû|'ü…rü¡ÞQ`^ÀGï‰R£Yop ¼þ‡Öo‡N2'^o¹E_8%‹ëgÛ“«ÓšÖóð‡eâÙ:yn­É±Øå¬3*ÒEsaç+è ´÷­–…6²ËoþGÑ_Á€žžÃ,ÜC˜¯–W\´ò'ų È•xgu~N¡QW¤eßFmˆ€%˜®ª°ªLìrÚš'ŠVÍ…5yÄ‘1´"Õd´iø­;«c•âézÅ!&]™%SѺ|oÑÒ.—[sè ™,ý`GúÑ[ÇÏÃ×γp²gbegyY£% dJÞ&ïsÏv*fr\–.GMç+-6%[³k§#’ž3ûЖuì²u1SÐ(1Ú ÃÐ8GI¹ [ºxeÒlzm¸»/»åä¡o Ÿ®úžƒó×.žÉ:p˜éLt¦Ô®æ¢I_eÓ–zK°(´ )[0%á0aæŠ -SÝXš³‡EJ°(_³¢ˆò'¾¨%ŸªÅç˜UU†{PHš?©¬xláŒè”ŒøR_®1KŒ Ôy³¸Á¾ž—áTjYD\ÆfzÊÚO!ñûùÛŸõVjbÊØ’L,ÙáRÔÓÍuµ –˜µkGv\4»sOÊŠybD=š }®îèeZj[Û/óõa×±i$hùw@ò©¾Ray‰¾!F( C døѪ‚ÈÃìÈ:ÂðR®bŽž/ƒêZb¾UYgøïG›jøJsN•á2fGNµ–”ê5åŒÒY¡¯¤5­]Yî„})™Kv²ßqqlxfœ*\–CXªJMtÓ aXÃvnm\½âÍë¿+g%Ï|¹ˆ?RâM‰µH®‹=‘1’zgžw!MÉ­òI*õ„¢0Äà°ÞóæD?EˆÄ\¸ãã*ç5328ªy SzÆ`™£ æ9ñµ–¼2ã¾ç¢Ð¼#'6o©",§„0•W˜tg™*’I#Œ[Uªu…œ¦÷Ãôö>XÙËó¼üßç=. -¾4r5›øÀДŸ)Öåéòd‘\¯ÎÛ«ã+¢ÖH쳦•;Åõ —2­g1¸MÄ Ëµùkó†P™ÇN Á÷° |•*G~þüPšÕVÈê-E5b˜_#>6Ôf;Þç×ÁÜ,\;/[ù²|¡o0•ØÝôW8|­I5µ‚9Ò)ø¼ƒ¡;HJOOLnJoëhnlïHkNbÓé¬vV—•^¹ÙÞyûÓFç‡Þ‚_¡Œ8a,×g‹ãÞæ؇ ˜@|PíºhbÌ¥Å&ëBƒ3$pðÉÅ\U¥o`‹Dð9À_¿Cn¢×$´¨zQ)°gÔBN‡©ŽÔ;ëú®nÓ[ÍF]b±”TF_šÿg¶Èj,Ö•6uÕÔu ðÛz›WŠmz›^—-ÏR0Æ¢CIÉiZ]6¿xr¨6¼lsž7ÙëĽªÚ´T•,.ˤ« -g$$Ï_žNí—ŒÇz×DU®¢ÑVŽÅ½‚ö¬q®iÞɺ⫳ÄLDlèIüäc±×ŽçÞ¢á6.î} ü õ+ïQ¸£ûù{×PÝ«+ç‰Ñæ¹h4 -Ý]{b/“ÞC%ÜPŸÿZ Ã?‡†\”íb@q삵ÿ­ª=¡¼U3ÓB>Ú6{ÏcMü«|&ƒ}ÄÿZnÃáStB¯Ãl¼£Ì1ðÍÿ^eƒø:!ˆ!G"‰eFYɧ|¹­>DYqø.z«ÌµèµbÞÁä+`tÏΨlJcQÜçÀ—;ZZ*Ía5(\QAÔ•iÚB­Zâhögâ¿ÕDZ ÌS¼ "ÔÂF xîõø)2üŒ±¦ C¬)Tk” 'é,ô&LÇòªuMãÄp¥ÈQ˜!Δ&mÙÁ eh&÷¦OÊJ?+ᔬOF<З«Í‹ɼ¼÷½ÒéyÑœRn¼Ç‡ˆæº¿¾Œ=˜vy†X™«K62ú¼b£š•ÁKµÄv“ªlêOH-¡p3ÜxëƒjÇ-[X-rà# ·N"ÿ`’>•ÞÖv“m!¾¶;O™¼Uð”&2•Øg‘—Ë™¤Úœ‹ô@­ò 82íû«Ä“㤶–êíÆù0‚x}nÎÆ}v3ð"¡·Ä«Ó ¶ä„¥¾(â[ÍÅ%ôQGÁ^v$‘0,ÑlÖò•ð“Ød+(7p?×ǴצO6Å-cÑ.°4?o³žOx«xÃC°×Íûۯ𯾵"ØQ‹/,QWïða4¼€¶;HÏFb—mCà6îddY- ¶KRS^7`^5µ&=+5Ö™ì*¶D™kÍ£S¤™éI­™}=í•-Ílcƒ»¿ý!À»¡-wìeö2Sh›$W/WÅ0sQÒ,˜†ÉO­å⺊ò#ã¨A;¸ÓØ­/.>‡$a@FÎjß{ùdã™.&·;.ûÉÖ?‘@wpj«¥~˜½Ñ~øÊ-ña•·áHÚ”YQÓåt–9«ùBOskÍ‘~ñ…¸³;˜Æ½moÓ»÷Q–ºRSýÕíóæn˜¦P›léì?[c™ÉE Ÿýzcë;³Ömž–‘k¶'³‚ÿTf¼€øE9ÎÛ,€·`…Vk•²­&Ý[¼:í"RMGRz‹W+ø*ÔŸzÒÝ~ÖVn¼eñ:¢ÅX¥‘‹Uú\E -G¨P2Á}˜¬½ ¶Z\ï*ëcª\èœo0œÒ6dY^<6™ýNMZj–"á ù"\Ø«7ºžü$†[9·ÿ*ZÃPpÌc4–SíUó@þhôW6Aê˜ÓÜð"ÛÙÖPØI?îÙ’ÄzYÿ6IJœ7J|ûoãOP”ãCzk¾yAKáâ)œ ál)N•pß,ç¾ùÒî²zí+¡Ìßaà+€ÚcV;ŒÍ|ø#¡² r &%J›ª¬VºÄ¬ËdQa”j´û ü NxsÆg ÕÞœqªDJŽý)jˆJ ”âÿñ©Ï,~Vü îìÂn&Ú•mFA&>:(Ç_ýËðÏtåÞØrd?¥ÎJSéMZ¦G“SF§fJS÷ &]‚>WïÀ@ÖƒæOJ½ xÖœÀj< ¸å÷ÊšGvî‹×nÞ¼é ´( &i:=‚+AŸ|3Ÿ+m(‰‚z¾Ð¤.3ÞàÃi„Pv°·As„†à[è_ƒËž¾ ŒS"$_‹ÁŽ"¯A„“èœ7¹>…”‹h9ðL¡ã·:“—–³5xd1ô çfm&Î}ÅÆ©’¯ÄÀâ2WšKùÕ÷K¯´‰ëe.i†L.UTMs½Í€m6Wö Ýý™ƒ=œ°xÏàð3_xÞ¢à0ŠÃAƒ´µ­¡´H€ç\Š¿(6Ψõ6 ÌÄFŠ«IxÙEL“UÝ6²ÀT^ΩSc•*óPZz¢Vo²ªY›2ßœOgæäd¥5(:ï|p÷Ñ™”öZ¥ÑÂTR%ðÀ]‹œ‚ŽƒG»Îö1ÙÕX*0“ph"ÿ(*¥lÛ¿ðŽXG~ÌAyìgÒ³h ±ù@Á gPìçP‘uý=,ÙùåØI yÜ,ÞPäêŸÍ=‘G¢¹Þ´<œûǧÙ{ÍH¸‘ŒÄ!œŽïßA -à= -®B°ÓÛ]9â¥n¯·á6Yâm³1—ü§,PKÊ8ïKüËûþ;oŸõ76þOCáÓ¿ÀJÐ~…ÚÝ‘ÝÔ&nk¨ëê«ÏŒ=¤â8'?“ŽU¶\gA“»º£K|cKßä”\CA"£ËÖ -è\mI¹‘Uáè ViÔ[Œ)HuáÖܪóbøùâ'‹a«‰E5„q9ãÓ½%Ý•‘}£³[!÷ W~` d±Æwó½Ô ÜôêU-þmùÉéOŽïY#d'@5áåý˜£ÉâtŠ;ÔÕÒ,•<-×–Q’ÅO8Õyk½PÙß,nTÔdÄ+¢_Ñ1úyà¸oÕIóï’„À£*‚!žQ0„7À0™´Öÿ 8´JMáv(RÐ÷*÷s†³Á/žÏ¦vã&RpÄÅ!ÈHÜHÜͧ«FâɃ¿]Wž$Ÿ’‚Žòg]Qõïæ¡ yú½%N»©”Ô­®¡ÿ U…··OÀû›ÄëC’¿ð\uƒÏÏnÞ_~å¶ÏCðÀ ›*¤é%™4€Éó¾÷vÜýÉÜ¢%‰كœ‹l0^2”«›ÃFŒ~ 996®=¹¯§­½§/¹ ¼`ðÍ/üvà Ípć0µ´e4:GÕ*ð­Ùîó,ÕÈ-¥%¥|€ 4×TtÐuuúü$5¥”…Û‡+ëJ\Þf§o´Ë[q¼æÁ¯ñ€ˆC +ðÓE€Ï'Ãl>Ž„hà,@Ñ[hZßBÁAñ¾ÿ¸Mrïë òeJ}­×xsi@ŒB¿`Ð_ ðö‡„³À£‚)×y`mq»GÜÚÚúz’Û½bøáÙòMà‚æ| Ët„Zû€®ˆ)Њ ùÙŽ›èuµNÒÁŽc°©pÊM”SWáo3„©°r#€«½å] a×5ÄéÊÍ¿+Wy{ê?q*>;^›Z9ëÚÕ —ºyÞƇŒ¬pºˆ‹€'óÑ‚ó\©h"Õ²¿k¶px”’¦ÑتTŒÛÛRà)ú§°oǤ°„p!ÈUK¹ÍQìÿ,…KŸ -endstream -endobj -1317 0 obj -<< -/Filter /FlateDecode -/Subtype /Type1C -/Length 11578 ->> -stream -xœzwxTÕÚ/CØ…½’I™Ùf³÷F&X*ˆˆ€ô -dÒë¤L’I&½Ìd&½·I2“BHB „Б*Š"¢¢Ç‚¢~õ¨krV<÷® úï»Ï½ß÷™Ì“é{­w½ëWÞwI&M™qxâð«Ã˜t¶t‰t·t¿4Iš*Í‘vJû¤¤Ÿ9Nv”9.s\åxÄÑß1Â1ɱƱÉñ¼ãÇGŽß:N38-rZéôšÓ:§ÍN¾NÉNeN§>§!§»N÷œ8}éôÓÿržê,u~ÆYtžã¼Ðyµó^gogµs–ss›s¯ó€óMçÏsþ§ í¹¼ä²Þ個K˜‹Ê%Î%É%եĥÅeÈå¬Ë—.w\î¹|íò™Df/s–Í’-‘mùËÔ²Y¦¬Tf–õÉÎÊ®ÊnÊÞ—}"ûRöDö£ìWÙ?åùT¹TÎÉÈWÈ7Ê÷È˽åAòpy´<^ž"7È«ä-ò>ù¨ü–üùòoä?Ê“³–fØéìv1»†ÝÄîc°¾lÍjØ46‡5°El9[Ï6³Ýìö2ûûýšý‘ý……5c"{'·>’wKNYŸE«¬ŸR}Ј C7«#Âiš×¹­ •"̦ ¤ßÑb3|E&!Ò/M2Ð0‰ê©,…+ΡéÁ=Æ? µs€õ6¦Têh8Ž<¢©³¹p*NRÁÁêÔpîX’霿A»£©þŒb8•A…Ô’÷G£]5#W„­Ô@µ¹àV1 Æ6ZX¯Ô*è|@DÑÖ&<4¤?­|!%ùHútô²P» -bË›•ÅæÆ BucÉÎ×*+`±ú°ha49 «ÈŒV¤êÒÓUÂ&´=ÜA¨Ïåå•(ª+Š-ùB0£ühª)·(·(=_?´ æ´Gg¦%¸‡U!Ñ\zFI©!ÏhÈ …†ÂE[Œ)P¥<Úr¹³­¼±Ahh2õu= ¬ÏN«=UVÖžGîn»<Mvç”gÄ(´ZeèëBr@F¬6 u »>á3ãD™¢¦¬°'_0#m4Õ›cÐ×o1ä3ò36¡µÓ¢o|KÄdW—+jËŠ[ ð§TÑTsvAVË8œ™f&¡3¼ñ3ºA4‘ð%ëœêî’’–Âé~ ²pN,•”••­å²Ò…±<$¦ †{yËŽ[ú=~ò¨B~aQž=N'D W-dcyQQ)WX¢Ë)SÚFRÛ¸Ç7®ýžr`Þ)qS[„¥SÑÑjéiiNkÔå Õ^4€Ö0ÉÌB3Ðì'ÏÁiÐõ»_àLøìÂ_+BÙ+õûÑJô¢Ú}Ïnõ98®¯½.‚±]c+Y©"ßÉ®ÉW¨µ!QÞ²C;=LÊ-Ôu…Ó“›r**Õ%ý‚Å@XЗ*ªS×o -‚ÛÐçÓжPkKÓÊu÷hx°…‚¾è6}ÈñmÖØ¢‚êü"¡¬¨¦ÐÂ}Óí=k[’_Hœ•”¹VOƒZ"?ÍXù:’-ÝÁ^ñNñ¥9ÊMñÖa¯×w¶œU -‘ÝɉïÒá(¯l.*)©áJÊs3Åijݙƒœÿ÷Ï¡Ë}Ïû[OˆhÞûĪ¾XËÅõS=ïœ>ïRè ¬ 7m¥Á0ßÖu2ìx (5.âÁ¾¤7x`fƒ -´Õ= -«ÚM f”¤|ÒŸÌ Ï4:øøG%ã–îÿÎøåÄÑšêŒÔ"±0=/ÏGqÈPÛÃj6÷^ß|f š1ïYôáÿíÐÿœ  Å›;¯2{ÇñȃsÑsÐÿ8ÉÜsK„2Êø~yÅûx×^®¬ºU*XPˆŠz?Ó·A®Pègë:¨HSmÒÞ3âx8¥ß­ÑnÈ u%\ÊFaé š˜@è´ez:œ„ë<¢yœ¢2Ñ,¢™´|×\gê‘v|þ4•UFe|¶ðÆK8ãà"FQº“5­Ú„üÕT›¾&NçO£áèo¢" šà èc§¡HÊ ‰ÐÓjèx]wMCÛŸßjÕWÇéið -ÓK?ãÒúåkÇÍù?”m¶ÆYg±ç‘+%kÛÌÈz×ó²Ëך\Wô¥hdïù3²E! ˆgà]`U²Æ²¢.¢î3Caƒâ¸º1"2.6L›W'”(‹ƒ ÃiÐØh®hâºj#÷‰(›Ò+µšà\:¦P`yAj¹îm5SÐ1gÔ£ºÔAbü2ymd-£CЩ€7Sºµ©i+0~ƒXE¬6=P'dëµúd=~a Ó_Žs"DcêÓ‹Ö¯ÐÞhªC_oPÒãÅø’ q!z|É -|ŸÈÀÏaû#ÚB”&3ÓMj¤¤†C¦2JÜíõ͆EÓ¾ƒÂ­oÌu׋§›P¬º‚j7«U‰ñšq«á\"©-§¬TQ[[ajMè‚šÉm¸22 h±æ€2Ô—KSÃS<éSm©,ª®n¥«5Hôb@Rž vöÁ“=HÀ¹ðu¸ÔVÀ‹¬ÞlÊ«æ¾8_fÏ·µ_¸©¸|aw»`ò *Þ¡OÕ&r4·:Q·¦ ‹ø5üÔ²„·š§Luø«'š³RkÄ–ø]Šyþǧjò Aº‘ÀöŒà¨cŠ=÷ƒ  \ýÅèö ¼o×ð8¿räl3Õ|âDKK»(ÝÈxk™ ýƒcÈãÙµ9…¹tAn~V¶"--!5U@r$'¬ëÑ®hꄾ:Áà7±¾šøЉ•0SG‹sŠôÃ4$©&ówHAÔ©ŠR2)ÉYz!jÍTjv~EºˆÜ`Ãßm®»Q‚ÿ<Àî†ÏÚÙõÕ -SyE½PjB1j²#£8mIàTÔ=-ü Â?)9!™ËÌ.,‹)Ca^A>¦³Æð UÀ±“ÁÃÃ=7Íä­Äi -‹» xŸÃk\_¥OÌ -O‹äßÞu²í/ì1ðIµ`<¦²O`ÎO(‡'[[Ož ÇïÖߨ­6ôÑ°€ú›ÛÅWwyDìöÀ÷y]ÖtJ~€Ëa|Ó®ÊÞ%a(| -ÐC"tFI"º¢"od×'ŽÒãi”Wdì>ê¬Ù&SEq෺„÷ª7—+^P/ ñ<‚ŽÄlå<µ•§E@é®V×u2¹WjêÏézº«®'Gu5ñº=ôx#Š°6ú1¤t//5I¬³ ”=ê–àÃ)Ãk»Ôb|{fGÎæ‘YŸ$ ‡öÐÝ!ž¦ýrXù<šfü”><ÓþÍM•ÁÕ,dòk{Ïr­uI¡%bA|~”á D×^ãÕ¸€îÌ¡#m¡§ ó EB¯Uù¼á.œ—”îJµmˆÀÈÉY8>gØÁB«‹¦.›‡¸M槼ŀ®ó1§¹ó—›ºÅî~ÓÛwð™eÿHA¯ÄáHŒ,Ë"óƒ¶+Ðê„}Ž% ÿ -·Õ\Á¹ºÛZ:¤€_§RhåV"Ž*;^WXÂÕk#DE6$§nãÔ0‡·%á ¸Ào`îóÀzµ+€(cà‹ŸÝ€S¡üåwÑ«±9y¹‡E¿)ÆšÒ¼rîÑо¹‹}ÜÖû¥Tõ‹`´¼©Ôl¢eÖ‹ià”âbÈÙ=íB“·géznzšIÙ¯O†.Zâí¶A©­ê­mSêI™õog®\~ôÈÊœ´p&/è‰VÀÈSY¼q±ª!‚€N|K±|Ú²G᧊RjRŠLqBXsFAx>™®ÕÆ+‚{ãÞèÉa Ýá/îÕ¤—Vä -„rêxtc2Ful¿ÿµ«Ã]˜$ÃBƒý[Ã{ñy -GŠd€o ÓÛÞÞÓzÂ_Ag!%6ÁbêzöñÄ.ázØΓë87ÿ¤QVÚ >dÆœL. ¯6öÁs]ë›úÁÕzƒì‡D ™¾€Ï1š„25§V©Ëµmñ"œµ„°~þ6HãSüž »{GHôáx:%<8+‚Û²½k4\ ?ŸüÞO -è ~nè¸Ó\“¡ÍÑeåf -Y©ºŒL…¦4©"G^î~ŠàU^Úi“#UuH€ø%<W'1¶Y†4US–œª8]½ŽZuFùsK}~y».*ní8ýr´&7=\ÈÑfè29mVQ…˜ˆq®…´dæØÁ>ØÇ¢DÃRß1¦hÇ/K`v`Α¥}’_ óÏ`A€ß²QŠ¦Žmb½}<;‚;;ƒ:½Ei¿’!` ©ÛKQÆ}I‰ k˜ØmvÔ%zW -å{Xƒ‡!*XžÇ^1ÖÆŽ€É?1àÌØÔLÀÞ&ù8+°ˆÀÎ@Ù#(Ûx¨tý·ð6…U'”M(¬–Vï­â;(„Fæ¼®ú/4Ùi)Z9#€®”¾³¢¾¹6ë2 s¶ûäð°²ÃÌ붸´¦¢Rh45U4r£æ]è08ÝÊá¡žž! ¸pê€Gx¸'½Èr—;±—‚ÕP…ð?¼æŽ/ Q‡éèl5 0‘Q…)&CG³“ÛjÁ{ß›4(Žnñûùz€MQ¨)‡ -Ó4vE`éKå.ö™ú,bms±¹p„–FjMF¼æU]o[óÚ#hÖ—dF)ÐEÊ Ô&Ú€:x(•ž‡OúŸÆãVöàùœo9Û*ÔŸ(鹪îÑ^‘b\`Ʊe -€Ò|—òp ¼?2‰À5,æ)¾´šÍ­Oñ…Œ‰‰ŠhŠiiijji‰iŠAÑBºz ëéïïåqÒètw÷i`â¥?3¥%˜õ ê뛹J@þˆù'ඊ - iŽ:ÑÞÜÜ~BÕ,‚ŒÍè©)8ùÚà'@y‘ïí>ÑÞÓÚî'¶ÁÂ"›*p1Q®“V±ç‰ä¯ãkÙS-µB¤Ò–Ôè+4 °ÕS¹"H0“½ ÕÜ© (/-S3NÀiâáéúDò€ÚtQ]4œI4‹2&Ç®¾Vƒýƒ*/ÛöϱI@¬èúEé÷pë)8딜汣à Êa@ Ä6-ãEl½“%;º€!ARyyZ —_dÌ/‰ -Ç݈‰}˜Ð:±ÖkÀx ÌäÝ¢–γ\_S¨hîǃìŸ$èRÜ>uøp°Ïö¨BÛ|ñŒµÙ -(i|½Õ§zU“ ÀˆìŸIò1’m+(,,,((Î/.üÚÞ~ ®ØRb©(+-(¬°wxÚÙ7É<é;Él‰I2,¹+“gOž79rÅäÎÉßÚM±[n·Ù.ÒNm÷Ñ”W¦lšR5¥nÊ}b1L\ ®ïŸ?“!äWÔkTUHÕQŸÐ›èP:—®¢ÿ˜:yê SÓ§~Â,f¶01Œ†IgŒÌ s H€ LžÀ:À5pÓ^jÏÚO·çíØ/³ßn¿ßþ°½—}®}½}“ýÇö_8Lr f9¸9¨RÚ>vøÌá±Ãw¿;ŒK'IIéé*éZé^©Fš*Í•öJ‡¥W¥w¥Hÿp´wtr|ÅñUÇ×}3›O;žs¼êxÇñsÇ_œœ^rÚáäéádp:îÔã4êtÓé§Oþæô›Ó¿œ§9Ïs^îü†ófg/ç`çXçTçRç&çÎ=ÎÎÃÎç?tþÑ…p™á2Óe¹Ëz—Ý.‡]¼]]’]Š]Z]κÜuùÂåw-“ËfÈž“=/[#Û-ó”EÉ2dY™¬ZÖ ë•–Ý½#»'ûJö›œ”?#Vþ¼ü%ù+òuò=òCò ¹Zž&Ï’åeòZy£¼MÞ%‘ß’$$ÿ^>ÆNb¥ì3ìLv»}™]ƾÉîb²Al›Áæ²¥l-{œígÏ°çÙ«ìmö=öûû»ëdW©«Âu®ë‹®Ë]_wÝàºÕu·«›ëWo×`×XWk¶k‘k•«Åõ¤ë°ëˆëE×kc{þ,I?Ènˆ{s¢j9ß[<çuÎà”ñAUå'FŒ¼UÔݬr­ñM}A-h8ü¶—xæfÂûÜï6vˆ§{š?üVq“Ò\èÌ>–àÍù…×uʼn '2:rîÒ°Ìú,i8¤‡î 9jÚÇ!§óÑ\4÷Ó…Ð铳íOn‰˜ÜײÐ>¿®ï,7\„ìŒb¬ÈkêyH‰uí<4úâÛ<|óƒÞh>Þ€ž¢>˜…“o"䇎íÄ -œFÔ,¨¼`è Hé"°†ÝdÓÒˆÜÜ\}6—œYR+Â*êóµ£HŠˆµßð1G÷÷µš;«²+µ5BzYnQ‘¢¡¥æ¤þd‹ÐVy!·:-T¡ÍMÊIÐô=š¿"R;s+ÊæêÒÁ|Áh«vˆtKEõëLéåÑ] §!½ì­“ˆìZ]q©¢­¢t´@° au*«,½ö0 ÿqZ+ wZ£àÚñ(¢…„G­?Vö56tæOI¼‹c¹[Bò²»³Ù·óÙOË0Á]&‘#Ü@¼Dâl .‘pÊR+iaר«ôçik°Æ²rCWRœ›] –¦åé ÏÓÈ -Ë.ŠöUŸ ºÍAúÁïpÙ½äwbÛDSR|Y4çZÓ‘"¶¥çëJ2èÊäâ(¥bÏ[»½:ð u:¢783G¯ÏÁõÀ*{BF®àÁ#^_ÿ§ÈOnKx*òÇ\ š 'ƒ×¨CChÿcûÖp;m:kèDš~n›êÏ*êvn½ÆÊèÝ“S7dÐ*ë} -lÌ3Y]\>âe­'ÍÛyÙéÅŒìÁ«<ø¬ÂM8ÚŸJPbNÿÏ> Ñ”ùhÊÄ>Úbÿ )½öESƒ™%éï4Æ;#Ý+kÅ,3€­{·š¨üâò²2dðpÖ—è±³f©ÙÙKÞx]\¿Ñwá<¢{^y´VØpÿ«ÀŸ¸Ÿ¿ê~ÿ}ñþýžo~UüêÿxÝû‡o,î~–Cß»°p?\„oûá>¸-BûÑ^ôzíZ°æÄíMÂæ;¾Vxm!šÕ‡MbilA‚J‘Ñ?Œ¡Ò{ìÒÑ‘Ð IòëÜуe/ÐpëxºŽ¼ «NèáäÒ jk+yz*2Ùm؇¦h%附IWÈý2©N‰æÂâZú®œ:õq~qk"ŸaryEÙËðŒ™Ý™ŸZ©¿AÃ=Ôhë@GIi¦¦\ˆ¬¯No溛[{†‚:ŽRFm:*Êî~ŠöDS§²ËS&:6i©Þ™8põ 0`L -³Ø(Ò­Úô:Òê8F5ã T:F´’g xºÎÓU: ݆q—?ˆië -H¬/Á ö,"‰’¸‚”tETBª· R˦t2Ú˜Y¬éBeÖŸ¦EÈgütÜÖ°`?Ýt5Tšb©ÀÈzsi~eI£ØóñLg?CžZL‚»le<”˜p±Þ7©ÈCMa„.« Þ¾ZÑ0(~Ú{ññ -Hnú~f§Z–h\_˜`˜VNéÏÖÔàÌ4¡ûj39¢«MЦS)ŸÜÍ9‘™¾ñÓWø¸-™¯˜ñåOý„ØÓWR¯sðE«†ý9ëâkh¦‰û_›“% Ù®sŠö?„¢μôð§"Aú -?ÑæXË'¦Çƒe¼©ë/ð&/ËÜÀÈ6üUÐÑøÚ“-ÙÜÇ1¼6NglUh88 ÿ]ˆ.‚ÙÇ(…œ(6«à]ê*J£€—)«+"¡Š¬5” -å%õE­Ü{ç‚ßê[ܪÖ/W,R/ûïµi˜Œ´t¬Ìª ©…ÚjtÅ*wC·“–Jð–=¥¢fåF+4Ù¡¡ -Cá ¦ŒïWWß+xÊ#­Ó,ÊJ; t¹‰5^E¾+¶, -¬Á®¶QU¯Ž§_‹ôõ;¨8V—p3F,ú^E~–SfÜGÏ¢†ÇnÞîS?,x]Šç†Û†,BlU¶N¯ÏÈ™žÆ\ØÛ9uÉFwz<œÒ¹'§àU¶¾KõBo"’ Ý¢ŽŒ <Ý4opÛ*°CH£~ïS"²\l†ŸG‘ÉhŠ_À, ã©îŠÒ _žO‰hÚ#«Â*7ÓfxhÛ^ÐCÐÀÿÕçm -êZ;ÌüUÅ‘µ5{ç`A½Óí~Â.ö‹ÐL¤6ê ‹55¥-B RW9qm;áËèâ4ôŠ­ ®†_›Â¶ðù—h¸™‚ -ØÒs¿¼|¨hº íS›¨ •&+QƒÍº+üÂB®)Í(Õ}@Ãä ƒµô'íƒ×>R@zÑm´z¢]ÿNvU’q½­]¯ß‘ë™Cç¦ét©"Ô[?b›j+:ux¡IÛS”Q1‚::8sÆ–²K¶Dïß*®Ûî¿MQ`ƒ¤@3 <ˆüþpÇMáÖ¶5í³¹‚@ö÷kh&“ƒÛÐÔ™G=[‡"ÅÐ Éïþª€N¦†/ -µ}œJ©‹U'ëÊ3S²s³²Ò„ô¤œ´TÌ7kTäÎe¾ž[÷Ó‘£èµuÀ÷î¾ãŸö ªrB©Ö$Çq©µ}"¼`ë_)d\xûöAe¸VUs£{ÍÖ®š†Âbºç|us­¢8§ ###G›$èõ:]®y¡•Ór(ŒJ«°»§ïŒÜþ›âî±á]‡‚CÀÓ½­«VÛööÈðêpÝ–@>þjëYÄh²ótQ"$ -Ê Å±»ò - Œ­šãqä¤ÿ𙞓ÃglÕÀþú¦­¦{SëKÈÂ$»S­'ÄNúÀ#h!| ݧÀ #âÐkh-V`xÝÑóXÍÀNr t„ÄEéƶîÐQî››¿`©ê|ø›ÍÑ1!Áõq]Yb_¦A_©¥«‹#<s=·­÷¬í‰Z®V (ÎÄ´(£CyVh:bÛ7ÖÃG¤tÜàI?\cÿ¬¨š¨¨¹åÒ@§@ç`¤ƒið{ÛÒÐjæÌeqØBæàRï=¿ãH@øÆáG -mÙ_¨&òK - ùœyÂn߇ÎÔÄ’1àò"@ýÕ[Á€§E’bKÑ9Zúú¸{¡Yä±?jþ¦Á†âòÖ&Ü:h²¾ZÀ£¿f·%ñà SÀ5Áøê§Ý¡Ò‰îPéŸÝ¡¬¤t]&•cë}eMg‡ë•/¾˜päHBד'uÝgDàþ˜ÖçØ>&F¨çW0î¿´òQ2õÊzâõÕ=©ÿKWßL=íûK­snß¾mج´Ä­£ŽKåÜTW¡²-W¯t´G+«D`s21;ù¿Úä{¡œx@¡^(‹qƒþ«ˆ?ìHðC_j;‘“’¥Ïà|BºßOÑä:6# r{B˜›’Èy©GqèÅ.ö6û ºC”Æk‹Ë -ó‹‹Ê„²êlŽG™ƒýUª€ÝWŽ¶ZmL¤:`"Ò2 ?¯žì…5½.ð,ô_Ä2®’ý:dïzj²Ì¸<Þ,†‰V²›¿z»;mÇ\†Ò‹² Þôÿ›à+5@;L p6È3IÀ+L»©ÎâãáK| ÐùÃçy`3XôæC[—*Þê?zm¤ãĹ¡°ÞÙYyú,AŸ«ËÓq©¥}ý÷GEðSl(ãþž°ÛÃc¼x[3a_b)ÄÉq“ô!vSÆS’ÞPœP©?GÃD˜…)ÝÁDíÛÙ¥o)pÞØÈC¹Jô­#€•€ Ùõ¥¯ç8x³!¿¦¤AŒ?ÞœÞÊõ¶4o©ŠˆÎJÂÛs[BxLt]U Íùp6¯«|ëɧmyœ&`aÿ”6 t®¡ö¶­pø­Šz'פÁ„‚|Qô¥ €1oÚâ›Ybºž´ ZWD¶jcµŒ¿QÆe©aœ)“ü³r|š?n MÀ”±Ä†žh9Z†Qâ \ŽVÀCø¶®À{?[…ˆàd yµ[µ­K0y²¦­EGV+ß„CÝî×|3Hð4ÚÎî2jêuØ#?K=cÑžæ ñðp>œùÆdb  õHœº½nM«l%:Œ¡H°€Qûz¬æ!F\„D©Bþ,ŸhjÅ”5)Ï £h‡³Ë PvÜ¥>' [ -åìe@<˜åщP.‡î8”+ð͇r9|Lœh02à+Æêcb=È]s‰:€Ö·‘…ßœùãû”ƒ=¢þ{¢\ä{ÀÓÎx}sV='›Ñ„¤€n¶Îa;˜/?¯½Ždñ9q…Œøt]—QfÑ‹ C¨ -Š,x‹C;à N 2¾‡ák¸¾Æçܶj·PÒ£¼ídÖ±Cõ¡~Š M\THrÓÈ`AM}« e­s¶ŒXnS?¼=Ž¿t›G[G¼>¼wîÜ=0aKá6“änogm7³ÐÚ£¸x¤oë[îž»|ð°Þ=ŸÞþÁ㋆:îw ðþý€ûî≯¯Š²ŽñgCXUtrf4§Œ7ÛjèÚH2 )=-ŽKO.(‹Í{Ž”ÛúÂ(€B¯A"¾)³²Jaª©hÀŸýÀæþø ÜÅþ¿èy°†æõ4€/ÃÅp6¾¿Œ×îþë@S½ °;`õ¨«ØÒ#ÞÑ‘jn‹÷Íäˆ:0ª={ºaÓ™x“ P·]ú†¥Q ‡9­:Ó7gwÚü:@î‰×I„Áÿ<ˆ/›(ƒ‰Ë©-J»i€Vgó¶îkn5†'yCô9)"¸*×àû×ìÆüŽ¹â£‰ÖPœó;¾yë‘ð@ni—áºnøj¯D«Ô*›š-øäZ×pKBJ• Á¤ãÖD*Р 1¿Â¨¨b÷)ÓÛï`±¿fÁåX[!þnÿ3 ïwre`¦ -ØÎ}àÜ%àr€u=˜8«’€qtK~‡Î6‰jÆ[È…8Cžu;2vä#$ Ñ³”º ©4gÞ3Œí0#@¿ìÌÊ -ÒOWk'T²_Ž–Pài³ì¤®¦$1R¸(Ïlç?Àÿñå¼£ -endstream -endobj -1323 0 obj -<< -/Length 8093 ->> -stream + -17.435 -19.926 Td [(11.)]TJ 0 g 0 G + [-500(If)-268(the)-268(matrix)-268(is)-268(in)-268(the)-268(update)-268(state,)-273(any)-268(entries)-268(in)-268(positions)-268(that)-268(wer)18(e)-268(not)]TJ 17.435 -11.955 Td [(pr)18(esent)-250(in)-250(the)-250(original)-250(matrix)-250(ar)18(e)-250(ignor)18(ed.)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(5.3)-1000(psb)]TJ -ET -q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 201.825 706.129 Td [(gather)-250(\227)-250(Gather)-250(Global)-250(Dense)-250(Matrix)]TJ/F54 9.9626 Tf -51.12 -19.441 Td [(These)-280(subr)18(outines)-280(collect)-280(the)-280(portions)-280(of)-280(g)1(lobal)-280(dense)-280(matrix)-280(distributed)-280(over)]TJ 0 -11.955 Td [(all)-250(pr)18(ocess)-250(into)-250(one)-250(single)-250(array)-250(stor)18(ed)-250(on)-250(one)-250(pr)18(ocess.)]TJ/F52 9.9626 Tf 120.161 -25.465 Td [(g)-25(l)-55(o)-35(b)]TJ -ET -q -1 0 0 1 289.521 649.467 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F52 9.9626 Tf 292.803 649.268 Td [(x)]TJ/F83 10.3811 Tf 8.097 0 Td [(\040)]TJ/F52 9.9626 Tf 13.398 0 Td [(c)-25(o)-35(l)-55(l)-55(e)-25(c)-25(t)]TJ/F85 10.3811 Tf 27.705 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(l)-55(o)-35(c)]TJ -ET -q -1 0 0 1 359.144 649.467 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F52 9.9626 Tf 362.427 649.268 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F85 10.3811 Tf 2.875 1.96 Td [(\051)]TJ/F54 9.9626 Tf -219.744 -22.41 Td [(wher)18(e:)]TJ + 141.968 -360.647 Td [(85)]TJ 0 g 0 G -/F52 9.9626 Tf 0.344 -20.664 Td [(g)-25(l)-55(o)-35(b)]TJ ET -q -1 0 0 1 169.703 606.393 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F52 9.9626 Tf 172.986 606.194 Td [(x)]TJ + +endstream +endobj +1563 0 obj +<< +/Length 6789 +>> +stream 0 g 0 G -/F54 9.9626 Tf 10.187 0 Td [(is)-250(the)-250(global)-250(submatrix)]TJ/F52 9.9626 Tf 103.256 0 Td [(g)-25(l)-55(o)-35(b)]TJ -ET -q -1 0 0 1 305.084 606.393 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F52 9.9626 Tf 308.366 606.194 Td [(x)]TJ/F54 7.5716 Tf 5.106 -1.858 Td [(1)-13(:)]TJ/F52 7.5716 Tf 5.963 0 Td [(m)]TJ/F54 7.5716 Tf 5.985 0 Td [(,1)-13(:)]TJ/F52 7.5716 Tf 7.856 0 Td [(n)]TJ 0 g 0 G -/F52 9.9626 Tf -182.447 -19.051 Td [(l)-55(o)-35(c)]TJ +BT +/F59 11.9552 Tf 150.705 706.129 Td [(6.9)-1000(psb)]TJ ET q -1 0 0 1 163.696 585.484 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F52 9.9626 Tf 166.979 585.285 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(spasb)-250(\227)-250(Sparse)-250(matrix)-250(assembly)-250(routine)]TJ 0 g 0 G -/F54 9.9626 Tf 7.732 1.96 Td [(is)-250(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)-250(on)-250(pr)18(ocess)]TJ/F52 9.9626 Tf 234.034 0 Td [(i)]TJ/F54 9.9626 Tf 2.964 0 Td [(.)]TJ 0 g 0 G -/F52 9.9626 Tf -266.027 -20.91 Td [(c)-25(o)-35(l)-55(l)-55(e)-25(c)-25(t)]TJ +/F67 9.9626 Tf -51.12 -19.204 Td [(call)-525(psb_spasb\050a,)-525(desc_a,)-525(info)-525([,)-525(afmt,)-525(upd,)-1050(mold]\051)]TJ 0 g 0 G -/F54 9.9626 Tf 32.563 0 Td [(is)-250(the)-250(collect)-250(function.)]TJ +/F59 9.9626 Tf 0 -22.289 Td [(T)90(ype:)]TJ 0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G +/F59 9.9626 Tf -29.828 -20.421 Td [(On)-250(Entry)]TJ 0 g 0 G +0 g 0 G + 0 -20.421 Td [(desc)]TJ ET q -1 0 0 1 230.392 543.107 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F52 9.9626 Tf 236.663 534.539 Td [(x)]TJ/F52 7.5716 Tf 5.148 -1.96 Td [(i)]TJ/F54 9.9626 Tf 2.75 1.96 Td [(,)]TJ/F52 9.9626 Tf 4.276 0 Td [(y)]TJ/F51 9.9626 Tf 108.448 0 Td [(Subroutine)]TJ -ET -q -1 0 0 1 230.392 530.753 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F54 9.9626 Tf 236.369 522.185 Td [(Integer)-8983(psb)]TJ -ET -q -1 0 0 1 373.603 522.385 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 522.185 Td [(gather)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ -ET -q -1 0 0 1 373.603 510.429 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 510.23 Td [(gather)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ -ET -q -1 0 0 1 373.603 498.474 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 376.592 498.275 Td [(gather)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ -ET -q -1 0 0 1 373.603 486.519 cm +1 0 0 1 171.218 623.994 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 486.32 Td [(gather)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F59 9.9626 Tf 174.207 623.794 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in/out)]TJ/F62 9.9626 Tf 27.297 0 Td [(.)]TJ -59.098 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 373.603 474.564 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 360.068 576.173 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 376.592 474.365 Td [(gather)]TJ +/F67 9.9626 Tf 363.206 575.974 Td [(desc)]TJ ET q -1 0 0 1 230.392 470.579 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +1 0 0 1 384.755 576.173 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 278.277 442.2 Td [(T)92(able)-250(19:)-310(Data)-250(types)]TJ -0 g 0 G -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -127.572 -27.052 Td [(call)]TJ +/F67 9.9626 Tf 387.893 575.974 Td [(type)]TJ 0 g 0 G - [-525(psb_gather\050glob_x,)-525(loc_x,)-525(desc_a,)-525(info,)-525(root\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-190(call)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G - [-525(psb_gather\050glob_x,)-525(loc_x,)-525(desc_a,)-525(info,)-525(root\051)]TJ +/F59 9.9626 Tf -258.11 -20.421 Td [(afmt)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.902 Td [(T)90(ype:)]TJ +/F62 9.9626 Tf 26.012 0 Td [(the)-250(storage)-250(format)-250(for)-250(the)-250(sparse)-250(matrix.)]TJ -1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(array)-250(of)-250(characters.)-310(Defalt:)-310('CSR'.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F59 9.9626 Tf -24.906 -20.42 Td [(upd)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.91 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 23.243 0 Td [(Pr)18(ovide)-250(for)-250(updates)-250(to)-250(the)-250(matrix)-250(coef)18(\002cients.)]TJ 1.663 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(integer)74(,)-250(possible)-250(values:)]TJ/F67 9.9626 Tf 165.219 0 Td [(psb_upd_srch_)]TJ/F62 9.9626 Tf 67.994 0 Td [(,)]TJ/F67 9.9626 Tf 4.981 0 Td [(psb_upd_perm_)]TJ 0 g 0 G +/F59 9.9626 Tf -263.1 -20.421 Td [(mold)]TJ 0 g 0 G - 0 -20.909 Td [(loc)]TJ +/F62 9.9626 Tf 28.782 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(matrix)-250(storage.)]TJ -3.876 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(a)-250(class)-250(derived)-250(fr)18(om)]TJ/F67 9.9626 Tf 201.393 0 Td [(psb)]TJ ET q -1 0 0 1 164.583 350.626 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 393.323 371.449 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 167.571 350.427 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.664 0 Td [(g)-25(l)-55(o)-35(b)]TJ +/F67 9.9626 Tf 396.461 371.249 Td [(T)]TJ ET q -1 0 0 1 371.853 350.626 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 402.319 371.449 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F52 9.9626 Tf 375.135 350.427 Td [(x)]TJ/F54 9.9626 Tf 5.206 0 Td [(.)]TJ -204.73 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-207(as:)-289(a)-208(rank)-207(one)-208(or)-207(two)-207(array)-208(or)-207(an)-208(object)-207(of)-208(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 244.743 0 Td [(psb)]TJ +/F67 9.9626 Tf 405.457 371.249 Td [(base)]TJ ET q -1 0 0 1 436.673 302.805 cm +1 0 0 1 427.006 371.449 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 439.811 302.606 Td [(T)]TJ +/F67 9.9626 Tf 430.144 371.249 Td [(sparse)]TJ ET q -1 0 0 1 445.669 302.805 cm +1 0 0 1 462.154 371.449 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 448.807 302.606 Td [(vect)]TJ +/F67 9.9626 Tf 465.292 371.249 Td [(mat)]TJ/F62 9.9626 Tf 15.691 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -330.278 -22.289 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.421 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(the)-250(matrix)-250(to)-250(be)-250(assembled.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf -24 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 470.356 302.805 cm +1 0 0 1 360.068 280.918 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 473.495 302.606 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf -297.884 -11.955 Td [(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(19)]TJ +/F67 9.9626 Tf 363.206 280.719 Td [(Tspmat)]TJ +ET +q +1 0 0 1 395.216 280.918 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 398.354 280.719 Td [(type)]TJ 0 g 0 G - [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -20.91 Td [(desc)]TJ +/F59 9.9626 Tf -268.57 -20.421 Td [(desc)]TJ ET q -1 0 0 1 171.218 269.941 cm +1 0 0 1 171.218 260.497 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 269.741 Td [(a)]TJ +/F59 9.9626 Tf 174.207 260.298 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in/out)]TJ/F62 9.9626 Tf 27.297 0 Td [(.)]TJ -59.098 -11.956 Td [(Speci\002ed)-290(as:)-389(a)-290(str)8(uctur)18(ed)-290(data)-289(of)-290(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 171.305 0 Td [(psb)]TJ ET q -1 0 0 1 360.068 222.12 cm +1 0 0 1 363.235 212.677 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 363.206 221.921 Td [(desc)]TJ +/F67 9.9626 Tf 366.373 212.477 Td [(desc)]TJ ET q -1 0 0 1 384.755 222.12 cm +1 0 0 1 387.922 212.677 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 387.893 221.921 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F67 9.9626 Tf 391.06 212.477 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -20.91 Td [(root)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)-429(If)-290(the)-290(matrix)-289(was)]TJ -236.371 -11.955 Td [(allocated)-209(with)]TJ/F67 9.9626 Tf 64.153 0 Td [(bldmode=psb_matbld_remote_)]TJ/F62 9.9626 Tf 135.988 0 Td [(,)-217(then)-210(the)-209(descriptor)-209(will)-209(be)]TJ -200.141 -11.955 Td [(r)18(eassembled.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.252 0 Td [(The)-253(pr)18(ocess)-254(that)-253(holds)-253(the)-253(global)-254(copy)111(.)-319(If)]TJ/F52 9.9626 Tf 182.635 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F85 10.3811 Tf 19.983 0 Td [(=)]TJ/F83 10.3811 Tf 11.147 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)-253(all)-254(t)1(he)-254(pr)18(ocesses)-253(will)]TJ -220.305 -11.955 Td [(have)-250(a)-250(copy)-250(of)-250(the)-250(global)-250(vector)74(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable)]TJ/F83 10.3811 Tf 142.42 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)]TJ/F83 10.3811 Tf 7.873 0 Td [(\024)]TJ/F52 9.9626 Tf 10.986 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F83 10.3811 Tf 19.923 0 Td [(\024)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1,)-250(default)]TJ/F83 10.3811 Tf 43.89 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1.)]TJ +/F59 9.9626 Tf -24.906 -20.421 Td [(info)]TJ 0 g 0 G -/F51 9.9626 Tf -301.107 -20.909 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G -0 g 0 G -/F54 9.9626 Tf 166.874 -29.888 Td [(66)]TJ + 141.968 -29.888 Td [(86)]TJ 0 g 0 G ET endstream endobj -1328 0 obj +1567 0 obj << -/Length 1417 +/Length 3146 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(glob)]TJ -ET -q -1 0 0 1 120.976 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.965 706.129 Td [(x)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(array)-250(wher)18(e)-250(the)-250(local)-250(parts)-250(must)-250(be)-250(gather)18(ed.)]TJ -9.126 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(or)-250(two)-250(array)-250(with)-250(the)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 202.459 0 Td [(ALLOCATABLE)]TJ +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(On)-226(entry)-227(to)-226(this)-227(r)18(outine)-226(the)-227(descriptor)-226(must)-227(be)-226(in)-227(the)-226(assembled)-227(state,)-231(i.e.)]TJ/F67 9.9626 Tf 12.453 -11.956 Td [(psb_cdasb)]TJ/F62 9.9626 Tf 49.564 0 Td [(must)-250(alr)18(eady)-250(have)-250(been)-250(called.)]TJ 0 g 0 G -/F54 9.9626 Tf 60.024 0 Td [(attribute.)]TJ + -62.017 -19.925 Td [(2.)]TJ 0 g 0 G -/F51 9.9626 Tf -287.39 -19.925 Td [(info)]TJ + [-500(The)-250(sparse)-250(matrix)-250(may)-250(be)-250(in)-250(either)-250(the)-250(build)-250(or)-250(update)-250(state;)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + 0 -19.925 Td [(3.)]TJ 0 g 0 G - 141.968 -500.124 Td [(67)]TJ + [-500(Duplicate)-421(entries)-422(ar)18(e)-421(detected)-421(and)-422(handled)-421(in)-421(both)-421(build)-422(and)-421(update)]TJ 12.453 -11.955 Td [(state,)-244(wit)1(h)-242(the)-242(exception)-242(of)-242(the)-242(err)18(or)-242(action)-242(that)-242(is)-242(only)-241(taken)-242(in)-242(the)-242(build)]TJ 0 -11.955 Td [(state,)-250(i.e.)-310(on)-250(the)-250(\002rst)-250(assembly;)]TJ +0 g 0 G + -12.453 -19.926 Td [(4.)]TJ +0 g 0 G + [-500(If)-190(the)-190(update)-190(choice)-190(is)]TJ/F67 9.9626 Tf 108.372 0 Td [(psb_upd_perm_)]TJ/F62 9.9626 Tf 67.995 0 Td [(,)-202(then)-190(subsequent)-190(calls)-190(to)]TJ/F67 9.9626 Tf 109.946 0 Td [(psb_spins)]TJ/F62 9.9626 Tf -273.86 -11.955 Td [(to)-309(update)-309(the)-308(matrix)-309(must)-309(be)-309(arranged)-309(in)-308(such)-309(a)-309(way)-309(as)-309(to)-308(pr)18(oduce)-309(ex-)]TJ 0 -11.955 Td [(actly)-319(the)-320(same)-319(sequence)-320(of)-319(coef)18(\002cient)-319(values)-320(as)-319(encounter)18(ed)-319(at)-320(the)-319(\002rst)]TJ 0 -11.955 Td [(assembly;)]TJ +0 g 0 G + -12.453 -19.926 Td [(5.)]TJ +0 g 0 G + [-500(The)-250(output)-250(storage)-250(format)-250(need)-250(not)-250(be)-250(the)-250(same)-250(on)-250(all)-250(pr)18(ocesses;)]TJ +0 g 0 G + 0 -19.925 Td [(6.)]TJ +0 g 0 G + [-500(On)-249(exit)-249(fr)18(om)-250(this)-249(r)18(outine)-249(the)-249(matrix)-249(is)-250(in)-249(the)-249(assembled)-249(state,)-250(and)-249(thus)-249(is)]TJ 12.453 -11.955 Td [(suitable)-250(for)-250(the)-250(computational)-250(r)18(outines;)]TJ +0 g 0 G + -12.453 -19.925 Td [(7.)]TJ +0 g 0 G + [-500(If)-431(the)]TJ/F67 9.9626 Tf 41.543 0 Td [(bldmode=psb_matbld_remote_)]TJ/F62 9.9626 Tf 140.288 0 Td [(value)-431(was)-432(speci\002ed)-431(at)-432(allocation)]TJ -169.378 -11.955 Td [(time,)-278(contributions)-272(de\002ned)-272(on)-272(the)-273(curr)18(ent)-272(pr)18(ocess)-272(but)-272(belonging)-273(to)-272(a)-272(r)18(e-)]TJ 0 -11.956 Td [(mote)-267(pr)18(ocess)-266(will)-267(be)-267(handled)-267(accor)18(dingly)111(.)-360(This)-267(is)-266(most)-267(likely)-267(to)-266(occur)-267(in)]TJ 0 -11.955 Td [(\002nite)-288(element)-288(applications,)-297(with)]TJ/F67 9.9626 Tf 145.88 0 Td [(dupl=psb_dupl_add_)]TJ/F62 9.9626 Tf 94.147 0 Td [(;)-307(it)-288(is)-287(necessary)-288(to)]TJ -240.027 -11.955 Td [(check)-236(for)-235(possible)-236(updates)-236(needed)-235(in)-236(the)-236(descriptor)74(,)-238(hence)-236(ther)18(e)-236(will)-235(be)-236(a)]TJ 0 -11.955 Td [(r)8(untime)-250(over)18(head.)]TJ +0 g 0 G + 141.968 -332.752 Td [(87)]TJ 0 g 0 G ET endstream endobj -1335 0 obj +1580 0 obj << -/Length 7178 +/Length 2987 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(5.4)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(6.10)-1000(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm +1 0 0 1 204.216 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(scatter)-250(\227)-250(Scatter)-250(Global)-250(Dense)-250(Matrix)]TJ/F54 9.9626 Tf -51.12 -20.363 Td [(These)-223(subr)18(outines)-223(scatters)-224(the)-223(portions)-223(of)-224(global)-223(dense)-223(matrix)-223(owned)-224(by)-223(a)-223(pr)18(o-)]TJ 0 -11.955 Td [(cess)-250(to)-250(all)-250(the)-250(pr)18(ocesses)-250(in)-250(the)-250(pr)18(ocesses)-250(grid.)]TJ/F52 9.9626 Tf 119.478 -26.893 Td [(l)-55(o)-35(c)]TJ -ET -q -1 0 0 1 283.05 647.117 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F52 9.9626 Tf 286.333 646.918 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F83 10.3811 Tf 5.642 1.96 Td [(\040)]TJ/F52 9.9626 Tf 13.398 0 Td [(s)-25(c)-40(a)-25(t)-25(t)-25(e)-15(r)]TJ/F85 10.3811 Tf 28.632 0 Td [(\050)]TJ/F52 9.9626 Tf 4.493 0 Td [(g)-25(l)-55(o)-35(b)]TJ -ET -q -1 0 0 1 362.3 647.117 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F52 9.9626 Tf 365.583 646.918 Td [(x)]TJ/F85 10.3811 Tf 5.329 0 Td [(\051)]TJ/F54 9.9626 Tf -220.207 -23.362 Td [(wher)18(e:)]TJ -0 g 0 G -/F52 9.9626 Tf 0.344 -22.091 Td [(g)-25(l)-55(o)-35(b)]TJ -ET -q -1 0 0 1 169.703 601.664 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F52 9.9626 Tf 172.986 601.465 Td [(x)]TJ +/F59 11.9552 Tf 207.803 706.129 Td [(spfree)-250(\227)-250(Frees)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G -/F54 9.9626 Tf 10.187 0 Td [(is)-250(the)-250(global)-250(matrix)]TJ/F52 9.9626 Tf 87.515 0 Td [(g)-25(l)-55(o)-35(b)]TJ -ET -q -1 0 0 1 289.343 601.664 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F52 9.9626 Tf 292.626 601.465 Td [(x)]TJ/F54 7.5716 Tf 5.105 -1.858 Td [(1)-13(:)]TJ/F52 7.5716 Tf 5.963 0 Td [(m)]TJ/F54 7.5716 Tf 5.985 0 Td [(,1)-13(:)]TJ/F52 7.5716 Tf 7.856 0 Td [(n)]TJ 0 g 0 G -/F52 9.9626 Tf -166.706 -20.955 Td [(l)-55(o)-35(c)]TJ -ET -q -1 0 0 1 163.696 578.851 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F52 9.9626 Tf 166.979 578.652 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ +/F67 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_spfree\050a,)-525(desc_a,)-525(info\051)]TJ 0 g 0 G -/F54 9.9626 Tf 7.732 1.96 Td [(is)-250(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)-250(on)-250(pr)18(ocess)]TJ/F52 9.9626 Tf 234.034 0 Td [(i)]TJ/F54 9.9626 Tf 2.964 0 Td [(.)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F52 9.9626 Tf -266.027 -22.813 Td [(s)-25(c)-40(a)-25(t)-25(t)-25(e)-15(r)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 33.489 0 Td [(is)-250(the)-250(scatter)-250(function.)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G + 0 -19.925 Td [(a)]TJ 0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(to)-250(be)-250(fr)18(eed.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf -28.343 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(required)]TJ/F62 9.9626 Tf -23.999 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.137 0 Td [(psb)]TJ ET q -1 0 0 1 230.392 532.667 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S -Q -BT -/F52 9.9626 Tf 236.663 524.099 Td [(x)]TJ/F52 7.5716 Tf 5.148 -1.96 Td [(i)]TJ/F54 9.9626 Tf 2.75 1.96 Td [(,)]TJ/F52 9.9626 Tf 4.276 0 Td [(y)]TJ/F51 9.9626 Tf 108.448 0 Td [(Subroutine)]TJ -ET -q -1 0 0 1 230.392 520.313 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +1 0 0 1 360.068 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 236.369 511.745 Td [(Integer)-8983(psb)]TJ +/F67 9.9626 Tf 363.206 577.576 Td [(Tspmat)]TJ ET q -1 0 0 1 373.603 511.945 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 395.216 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 376.592 511.745 Td [(scatter)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Real)-3287(psb)]TJ +/F67 9.9626 Tf 398.354 577.576 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -268.57 -19.925 Td [(desc)]TJ ET q -1 0 0 1 373.603 499.989 cm +1 0 0 1 171.218 557.85 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 376.592 499.79 Td [(scatter)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Real)-3366(psb)]TJ +/F59 9.9626 Tf 174.207 557.651 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 373.603 488.034 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 360.068 510.029 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 376.592 487.835 Td [(scatter)]TJ -140.223 -11.955 Td [(Short)-250(Pr)18(ecision)-250(Complex)-1200(psb)]TJ +/F67 9.9626 Tf 363.206 509.83 Td [(desc)]TJ ET q -1 0 0 1 373.603 476.079 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 384.755 510.029 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 376.592 475.88 Td [(scatter)]TJ -140.223 -11.955 Td [(Long)-250(Pr)18(ecision)-250(Complex)-1279(psb)]TJ +/F67 9.9626 Tf 387.893 509.83 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 141.968 -329.728 Td [(88)]TJ +0 g 0 G ET -q -1 0 0 1 373.603 464.124 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + +endstream +endobj +1586 0 obj +<< +/Length 3858 +>> +stream +0 g 0 G +0 g 0 G BT -/F54 9.9626 Tf 376.592 463.925 Td [(scatter)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(6.11)-1000(psb)]TJ ET q -1 0 0 1 230.392 460.139 cm -[]0 d 0 J 0.398 w 0 0 m 184.337 0 l S +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q -0 g 0 G BT -/F54 9.9626 Tf 278.277 431.76 Td [(T)92(able)-250(20:)-310(Data)-250(types)]TJ +/F59 11.9552 Tf 156.993 706.129 Td [(sprn)-254(\227)-255(Reinit)-254(sparse)-255(matrix)-254(structure)-254(for)-255(psblas)-254(rou-)]TJ -24.221 -13.948 Td [(tines.)]TJ 0 g 0 G 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf -112.628 -28.004 Td [(call)]TJ +/F67 9.9626 Tf -32.877 -18.964 Td [(call)-525(psb_sprn\050a,)-525(decsc_a,)-525(info,)-525(clear\051)]TJ 0 g 0 G - [-525(psb_scatter\050glob_x,)-525(loc_x,)-525(desc_a,)-525(info,)-525(root,)-525(mold\051)]TJ +/F59 9.9626 Tf 0 -21.917 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -14.944 -24.806 Td [(T)90(ype:)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F59 9.9626 Tf -29.828 -19.926 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -22.813 Td [(On)-250(Entry)]TJ 0 g 0 G + 0 -19.925 Td [(a)]TJ 0 g 0 G - 0 -22.813 Td [(glob)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(to)-250(be)-250(r)18(einitialized.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf -28.343 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf -24 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 171.786 333.523 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 309.258 563.828 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 174.774 333.324 Td [(x)]TJ +/F67 9.9626 Tf 312.397 563.628 Td [(Tspmat)]TJ +ET +q +1 0 0 1 344.406 563.828 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 347.544 563.628 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(array)-250(that)-250(must)-250(be)-250(scatter)18(ed)-250(into)-250(local)-250(pieces.)]TJ -9.126 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(or)-250(two)-250(array)111(.)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -22.814 Td [(desc)]TJ +/F59 9.9626 Tf -268.571 -19.925 Td [(desc)]TJ ET q -1 0 0 1 171.218 262.89 cm +1 0 0 1 120.408 543.902 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 262.69 Td [(a)]TJ +/F59 9.9626 Tf 123.397 543.703 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 360.068 215.069 cm +1 0 0 1 309.258 496.082 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 363.206 214.87 Td [(desc)]TJ +/F67 9.9626 Tf 312.397 495.882 Td [(desc)]TJ ET q -1 0 0 1 384.755 215.069 cm +1 0 0 1 333.945 496.082 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 387.893 214.87 Td [(type)]TJ +/F67 9.9626 Tf 337.084 495.882 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -22.813 Td [(root)]TJ +/F59 9.9626 Tf -258.11 -19.925 Td [(clear)]TJ 0 g 0 G -/F54 9.9626 Tf 23.252 0 Td [(The)-218(pr)18(ocess)-218(that)-218(holds)-219(t)1(he)-219(global)-218(copy)111(.)-299(If)]TJ/F52 9.9626 Tf 179.982 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F85 10.3811 Tf 19.922 0 Td [(=)]TJ/F83 10.3811 Tf 11.086 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)-218(all)-218(the)-218(pr)18(ocesses)-219(have)]TJ -217.53 -11.956 Td [(a)-250(copy)-250(of)-250(the)-250(global)-250(vector)74(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-258(as:)-327(an)-258(integer)-259(variable)]TJ/F83 10.3811 Tf 142.917 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)]TJ/F83 10.3811 Tf 8.027 0 Td [(\024)]TJ/F52 9.9626 Tf 11.139 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F83 10.3811 Tf 20.077 0 Td [(\024)]TJ/F52 9.9626 Tf 11.239 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.534 0 Td [(\000)]TJ/F54 9.9626 Tf 10.162 0 Td [(1,)-260(default)]TJ/F59 9.9626 Tf 43.952 0 Td [(psb_root_)]TJ/F54 9.9626 Tf 47.073 0 Td [(,)]TJ -316.314 -11.955 Td [(i.e.)-310(pr)18(ocess)-250(0.)]TJ +/F62 9.9626 Tf 26.561 0 Td [(Choose)-250(whether)-250(to)-250(zer)18(o)-250(out)-250(matrix)-250(coef)18(\002cients)]TJ -1.654 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Default:)-310(tr)8(ue.)]TJ 0 g 0 G - 141.968 -29.888 Td [(68)]TJ +/F59 9.9626 Tf -24.907 -21.917 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(sparse)-250(matrix)-250(is)-250(in)-250(the)-250(update)-250(state.)]TJ +0 g 0 G + 154.421 -206.192 Td [(89)]TJ 0 g 0 G ET endstream endobj -1342 0 obj +1593 0 obj << -/Length 3984 +/Length 6166 >> stream 0 g 0 G 0 g 0 G -0 g 0 G -BT -/F51 9.9626 Tf 99.895 706.129 Td [(mold)]TJ -0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(vector)-250(storage.)]TJ -3.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-223(as:)-296(an)-223(object)-223(of)-222(a)-223(class)-223(derived)-223(fr)18(om)]TJ/F59 9.9626 Tf 199.086 0 Td [(psb)]TJ -ET -q -1 0 0 1 340.207 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 343.345 658.308 Td [(T)]TJ -ET -q -1 0 0 1 349.203 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 352.341 658.308 Td [(base)]TJ -ET -q -1 0 0 1 373.89 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q BT -/F59 9.9626 Tf 377.028 658.308 Td [(vect)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(6.12)-1000(psb)]TJ ET q -1 0 0 1 398.577 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 401.716 658.308 Td [(type)]TJ/F54 9.9626 Tf 20.921 0 Td [(;)-232(this)]TJ -297.835 -11.955 Td [(is)-250(only)-250(allowed)-250(when)-250(loc)]TJ +/F59 11.9552 Tf 207.803 706.129 Td [(geall)-250(\227)-250(Allocates)-250(a)-250(dense)-250(matrix)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_geall\050x,)-525(desc_a,)-525(info[,)-525(dupl,)-525(bldmode,)-525(n,)-525(lb]\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -19.627 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.01 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.009 Td [(desc)]TJ ET q -1 0 0 1 234.988 646.552 cm +1 0 0 1 171.218 629.719 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 237.976 646.353 Td [(x)-250(is)-250(of)-250(type)]TJ +/F59 9.9626 Tf 174.207 629.519 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 50.53 0 Td [(psb)]TJ -ET -q -1 0 0 1 304.825 646.552 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 307.963 646.353 Td [(T)]TJ +/F67 9.9626 Tf 136.328 0 Td [(psb)]TJ ET q -1 0 0 1 313.821 646.552 cm +1 0 0 1 328.257 581.898 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 316.959 646.353 Td [(vect)]TJ +/F67 9.9626 Tf 331.395 581.699 Td [(desc)]TJ ET q -1 0 0 1 338.508 646.552 cm +1 0 0 1 352.944 581.898 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 341.646 646.353 Td [(type)]TJ +/F67 9.9626 Tf 356.083 581.699 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -262.673 -19.925 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -226.299 -30.965 Td [(n)]TJ 0 g 0 G +/F62 9.9626 Tf 11.068 0 Td [(The)-250(number)-250(of)-250(columns)-250(of)-250(the)-250(dense)-250(matrix)-250(to)-250(be)-250(allocated.)]TJ 13.838 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-297(as:)-404(Integer)-297(scalar)74(,)-309(default)-297(1.)-450(It)-297(is)-297(not)-297(a)-297(valid)-297(ar)18(gument)-297(if)]TJ/F60 9.9626 Tf 295.578 0 Td [(x)]TJ/F62 9.9626 Tf 8.164 0 Td [(is)-297(a)]TJ -303.742 -11.956 Td [(rank-1)-250(array)111(.)]TJ 0 g 0 G - 0 -19.926 Td [(loc)]TJ -ET -q -1 0 0 1 113.773 606.702 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 116.762 606.502 Td [(x)]TJ +/F59 9.9626 Tf -24.906 -19.009 Td [(lb)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(dense)-250(matrix)]TJ/F52 9.9626 Tf 175.664 0 Td [(g)-25(l)-55(o)-35(b)]TJ -ET -q -1 0 0 1 321.043 606.702 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F52 9.9626 Tf 324.326 606.502 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ -204.729 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-214(as:)-292(a)-215(rank)-214(one)-214(or)-214(two)-215(ALLOCA)74(T)74(ABLE)-214(array)-214(or)-214(an)-215(object)-214(of)-214(type)]TJ +/F62 9.9626 Tf 14.386 0 Td [(The)-237(lower)-238(bound)-237(for)-238(the)-237(column)-238(index)-237(range)-237(of)-238(the)-237(dense)-238(matrix)-237(to)-238(be)-237(allo-)]TJ 10.52 -11.955 Td [(cated.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-297(as:)-404(Integer)-297(scalar)74(,)-309(default)-297(1.)-450(It)-297(is)-297(not)-297(a)-297(valid)-297(ar)18(gument)-297(if)]TJ/F60 9.9626 Tf 295.578 0 Td [(x)]TJ/F62 9.9626 Tf 8.164 0 Td [(is)-297(a)]TJ -303.742 -11.955 Td [(rank-1)-250(array)111(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.009 Td [(dupl)]TJ +0 g 0 G +/F62 9.9626 Tf 26.56 0 Td [(How)-250(to)-250(handle)-250(duplicate)-250(coef)18(\002cients.)]TJ -1.654 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-243(as:)-306(integer)74(,)-244(possible)-243(values:)]TJ/F67 9.9626 Tf 164.942 0 Td [(psb_dupl_ovwrt_)]TJ/F62 9.9626 Tf 78.455 0 Td [(,)]TJ/F67 9.9626 Tf 4.923 0 Td [(psb_dupl_add_)]TJ/F62 9.9626 Tf 67.994 0 Td [(;)]TJ/F67 9.9626 Tf -316.314 -11.955 Td [(psb_dupl_err_)]TJ/F62 9.9626 Tf 70.485 0 Td [(has)-250(no)-250(ef)18(fect.)]TJ +0 g 0 G +/F59 9.9626 Tf -95.391 -19.009 Td [(bldmode)]TJ +0 g 0 G +/F62 9.9626 Tf 45.937 0 Td [(Whether)-372(to)-372(keep)-371(track)-372(of)-372(matrix)-372(entries)-371(that)-372(do)-372(not)-372(belong)-371(to)-372(the)]TJ -21.031 -11.955 Td [(curr)18(ent)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(an)-190(integer)-190(value)]TJ/F67 9.9626 Tf 128.288 0 Td [(psb_matbld_noremote_)]TJ/F62 9.9626 Tf 104.607 0 Td [(,)]TJ/F67 9.9626 Tf 4.503 0 Td [(psb_matbld_remote_)]TJ/F62 9.9626 Tf 94.146 0 Td [(.)]TJ -331.544 -11.955 Td [(Default:)]TJ/F67 9.9626 Tf 38.515 0 Td [(psb_matbld_noremote_)]TJ/F62 9.9626 Tf 104.607 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -168.028 -19.627 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.009 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(allocated.)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.943 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-273(as:)-357(a)-273(rank)-274(one)-273(or)-274(two)-273(array)-273(with)-274(the)-273(ALLOCA)74(T)74(ABLE)-273(attribute)]TJ 0 -11.955 Td [(or)-250(an)-250(object)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 0 -11.955 Td [(psb)]TJ +/F67 9.9626 Tf 86.634 0 Td [(psb)]TJ ET q -1 0 0 1 141.121 546.926 cm +1 0 0 1 278.564 132.48 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 144.259 546.727 Td [(T)]TJ +/F67 9.9626 Tf 281.702 132.281 Td [(T)]TJ ET q -1 0 0 1 150.117 546.926 cm +1 0 0 1 287.56 132.48 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 153.255 546.727 Td [(vect)]TJ +/F67 9.9626 Tf 290.699 132.281 Td [(vect)]TJ ET q -1 0 0 1 174.804 546.926 cm +1 0 0 1 312.247 132.48 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 177.942 546.727 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 23.412 0 Td [(containing)-250(numbers)-250(of)-250(the)-250(type)-250(indicated)-250(in)-250(T)92(able)]TJ -0 0 1 rg 0 0 1 RG - [-250(20)]TJ +/F67 9.9626 Tf 315.386 132.281 Td [(type)]TJ 0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -101.459 -19.926 Td [(info)]TJ +/F62 9.9626 Tf 20.921 0 Td [(,)-250(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -0 g 0 G - 141.968 -388.543 Td [(69)]TJ + -18.728 -41.843 Td [(90)]TJ 0 g 0 G ET endstream endobj -1346 0 obj +1597 0 obj << -/Length 6319 +/Length 925 >> stream 0 g 0 G 0 g 0 G -BT -/F51 14.3462 Tf 150.705 706.042 Td [(6)-1000(Data)-250(management)-250(routines)]TJ/F51 11.9552 Tf 0 -24.694 Td [(6.1)-1000(psb)]TJ -ET -q -1 0 0 1 198.238 681.547 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 201.825 681.348 Td [(cdall)-250(\227)-250(Allocates)-250(a)-250(communication)-250(descriptor)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,mg=mg,parts=parts\051)]TJ 0 -11.955 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,vg=vg,[mg=mg,flag=flag]\051)]TJ 0 -11.955 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,vl=vl,[nl=nl,globalcheck=.false.,lidx=lidx]\051)]TJ 0 -11.955 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,nl=nl\051)]TJ 0 -11.956 Td [(call)-525(psb_cdall\050icontxt,)-525(desc_a,)-525(info,mg=mg,repl=.true.\051)]TJ/F54 9.9626 Tf 14.944 -19.771 Td [(This)-377(subr)18(outine)-378(initializes)-377(the)-378(communication)-377(descriptor)-378(associated)-377(with)]TJ -14.944 -11.956 Td [(an)-271(index)-271(space.)-373(One)-272(o)1(f)-272(the)-271(optional)-271(ar)18(guments)]TJ/F59 9.9626 Tf 209.77 0 Td [(parts)]TJ/F54 9.9626 Tf 26.152 0 Td [(,)]TJ/F59 9.9626 Tf 5.244 0 Td [(vg)]TJ/F54 9.9626 Tf 10.461 0 Td [(,)]TJ/F59 9.9626 Tf 5.244 0 Td [(vl)]TJ/F54 9.9626 Tf 10.461 0 Td [(,)]TJ/F59 9.9626 Tf 5.244 0 Td [(nl)]TJ/F54 9.9626 Tf 13.161 0 Td [(or)]TJ/F59 9.9626 Tf 12.076 0 Td [(repl)]TJ/F54 9.9626 Tf 23.622 0 Td [(must)]TJ -321.435 -11.955 Td [(be)-250(speci\002ed,)-250(ther)18(eby)-250(choosing)-250(the)-250(speci\002c)-250(initialization)-250(strategy)111(.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -18.208 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.067 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.067 Td [(icontxt)]TJ -0 g 0 G -/F54 9.9626 Tf 35.965 0 Td [(the)-250(communication)-250(context.)]TJ -11.058 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.066 Td [(vg)]TJ -0 g 0 G -/F54 9.9626 Tf 16.06 0 Td [(Data)-250(allocation:)-310(each)-250(index)]TJ/F52 9.9626 Tf 121.707 0 Td [(i)]TJ/F83 10.3811 Tf 5.856 0 Td [(2)-290(f)]TJ/F54 9.9626 Tf 15.245 0 Td [(1)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 19.967 0 Td [(m)-47(g)]TJ/F83 10.3811 Tf 13.449 0 Td [(g)]TJ/F54 9.9626 Tf 7.806 0 Td [(is)-250(allocated)-250(to)-250(pr)18(ocess)]TJ/F52 9.9626 Tf 98.454 0 Td [(v)-47(g)]TJ/F85 10.3811 Tf 10.68 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.089 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(.)]TJ -295.759 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.067 Td [(\003ag)]TJ -0 g 0 G -/F54 9.9626 Tf 21.589 0 Td [(Speci\002es)-250(whether)-250(entries)-250(in)]TJ/F52 9.9626 Tf 123.401 0 Td [(v)-47(g)]TJ/F54 9.9626 Tf 13.046 0 Td [(ar)18(e)-250(zer)18(o-)-250(or)-250(one-based.)]TJ -133.129 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0,)-167(1,)-250(default)-250(0.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.067 Td [(mg)]TJ 0 g 0 G -/F54 9.9626 Tf 19.377 0 Td [(the)-250(\050global\051)-250(number)-250(of)-250(r)18(ows)-250(of)-250(the)-250(pr)18(oblem.)]TJ 5.53 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-262(as:)-335(an)-263(integer)-262(value.)-348(It)-262(is)-262(r)18(equir)18(ed)-263(if)]TJ/F59 9.9626 Tf 203.091 0 Td [(parts)]TJ/F54 9.9626 Tf 28.766 0 Td [(or)]TJ/F59 9.9626 Tf 11.99 0 Td [(repl)]TJ/F54 9.9626 Tf 23.536 0 Td [(is)-262(speci\002ed,)]TJ -267.383 -11.955 Td [(it)-250(is)-250(optional)-250(if)]TJ/F59 9.9626 Tf 66.141 0 Td [(vg)]TJ/F54 9.9626 Tf 12.951 0 Td [(is)-250(speci\002ed.)]TJ -0 g 0 G -/F51 9.9626 Tf -103.999 -19.067 Td [(parts)]TJ +BT +/F59 9.9626 Tf 99.895 706.129 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 27.666 0 Td [(the)-250(subr)18(outine)-250(that)-250(de\002nes)-250(the)-250(partitioning)-250(scheme.)]TJ -2.759 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(subr)18(outine.)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.067 Td [(vl)]TJ +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 13.838 0 Td [(Data)-293(allocation:)-395(the)-293(set)-292(of)-293(global)-293(i)1(ndices)]TJ/F52 9.9626 Tf 181.166 0 Td [(v)-25(l)]TJ/F85 10.3811 Tf 8.548 0 Td [(\050)]TJ/F54 9.9626 Tf 4.149 0 Td [(1)-369(:)]TJ/F52 9.9626 Tf 14.955 0 Td [(n)-25(l)]TJ/F85 10.3811 Tf 9.105 0 Td [(\051)]TJ/F54 9.9626 Tf 7.065 0 Td [(belonging)-293(to)-292(the)-293(calling)]TJ -213.919 -11.955 Td [(pr)18(ocess.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ + [-500(Using)]TJ/F67 9.9626 Tf 41.798 0 Td [(psb_matbld_remote_)]TJ/F62 9.9626 Tf 97.28 0 Td [(is)-314(likely)-315(to)-315(cau)1(se)-315(a)-315(r)8(untime)-314(over)18(head)-315(at)-314(as-)]TJ -126.625 -11.955 Td [(sembly)-250(time;)]TJ 0 g 0 G - 141.967 -29.887 Td [(70)]TJ + 141.968 -514.072 Td [(91)]TJ 0 g 0 G ET endstream endobj -1233 0 obj +1492 0 obj << /Type /ObjStm /N 100 -/First 996 -/Length 12673 ->> -stream -280 0 1231 57 1227 115 1235 248 1225 396 1226 540 1237 687 1234 746 1244 840 1238 1015 -1239 1157 1240 1302 1241 1449 1242 1593 1246 1740 284 1798 1247 1855 1243 1913 1250 2046 1248 2185 -1252 2333 288 2392 1249 2450 1258 2531 1253 2688 1254 2832 1255 2979 1260 3126 292 3184 1261 3241 -1257 3299 1265 3433 1269 3581 1270 3708 1271 3751 1272 3958 1273 4196 1274 4472 1256 4708 1263 4855 -1267 5002 1268 5061 1264 5120 1278 5256 1280 5374 1277 5432 1286 5500 1282 5657 1283 5801 1284 5946 -1288 6093 296 6152 1289 6210 1285 6269 1295 6402 1290 6559 1292 6705 1293 6851 1297 6996 1298 7054 -1299 7112 1300 7170 1294 7228 1303 7335 1305 7453 1302 7512 1307 7580 1310 7698 1311 7825 1312 7868 -1313 8075 1314 8313 1315 8589 1309 8825 1301 8883 1306 8941 1322 9038 1318 9195 1319 9336 1320 9483 -1324 9630 300 9689 1325 9747 1321 9806 1327 9939 1329 10057 1326 10115 1334 10209 1331 10348 1336 10495 -304 10554 1337 10612 1333 10671 1341 10804 1332 10961 1338 11104 1339 11247 1343 11394 1340 11452 1345 11559 -% 280 0 obj -<< -/D [1228 0 R /XYZ 99.895 716.092 null] ->> -% 1231 0 obj -<< -/D [1228 0 R /XYZ 99.895 560.161 null] +/First 979 +/Length 10305 >> -% 1227 0 obj +stream +1487 0 1496 94 1493 242 1494 387 1498 534 324 592 1495 649 1501 743 1499 882 1503 1027 +328 1086 1500 1144 1507 1238 1504 1386 1505 1531 1509 1678 332 1736 1506 1793 1512 1913 1514 2031 +1515 2090 1516 2149 1511 2208 1520 2289 1517 2437 1518 2584 1522 2729 336 2787 1523 2844 1519 2902 +1525 2996 1527 3114 1528 3173 1529 3232 1530 3291 1524 3350 1533 3444 1535 3562 340 3620 1532 3677 +1539 3797 1531 3954 1536 4097 1537 4242 1541 4385 1542 4444 1543 4502 1544 4561 1545 4620 1546 4679 +1538 4738 1548 4858 1550 4976 1551 5034 1552 5092 1553 5150 1554 5208 1555 5266 1556 5324 1547 5382 +1562 5502 1558 5659 1559 5806 1560 5951 1564 6097 344 6156 1561 6214 1566 6308 1568 6426 1569 6484 +1570 6542 1571 6600 1572 6658 1573 6716 1574 6774 1575 6832 1565 6890 1579 6984 1576 7132 1577 7275 +1581 7422 348 7481 1578 7539 1585 7633 1582 7781 1583 7926 1587 8073 352 8131 1588 8188 1584 8246 +1592 8340 1589 8488 1590 8631 1594 8775 356 8834 1591 8892 1596 8999 1598 9117 1599 9175 1595 9232 +% 1487 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1235 0 obj +% 1496 0 obj << /Type /Page -/Contents 1236 0 R -/Resources 1234 0 R +/Contents 1497 0 R +/Resources 1495 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1232 0 R -/Annots [ 1225 0 R 1226 0 R ] +/Parent 1471 0 R +/Annots [ 1493 0 R 1494 0 R ] >> -% 1225 0 obj +% 1493 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [314.141 702.323 390.195 714.383] -/A << /S /GoTo /D (vdata) >> +/Rect [291.943 573.77 359.001 585.83] +/A << /S /GoTo /D (descdata) >> >> -% 1226 0 obj +% 1494 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [293.677 690.368 305.632 702.428] -/A << /S /GoTo /D (table.14) >> +/Rect [291.943 484.107 359.001 496.166] +/A << /S /GoTo /D (descdata) >> >> -% 1237 0 obj +% 1498 0 obj << -/D [1235 0 R /XYZ 149.705 753.953 null] +/D [1496 0 R /XYZ 98.895 753.953 null] >> -% 1234 0 obj +% 324 0 obj << -/Font << /F54 586 0 R /F59 812 0 R /F51 584 0 R >> +/D [1496 0 R /XYZ 99.895 716.092 null] +>> +% 1495 0 obj +<< +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1244 0 obj +% 1501 0 obj << /Type /Page -/Contents 1245 0 R -/Resources 1243 0 R +/Contents 1502 0 R +/Resources 1500 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1232 0 R -/Annots [ 1238 0 R 1239 0 R 1240 0 R 1241 0 R 1242 0 R ] ->> -% 1238 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [263.331 430.55 339.385 442.61] -/A << /S /GoTo /D (vdata) >> ->> -% 1239 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [224.557 418.595 231.53 430.655] -/A << /S /GoTo /D (table.2) >> +/Parent 1471 0 R +/Annots [ 1499 0 R ] >> -% 1240 0 obj +% 1499 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [256.048 352.283 323.106 364.342] +/Rect [342.753 573.77 409.811 585.83] /A << /S /GoTo /D (descdata) >> >> -% 1241 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [263.331 201.166 339.385 213.226] -/A << /S /GoTo /D (vdata) >> ->> -% 1242 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [242.868 189.211 254.823 201.271] -/A << /S /GoTo /D (table.16) >> ->> -% 1246 0 obj -<< -/D [1244 0 R /XYZ 98.895 753.953 null] ->> -% 284 0 obj +% 1503 0 obj << -/D [1244 0 R /XYZ 99.895 716.092 null] +/D [1501 0 R /XYZ 149.705 753.953 null] >> -% 1247 0 obj +% 328 0 obj << -/D [1244 0 R /XYZ 99.895 566.828 null] +/D [1501 0 R /XYZ 150.705 716.092 null] >> -% 1243 0 obj +% 1500 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1250 0 obj +% 1507 0 obj << /Type /Page -/Contents 1251 0 R -/Resources 1249 0 R +/Contents 1508 0 R +/Resources 1506 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1232 0 R -/Annots [ 1248 0 R ] +/Parent 1510 0 R +/Annots [ 1504 0 R 1505 0 R ] >> -% 1248 0 obj +% 1504 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [378.029 655.624 385.003 667.684] -/A << /S /GoTo /D (section.6) >> +/Rect [291.943 452.321 369.462 464.381] +/A << /S /GoTo /D (spdata) >> +>> +% 1505 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 209.193 359.001 221.252] +/A << /S /GoTo /D (descdata) >> >> -% 1252 0 obj +% 1509 0 obj << -/D [1250 0 R /XYZ 149.705 753.953 null] +/D [1507 0 R /XYZ 98.895 753.953 null] >> -% 288 0 obj +% 332 0 obj << -/D [1250 0 R /XYZ 150.705 716.092 null] +/D [1507 0 R /XYZ 99.895 716.092 null] >> -% 1249 0 obj +% 1506 0 obj << -/Font << /F51 584 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R >> /ProcSet [ /PDF /Text ] >> -% 1258 0 obj +% 1512 0 obj << /Type /Page -/Contents 1259 0 R -/Resources 1257 0 R +/Contents 1513 0 R +/Resources 1511 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1232 0 R -/Annots [ 1253 0 R 1254 0 R 1255 0 R ] +/Parent 1510 0 R >> -% 1253 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.549 344.818 444.603 356.877] -/A << /S /GoTo /D (vdata) >> ->> -% 1254 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [326.652 332.863 338.608 344.922] -/A << /S /GoTo /D (table.17) >> ->> -% 1255 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 264.733 359.001 276.793] -/A << /S /GoTo /D (descdata) >> ->> -% 1260 0 obj +% 1514 0 obj << -/D [1258 0 R /XYZ 98.895 753.953 null] +/D [1512 0 R /XYZ 149.705 753.953 null] >> -% 292 0 obj +% 1515 0 obj << -/D [1258 0 R /XYZ 99.895 716.092 null] +/D [1512 0 R /XYZ 150.705 716.092 null] >> -% 1261 0 obj +% 1516 0 obj << -/D [1258 0 R /XYZ 99.895 513.636 null] +/D [1512 0 R /XYZ 150.705 663.469 null] >> -% 1257 0 obj +% 1511 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F60 1027 0 R /F59 812 0 R >> +/Font << /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1265 0 obj +% 1520 0 obj << /Type /Page -/Contents 1266 0 R -/Resources 1264 0 R +/Contents 1521 0 R +/Resources 1519 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1232 0 R -/Annots [ 1256 0 R 1263 0 R ] +/Parent 1510 0 R +/Annots [ 1517 0 R 1518 0 R ] >> -% 1269 0 obj +% 1517 0 obj << -/Producer (GPL Ghostscript 9.22) -/CreationDate (D:20180323100645Z00'00') -/ModDate (D:20180323100645Z00'00') +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 571.679 359.001 583.739] +/A << /S /GoTo /D (descdata) >> >> -% 1270 0 obj +% 1518 0 obj << -/Type /ExtGState -/OPM 1 +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 227.887 369.462 239.946] +/A << /S /GoTo /D (spdata) >> >> -% 1271 0 obj +% 1522 0 obj << -/BaseFont /XYUGDR+Times-Roman -/FontDescriptor 1273 0 R -/Type /Font -/FirstChar 48 -/LastChar 57 -/Widths [ 500 500 500 500 500 500 500 500 500 500] -/Encoding /WinAnsiEncoding -/Subtype /Type1 +/D [1520 0 R /XYZ 98.895 753.953 null] >> -% 1272 0 obj +% 336 0 obj << -/BaseFont /XISTAL+Times-Bold -/FontDescriptor 1274 0 R -/Type /Font -/FirstChar 48 -/LastChar 80 -/Widths [ 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 611] -/Encoding /WinAnsiEncoding -/Subtype /Type1 +/D [1520 0 R /XYZ 99.895 716.092 null] >> -% 1273 0 obj +% 1523 0 obj << -/Type /FontDescriptor -/FontName /XYUGDR+Times-Roman -/FontBBox [ 0 -14 476 688] -/Flags 65568 -/Ascent 688 -/CapHeight 688 -/Descent -14 -/ItalicAngle 0 -/StemV 71 -/MissingWidth 250 -/CharSet (/eight/five/four/nine/one/seven/six/three/two/zero) -/FontFile3 1275 0 R +/D [1520 0 R /XYZ 99.895 136.374 null] >> -% 1274 0 obj +% 1519 0 obj << -/Type /FontDescriptor -/FontName /XISTAL+Times-Bold -/FontBBox [ 0 -13 600 688] -/Flags 65568 -/Ascent 688 -/CapHeight 676 -/Descent -13 -/ItalicAngle 0 -/StemV 90 -/MissingWidth 250 -/CharSet (/P/one/zero) -/FontFile3 1276 0 R +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1256 0 obj +% 1525 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [231.023 625.272 242.978 634.682] -/A << /S /GoTo /D (table.17) >> +/Type /Page +/Contents 1526 0 R +/Resources 1524 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1510 0 R >> -% 1263 0 obj +% 1527 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [458.157 276.439 465.131 290.202] -/A << /S /GoTo /D (figure.3) >> +/D [1525 0 R /XYZ 149.705 753.953 null] >> -% 1267 0 obj +% 1528 0 obj << -/D [1265 0 R /XYZ 149.705 753.953 null] +/D [1525 0 R /XYZ 150.705 716.092 null] >> -% 1268 0 obj +% 1529 0 obj << -/D [1265 0 R /XYZ 150.705 326.444 null] +/D [1525 0 R /XYZ 150.705 699.334 null] >> -% 1264 0 obj +% 1530 0 obj +<< +/D [1525 0 R /XYZ 150.705 644.819 null] +>> +% 1524 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F59 812 0 R >> -/XObject << /Im4 1262 0 R >> +/Font << /F62 667 0 R /F60 666 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1278 0 obj +% 1533 0 obj << /Type /Page -/Contents 1279 0 R -/Resources 1277 0 R +/Contents 1534 0 R +/Resources 1532 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1281 0 R +/Parent 1510 0 R >> -% 1280 0 obj +% 1535 0 obj << -/D [1278 0 R /XYZ 98.895 753.953 null] +/D [1533 0 R /XYZ 98.895 753.953 null] >> -% 1277 0 obj +% 340 0 obj +<< +/D [1533 0 R /XYZ 99.895 716.092 null] +>> +% 1532 0 obj << -/Font << /F54 586 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R >> /ProcSet [ /PDF /Text ] >> -% 1286 0 obj +% 1539 0 obj << /Type /Page -/Contents 1287 0 R -/Resources 1285 0 R +/Contents 1540 0 R +/Resources 1538 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1281 0 R -/Annots [ 1282 0 R 1283 0 R 1284 0 R ] +/Parent 1510 0 R +/Annots [ 1531 0 R 1536 0 R 1537 0 R ] >> -% 1282 0 obj +% 1531 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 345.485 495.412 357.545] -/A << /S /GoTo /D (vdata) >> +/Rect [310.942 654.503 378 666.562] +/A << /S /GoTo /D (descdata) >> >> -% 1283 0 obj +% 1536 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [377.462 333.53 389.417 345.59] -/A << /S /GoTo /D (table.18) >> +/Rect [342.753 480.963 420.271 493.022] +/A << /S /GoTo /D (spdata) >> >> -% 1284 0 obj +% 1537 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 266.146 409.811 278.205] +/Rect [310.942 411.699 378 423.758] /A << /S /GoTo /D (descdata) >> >> -% 1288 0 obj +% 1541 0 obj << -/D [1286 0 R /XYZ 149.705 753.953 null] +/D [1539 0 R /XYZ 149.705 753.953 null] >> -% 296 0 obj +% 1542 0 obj << -/D [1286 0 R /XYZ 150.705 716.092 null] +/D [1539 0 R /XYZ 150.705 306.27 null] >> -% 1289 0 obj +% 1543 0 obj << -/D [1286 0 R /XYZ 150.705 510.975 null] +/D [1539 0 R /XYZ 150.705 272.927 null] >> -% 1285 0 obj +% 1544 0 obj +<< +/D [1539 0 R /XYZ 150.705 236.878 null] +>> +% 1545 0 obj +<< +/D [1539 0 R /XYZ 150.705 167.614 null] +>> +% 1546 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F59 812 0 R /F85 814 0 R >> +/D [1539 0 R /XYZ 150.705 146.171 null] +>> +% 1538 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F60 666 0 R /F93 915 0 R >> /ProcSet [ /PDF /Text ] >> -% 1295 0 obj +% 1548 0 obj << /Type /Page -/Contents 1296 0 R -/Resources 1294 0 R +/Contents 1549 0 R +/Resources 1547 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1281 0 R -/Annots [ 1290 0 R 1292 0 R 1293 0 R ] +/Parent 1557 0 R >> -% 1290 0 obj +% 1550 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [202.52 554.876 214.475 566.936] -/A << /S /GoTo /D (table.18) >> +/D [1548 0 R /XYZ 98.895 753.953 null] >> -% 1292 0 obj +% 1551 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [407.408 325.46 414.381 339.127] -/A << /S /GoTo /D (figure.4) >> +/D [1548 0 R /XYZ 99.895 716.092 null] >> -% 1293 0 obj +% 1552 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [309.226 301.825 316.2 313.885] -/A << /S /GoTo /D (figure.3) >> +/D [1548 0 R /XYZ 99.895 651.514 null] >> -% 1297 0 obj +% 1553 0 obj << -/D [1295 0 R /XYZ 98.895 753.953 null] +/D [1548 0 R /XYZ 99.895 607.678 null] >> -% 1298 0 obj +% 1554 0 obj << -/D [1295 0 R /XYZ 99.895 464.818 null] +/D [1548 0 R /XYZ 99.895 575.798 null] >> -% 1299 0 obj +% 1555 0 obj << -/D [1295 0 R /XYZ 99.895 430.343 null] +/D [1548 0 R /XYZ 99.895 520.007 null] >> -% 1300 0 obj +% 1556 0 obj << -/D [1295 0 R /XYZ 99.895 386.508 null] +/D [1548 0 R /XYZ 99.895 476.171 null] >> -% 1294 0 obj +% 1547 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F59 812 0 R >> +/Font << /F62 667 0 R /F60 666 0 R /F93 915 0 R /F91 914 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1303 0 obj +% 1562 0 obj << /Type /Page -/Contents 1304 0 R -/Resources 1302 0 R +/Contents 1563 0 R +/Resources 1561 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1281 0 R +/Parent 1557 0 R +/Annots [ 1558 0 R 1559 0 R 1560 0 R ] >> -% 1305 0 obj +% 1558 0 obj << -/D [1303 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 572.168 409.811 584.228] +/A << /S /GoTo /D (descdata) >> >> -% 1302 0 obj +% 1559 0 obj << -/Font << /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 276.913 420.271 288.973] +/A << /S /GoTo /D (spdata) >> >> -% 1307 0 obj +% 1560 0 obj << -/Type /Page -/Contents 1308 0 R -/Resources 1306 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1281 0 R +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [345.92 208.672 412.978 220.731] +/A << /S /GoTo /D (descdata) >> >> -% 1310 0 obj +% 1564 0 obj << -/Producer (GPL Ghostscript 9.22) -/CreationDate (D:20180323100658Z00'00') -/ModDate (D:20180323100658Z00'00') +/D [1562 0 R /XYZ 149.705 753.953 null] >> -% 1311 0 obj +% 344 0 obj << -/Type /ExtGState -/OPM 1 +/D [1562 0 R /XYZ 150.705 716.092 null] >> -% 1312 0 obj +% 1561 0 obj << -/BaseFont /XYUGDR+Times-Roman -/FontDescriptor 1314 0 R -/Type /Font -/FirstChar 48 -/LastChar 57 -/Widths [ 500 500 500 500 500 500 500 500 500 500] -/Encoding /WinAnsiEncoding -/Subtype /Type1 +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1313 0 obj +% 1566 0 obj << -/BaseFont /XISTAL+Times-Bold -/FontDescriptor 1315 0 R -/Type /Font -/FirstChar 48 -/LastChar 80 -/Widths [ 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 611] -/Encoding /WinAnsiEncoding -/Subtype /Type1 +/Type /Page +/Contents 1567 0 R +/Resources 1565 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1557 0 R >> -% 1314 0 obj +% 1568 0 obj << -/Type /FontDescriptor -/FontName /XYUGDR+Times-Roman -/FontBBox [ 0 -14 476 688] -/Flags 65568 -/Ascent 688 -/CapHeight 688 -/Descent -14 -/ItalicAngle 0 -/StemV 71 -/MissingWidth 250 -/CharSet (/eight/five/four/nine/one/seven/six/three/two/zero) -/FontFile3 1316 0 R +/D [1566 0 R /XYZ 98.895 753.953 null] >> -% 1315 0 obj +% 1569 0 obj << -/Type /FontDescriptor -/FontName /XISTAL+Times-Bold -/FontBBox [ 0 -13 600 688] -/Flags 65568 -/Ascent 688 -/CapHeight 676 -/Descent -13 -/ItalicAngle 0 -/StemV 90 -/MissingWidth 250 -/CharSet (/P/one/zero) -/FontFile3 1317 0 R +/D [1566 0 R /XYZ 99.895 701.929 null] +>> +% 1570 0 obj +<< +/D [1566 0 R /XYZ 99.895 667.454 null] +>> +% 1571 0 obj +<< +/D [1566 0 R /XYZ 99.895 647.529 null] +>> +% 1572 0 obj +<< +/D [1566 0 R /XYZ 99.895 603.693 null] >> -% 1309 0 obj +% 1573 0 obj << -/D [1307 0 R /XYZ 98.895 753.953 null] +/D [1566 0 R /XYZ 99.895 547.902 null] >> -% 1301 0 obj +% 1574 0 obj << -/D [1307 0 R /XYZ 99.895 282.918 null] +/D [1566 0 R /XYZ 99.895 527.977 null] >> -% 1306 0 obj +% 1575 0 obj << -/Font << /F54 586 0 R >> -/XObject << /Im5 1291 0 R >> +/D [1566 0 R /XYZ 99.895 496.097 null] +>> +% 1565 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1322 0 obj +% 1579 0 obj << /Type /Page -/Contents 1323 0 R -/Resources 1321 0 R +/Contents 1580 0 R +/Resources 1578 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1281 0 R -/Annots [ 1318 0 R 1319 0 R 1320 0 R ] ->> -% 1318 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [419.358 298.8 495.412 310.86] -/A << /S /GoTo /D (vdata) >> +/Parent 1557 0 R +/Annots [ 1576 0 R 1577 0 R ] >> -% 1319 0 obj +% 1576 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [255.331 289.495 267.287 298.905] -/A << /S /GoTo /D (table.19) >> +/Rect [342.753 573.77 420.271 585.83] +/A << /S /GoTo /D (spdata) >> >> -% 1320 0 obj +% 1577 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 218.115 409.811 230.175] +/Rect [342.753 506.024 409.811 518.084] /A << /S /GoTo /D (descdata) >> >> -% 1324 0 obj -<< -/D [1322 0 R /XYZ 149.705 753.953 null] ->> -% 300 0 obj +% 1581 0 obj << -/D [1322 0 R /XYZ 150.705 716.092 null] +/D [1579 0 R /XYZ 149.705 753.953 null] >> -% 1325 0 obj +% 348 0 obj << -/D [1322 0 R /XYZ 150.705 460.417 null] +/D [1579 0 R /XYZ 150.705 716.092 null] >> -% 1321 0 obj +% 1578 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1327 0 obj +% 1585 0 obj << /Type /Page -/Contents 1328 0 R -/Resources 1326 0 R +/Contents 1586 0 R +/Resources 1584 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1330 0 R +/Parent 1557 0 R +/Annots [ 1582 0 R 1583 0 R ] >> -% 1329 0 obj -<< -/D [1327 0 R /XYZ 98.895 753.953 null] ->> -% 1326 0 obj -<< -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 1334 0 obj +% 1582 0 obj << -/Type /Page -/Contents 1335 0 R -/Resources 1333 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1330 0 R -/Annots [ 1331 0 R ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 559.823 369.462 571.882] +/A << /S /GoTo /D (spdata) >> >> -% 1331 0 obj +% 1583 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 211.064 409.811 223.124] +/Rect [291.943 492.077 359.001 504.136] /A << /S /GoTo /D (descdata) >> >> -% 1336 0 obj +% 1587 0 obj << -/D [1334 0 R /XYZ 149.705 753.953 null] +/D [1585 0 R /XYZ 98.895 753.953 null] >> -% 304 0 obj +% 352 0 obj << -/D [1334 0 R /XYZ 150.705 716.092 null] +/D [1585 0 R /XYZ 99.895 716.092 null] >> -% 1337 0 obj +% 1588 0 obj << -/D [1334 0 R /XYZ 150.705 449.977 null] +/D [1585 0 R /XYZ 99.895 312.355 null] >> -% 1333 0 obj +% 1584 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1341 0 obj +% 1592 0 obj << /Type /Page -/Contents 1342 0 R -/Resources 1340 0 R +/Contents 1593 0 R +/Resources 1591 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1330 0 R -/Annots [ 1332 0 R 1338 0 R 1339 0 R ] +/Parent 1557 0 R +/Annots [ 1589 0 R 1590 0 R ] >> -% 1332 0 obj +% 1589 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [287.51 642.547 363.564 654.607] -/A << /S /GoTo /D (vdata) >> +/Rect [310.942 577.893 378 589.953] +/A << /S /GoTo /D (descdata) >> >> -% 1338 0 obj +% 1590 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [123.806 542.921 199.86 554.981] +/Rect [261.249 128.475 337.303 140.535] /A << /S /GoTo /D (vdata) >> >> -% 1339 0 obj +% 1594 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [421.516 542.921 433.471 554.981] -/A << /S /GoTo /D (table.20) >> +/D [1592 0 R /XYZ 149.705 753.953 null] >> -% 1343 0 obj +% 356 0 obj << -/D [1341 0 R /XYZ 98.895 753.953 null] +/D [1592 0 R /XYZ 150.705 716.092 null] >> -% 1340 0 obj +% 1591 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1345 0 obj +% 1596 0 obj << /Type /Page -/Contents 1346 0 R -/Resources 1344 0 R +/Contents 1597 0 R +/Resources 1595 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1330 0 R +/Parent 1600 0 R +>> +% 1598 0 obj +<< +/D [1596 0 R /XYZ 98.895 753.953 null] +>> +% 1599 0 obj +<< +/D [1596 0 R /XYZ 99.895 632.19 null] +>> +% 1595 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] >> endstream endobj -1352 0 obj +1605 0 obj << -/Length 6337 +/Length 6336 +>> +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 150.705 706.129 Td [(6.13)-1000(psb)]TJ +ET +q +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 207.803 706.129 Td [(geins)-250(\227)-250(Dense)-250(matrix)-250(insertion)-250(routine)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_geins\050m,)-525(irw,)-525(val,)-525(x,)-525(desc_a,)-525(info)-525([,local]\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -20.57 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.386 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.386 Td [(m)]TJ +0 g 0 G +/F62 9.9626 Tf 13.838 0 Td [(Number)-250(of)-250(r)18(ows)-250(in)]TJ/F60 9.9626 Tf 86.569 0 Td [(v)-40(a)-25(l)]TJ/F62 9.9626 Tf 15.736 0 Td [(to)-250(be)-250(inserted.)]TJ -91.237 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.386 Td [(irw)]TJ +0 g 0 G +/F62 9.9626 Tf 20.473 0 Td [(Indices)-381(of)-382(the)-382(r)18(ows)-381(to)-382(be)-381(inserted.)-705(Speci\002cally)111(,)-414(r)18(ow)]TJ/F60 9.9626 Tf 239.84 0 Td [(i)]TJ/F62 9.9626 Tf 6.765 0 Td [(of)]TJ/F60 9.9626 Tf 12.683 0 Td [(v)-40(a)-25(l)]TJ/F62 9.9626 Tf 17.046 0 Td [(will)-381(be)-382(in-)]TJ -271.901 -11.955 Td [(serted)-344(into)-344(the)-344(local)-344(r)18(ow)-344(corr)18(esponding)-344(to)-344(the)-344(global)-344(r)18(ow)-344(index)]TJ/F60 9.9626 Tf 290.218 0 Td [(i)-22(r)-35(w)]TJ/F93 10.3811 Tf 14.654 0 Td [(\050)]TJ/F60 9.9626 Tf 4.205 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 4.149 0 Td [(.)]TJ -316.314 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.386 Td [(val)]TJ +0 g 0 G +/F62 9.9626 Tf 18.819 0 Td [(the)-250(dense)-250(submatrix)-250(to)-250(be)-250(inserted.)]TJ 6.087 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(1)-250(or)-250(2)-250(array)111(.)-310(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.386 Td [(desc)]TJ +ET +q +1 0 0 1 171.218 414.446 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 174.207 414.247 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 360.068 366.626 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 363.206 366.426 Td [(desc)]TJ +ET +q +1 0 0 1 384.755 366.626 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 387.893 366.426 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -258.11 -19.386 Td [(local)]TJ +0 g 0 G +/F62 9.9626 Tf 26.56 0 Td [(Whether)-240(the)-240(entries)-241(in)-240(the)-240(index)-240(vector)]TJ/F67 9.9626 Tf 173.162 0 Td [(irw)]TJ/F62 9.9626 Tf 15.692 0 Td [(,)-242(ar)18(e)-240(alr)18(eady)-241(i)1(n)-241(local)-240(number)18(-)]TJ -190.508 -11.955 Td [(ing.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -62.187 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value;)-250(default:)]TJ/F67 9.9626 Tf 162.678 0 Td [(.false.)]TJ/F62 9.9626 Tf 36.613 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -224.197 -20.57 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.387 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(the)-250(output)-250(dense)-250(matrix.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(rank)-190(one)-190(or)-190(two)-190(array)-190(or)-190(an)-190(object)-190(of)-190(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 242.569 0 Td [(psb)]TJ +ET +q +1 0 0 1 434.498 211.642 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 437.636 211.443 Td [(T)]TJ +ET +q +1 0 0 1 443.494 211.642 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 446.633 211.443 Td [(vect)]TJ +ET +q +1 0 0 1 468.182 211.642 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 471.32 211.443 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(,)]TJ -316.63 -11.955 Td [(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -31.342 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 141.968 -29.888 Td [(92)]TJ +0 g 0 G +ET + +endstream +endobj +1610 0 obj +<< +/Length 539 +>> +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 99.895 706.129 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(Dense)-250(vectors/matrices)-250(do)-250(not)-250(have)-250(an)-250(associated)-250(state;)]TJ +0 g 0 G + 0 -19.926 Td [(2.)]TJ +0 g 0 G + [-500(Duplicate)-326(entries)-326(ar)18(e)-325(either)-326(overwritten)-326(or)-326(added,)-345(ther)18(e)-325(is)-326(no)-326(pr)18(ovision)]TJ 12.453 -11.955 Td [(for)-250(raising)-250(an)-250(err)18(or)-250(condition.)]TJ +0 g 0 G + 141.968 -563.885 Td [(93)]TJ +0 g 0 G +ET + +endstream +endobj +1619 0 obj +<< +/Length 6120 >> stream 0 g 0 G 0 g 0 G +BT +/F59 11.9552 Tf 150.705 706.129 Td [(6.14)-1000(psb)]TJ +ET +q +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 207.803 706.129 Td [(geasb)-250(\227)-250(Assembly)-250(a)-250(dense)-250(matrix)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_geasb\050x,)-525(desc_a,)-525(info,)-525(mold\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(desc)]TJ +ET +q +1 0 0 1 171.218 625.596 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 174.207 625.397 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 136.328 0 Td [(psb)]TJ +ET +q +1 0 0 1 328.257 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 331.395 577.576 Td [(desc)]TJ +ET +q +1 0 0 1 352.944 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 356.083 577.576 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -226.299 -31.88 Td [(mold)]TJ 0 g 0 G +/F62 9.9626 Tf 28.782 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(vector)-250(storage.)]TJ -3.876 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-223(as:)-296(an)-223(object)-223(of)-222(a)-223(class)-223(derived)-223(fr)18(om)]TJ/F67 9.9626 Tf 199.087 0 Td [(psb)]TJ +ET +q +1 0 0 1 391.016 498.074 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 394.155 497.875 Td [(T)]TJ +ET +q +1 0 0 1 400.012 498.074 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 403.151 497.875 Td [(base)]TJ +ET +q +1 0 0 1 424.7 498.074 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F51 9.9626 Tf 99.895 706.129 Td [(nl)]TJ -0 g 0 G -/F54 9.9626 Tf 14.386 0 Td [(Data)-305(allocation:)-421(in)-305(a)-305(generalized)-305(block-r)18(ow)-305(distribution)-306(the)-305(number)-305(of)-305(in-)]TJ 10.521 -11.955 Td [(dices)-250(belonging)-250(to)-250(the)-250(curr)18(ent)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(May)-250(be)-250(speci\002ed)-250(together)-250(with)]TJ/F59 9.9626 Tf 272.943 0 Td [(vl)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -308.311 -20.135 Td [(repl)]TJ -0 g 0 G -/F54 9.9626 Tf 23.243 0 Td [(Data)-288(allocation:)-385(build)-288(a)-288(r)18(eplicated)-287(index)-288(space)-288(\050i.e.)-423(all)-288(pr)18(ocesses)-287(own)-288(all)]TJ 1.664 -11.955 Td [(indices\051.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(the)-250(logical)-250(value)]TJ/F59 9.9626 Tf 132.133 0 Td [(.true.)]TJ -0 g 0 G -/F51 9.9626 Tf -157.04 -20.135 Td [(globalcheck)]TJ -0 g 0 G -/F54 9.9626 Tf 59.766 0 Td [(Data)-250(allocation:)-310(do)-250(global)-250(checks)-250(on)-250(the)-250(local)-250(index)-250(lists)]TJ/F59 9.9626 Tf 247.788 0 Td [(vl)]TJ/F54 9.9626 Tf -282.647 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value,)-250(default:)]TJ/F59 9.9626 Tf 162.678 0 Td [(.false.)]TJ +/F67 9.9626 Tf 427.838 497.875 Td [(vect)]TJ +ET +q +1 0 0 1 449.387 498.074 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 452.525 497.875 Td [(type)]TJ/F62 9.9626 Tf 20.921 0 Td [(;)-232(this)]TJ -297.835 -11.955 Td [(is)-250(only)-250(allowed)-250(when)]TJ/F60 9.9626 Tf 97.12 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(is)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 42.899 0 Td [(psb)]TJ +ET +q +1 0 0 1 339.644 486.119 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 342.783 485.92 Td [(T)]TJ +ET +q +1 0 0 1 348.641 486.119 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 351.779 485.92 Td [(vect)]TJ +ET +q +1 0 0 1 373.328 486.119 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 376.466 485.92 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -187.585 -20.135 Td [(lidx)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 22.685 0 Td [(Data)-308(allocation:)-425(the)-307(set)-308(of)-307(local)-308(indices)]TJ/F52 9.9626 Tf 175.731 0 Td [(l)-48(i)-32(d)-42(x)]TJ/F85 10.3811 Tf 17.065 0 Td [(\050)]TJ/F54 9.9626 Tf 4.15 0 Td [(1)-397(:)]TJ/F52 9.9626 Tf 15.505 0 Td [(n)-25(l)]TJ/F85 10.3811 Tf 9.105 0 Td [(\051)]TJ/F54 9.9626 Tf 7.214 0 Td [(to)-308(be)-307(assigned)-308(to)-307(the)]TJ -226.548 -11.955 Td [(global)-250(indices)]TJ/F52 9.9626 Tf 63.476 0 Td [(v)-25(l)]TJ/F54 9.9626 Tf 8.423 0 Td [(.)]TJ -71.899 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ +/F59 9.9626 Tf -246.682 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.127 Td [(On)-250(Return)]TJ 0 g 0 G + 0 -19.925 Td [(x)]TJ 0 g 0 G - 0 -20.135 Td [(desc)]TJ +/F62 9.9626 Tf 9.962 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(assembled.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-259(as:)-328(a)-259(rank)-258(one)-259(or)-259(two)-259(array)-259(with)-259(the)-259(ALLOCA)74(T)74(ABLE)-258(or)-259(an)-259(ob-)]TJ 0 -11.955 Td [(ject)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 50.55 0 Td [(psb)]TJ ET q -1 0 0 1 120.408 376.512 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 242.48 384.5 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 123.397 376.313 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 245.618 384.301 Td [(T)]TJ ET q -1 0 0 1 309.258 328.692 cm +1 0 0 1 251.476 384.5 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 312.397 328.492 Td [(desc)]TJ +/F67 9.9626 Tf 254.614 384.301 Td [(vect)]TJ ET q -1 0 0 1 333.945 328.692 cm +1 0 0 1 276.163 384.5 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 337.084 328.492 Td [(type)]TJ +/F67 9.9626 Tf 279.301 384.301 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 20.922 0 Td [(,)-250(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -20.135 Td [(info)]TJ +/F59 9.9626 Tf -149.518 -31.881 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -22.128 Td [(Notes)]TJ +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -20.082 Td [(1.)]TJ +/F62 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G - [-500(One)-309(of)-310(the)-309(optional)-310(ar)18(guments)]TJ/F59 9.9626 Tf 152.661 0 Td [(parts)]TJ/F54 9.9626 Tf 26.152 0 Td [(,)]TJ/F59 9.9626 Tf 5.723 0 Td [(vg)]TJ/F54 9.9626 Tf 10.46 0 Td [(,)]TJ/F59 9.9626 Tf 5.723 0 Td [(vl)]TJ/F54 9.9626 Tf 10.46 0 Td [(,)]TJ/F59 9.9626 Tf 5.723 0 Td [(nl)]TJ/F54 9.9626 Tf 13.544 0 Td [(or)]TJ/F59 9.9626 Tf 12.458 0 Td [(repl)]TJ/F54 9.9626 Tf 24.005 0 Td [(must)-310(be)-309(speci-)]TJ -254.456 -11.956 Td [(\002ed,)-250(ther)18(eby)-250(choosing)-250(the)-250(initialization)-250(strategy)-250(as)-250(follows:)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -20.135 Td [(parts)]TJ -0 g 0 G -/F54 9.9626 Tf 27.666 0 Td [(In)-242(this)-242(case)-242(we)-243(have)-242(a)-242(subr)18(outine)-242(specifying)-242(the)-242(mapping)-242(between)]TJ -5.748 -11.955 Td [(global)-312(indices)-312(and)-311(pr)18(ocess/local)-312(index)-312(pairs.)-496(If)-311(this)-312(optional)-312(ar)18(gu-)]TJ 0 -11.955 Td [(ment)-230(is)-230(speci\002ed,)-234(then)-230(it)-230(is)-230(mandatory)-230(to)-230(specify)-230(the)-230(ar)18(gument)]TJ/F59 9.9626 Tf 274.929 0 Td [(mg)]TJ/F54 9.9626 Tf 12.752 0 Td [(as)]TJ -287.681 -11.955 Td [(well.)-310(The)-250(subr)18(outine)-250(must)-250(conform)-250(to)-250(the)-250(following)-250(interface:)]TJ + [-500(On)-227(entry)-226(to)-227(this)-226(r)18(outine)-227(th)1(e)-227(descriptor)-226(must)-227(be)-226(in)-227(the)-226(assembled)-227(state,)-231(i.e.)]TJ/F67 9.9626 Tf 12.453 -11.956 Td [(psb_cdasb)]TJ/F62 9.9626 Tf 49.564 0 Td [(must)-250(alr)18(eady)-250(have)-250(been)-250(called.)]TJ 0 g 0 G + -62.017 -19.925 Td [(2.)]TJ 0 g 0 G -/F59 9.9626 Tf 10.46 -18.09 Td [(interface)]TJ 15.691 -11.955 Td [(subroutine)-525(psb_parts\050glob_index,mg,np,pv,nv\051)]TJ + [-500(If)-431(the)]TJ/F67 9.9626 Tf 41.544 0 Td [(bldmode=psb_matbld_remote_)]TJ/F62 9.9626 Tf 140.287 0 Td [(value)-431(was)-432(speci\002ed)-431(at)-432(allocation)]TJ -169.378 -11.955 Td [(time,)-278(contributions)-272(de\002ned)-272(on)-273(the)-272(curr)18(ent)-272(pr)18(ocess)-272(but)-272(belonging)-273(to)-272(a)-272(r)18(e-)]TJ 0 -11.955 Td [(mote)-267(pr)18(ocess)-266(will)-267(be)-267(handled)-267(accor)18(dingly)111(.)-360(This)-267(is)-266(most)-267(likely)-267(to)-266(occur)-267(in)]TJ 0 -11.955 Td [(\002nite)-250(element)-250(applications,)-250(with)]TJ/F67 9.9626 Tf 144.277 0 Td [(dupl=psb_dupl_add_)]TJ/F62 9.9626 Tf 94.146 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 93.899 -29.888 Td [(71)]TJ + -96.455 -104.573 Td [(94)]TJ 0 g 0 G ET endstream endobj -1358 0 obj +1627 0 obj << -/Length 9985 +/Length 3224 >> stream 0 g 0 G 0 g 0 G BT -/F59 9.9626 Tf 234.142 706.129 Td [(integer,)-525(intent)-525(\050in\051)-1050(::)-525(glob_index,np,mg)]TJ 0 -11.955 Td [(integer,)-525(intent)-525(\050out\051)-525(::)-525(nv,)-525(pv\050*\051)]TJ -10.461 -11.955 Td [(end)-525(subroutine)-525(psb_parts)]TJ -15.691 -11.956 Td [(end)-525(interface)]TJ/F54 9.9626 Tf -10.461 -17.586 Td [(The)-250(input)-250(ar)18(guments)-250(ar)18(e:)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -15.594 Td [(glob)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(6.15)-1000(psb)]TJ ET q -1 0 0 1 218.61 637.283 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 221.599 637.083 Td [(index)]TJ -0 g 0 G -/F54 9.9626 Tf 30.436 0 Td [(The)-250(global)-250(index)-250(to)-250(be)-250(mapped;)]TJ +/F59 11.9552 Tf 156.993 706.129 Td [(gefree)-250(\227)-250(Frees)-250(a)-250(dense)-250(matrix)]TJ 0 g 0 G -/F51 9.9626 Tf -54.506 -13.774 Td [(np)]TJ 0 g 0 G -/F54 9.9626 Tf 17.156 0 Td [(The)-250(number)-250(of)-250(pr)18(ocesses)-250(in)-250(the)-250(mapping;)]TJ +/F67 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_gefree\050x,)-525(desc_a,)-525(info\051)]TJ 0 g 0 G -/F51 9.9626 Tf -17.156 -13.774 Td [(mg)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 19.377 0 Td [(The)-250(total)-250(number)-250(of)-250(global)-250(r)18(ows)-250(in)-250(the)-250(mapping;)]TJ -19.377 -15.594 Td [(The)-250(output)-250(ar)18(guments)-250(ar)18(e:)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -15.594 Td [(nv)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 16.608 0 Td [(The)-250(number)-250(of)-250(entries)-250(in)]TJ/F59 9.9626 Tf 111.052 0 Td [(pv)]TJ/F54 9.9626 Tf 10.461 0 Td [(;)]TJ 0 g 0 G -/F51 9.9626 Tf -138.121 -13.774 Td [(pv)]TJ -0 g 0 G -/F54 9.9626 Tf 16.608 0 Td [(A)-393(vector)-394(containing)-393(the)-394(indices)-393(of)-394(the)-394(pr)18(ocesses)-393(to)-394(which)-393(the)]TJ 2.022 -11.955 Td [(global)-357(index)-357(should)-356(be)-357(assigend;)-410(each)-357(entry)-357(must)-357(satisfy)-357(0)]TJ/F83 10.3811 Tf 270.063 0 Td [(\024)]TJ/F52 9.9626 Tf -269.39 -11.956 Td [(p)-25(v)]TJ/F85 10.3811 Tf 10.461 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F61 10.3811 Tf 8.665 0 Td [(<)]TJ/F52 9.9626 Tf 12.71 0 Td [(n)-80(p)]TJ/F54 9.9626 Tf 11.442 0 Td [(;)-382(if)]TJ/F52 9.9626 Tf 16.006 0 Td [(n)-25(v)]TJ/F61 10.3811 Tf 15.409 0 Td [(>)]TJ/F54 9.9626 Tf 12.586 0 Td [(1)-338(we)-338(have)-338(an)-338(index)-338(assigned)-338(to)-338(multiple)]TJ -95.244 -11.955 Td [(pr)18(ocesses,)-250(i.e.)-310(we)-250(have)-250(an)-250(overlap)-250(among)-250(the)-250(subdomains.)]TJ -0 g 0 G -/F51 9.9626 Tf -40.548 -15.594 Td [(vg)]TJ -0 g 0 G -/F54 9.9626 Tf 16.06 0 Td [(In)-330(this)-330(case)-330(the)-330(association)-330(between)-331(an)-330(index)-330(and)-330(a)-330(pr)18(ocess)-330(is)-330(spec-)]TJ 5.858 -11.955 Td [(i\002ed)-371(via)-372(an)-371(integer)-372(vector)]TJ/F59 9.9626 Tf 120.986 0 Td [(vg\0501:mg\051)]TJ/F54 9.9626 Tf 41.842 0 Td [(;)-432(each)-372(index)]TJ/F52 9.9626 Tf 58.923 0 Td [(i)]TJ/F83 10.3811 Tf 8.096 0 Td [(2)-506(f)]TJ/F54 9.9626 Tf 17.485 0 Td [(1)-179(.)-192(.)-192(.)]TJ/F52 9.9626 Tf 19.967 0 Td [(m)-47(g)]TJ/F83 10.3811 Tf 13.449 0 Td [(g)]TJ/F54 9.9626 Tf 9.016 0 Td [(is)]TJ -289.764 -11.955 Td [(assigned)-381(to)-381(pr)18(ocess)]TJ/F52 9.9626 Tf 91.547 0 Td [(v)-47(g)]TJ/F85 10.3811 Tf 10.68 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.089 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(.)-703(The)-380(vector)]TJ/F59 9.9626 Tf 61.203 0 Td [(vg)]TJ/F54 9.9626 Tf 14.255 0 Td [(must)-381(be)-381(identical)-381(on)-380(all)]TJ -189.127 -11.955 Td [(calling)-354(pr)18(ocesses;)-406(its)-355(entri)1(es)-355(may)-354(have)-354(the)-354(ranges)]TJ/F85 10.3811 Tf 226.209 0 Td [(\050)]TJ/F54 9.9626 Tf 4.149 0 Td [(0)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 19.967 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.888 0 Td [(\000)]TJ/F54 9.9626 Tf 10.515 0 Td [(1)]TJ/F85 10.3811 Tf 5.106 0 Td [(\051)]TJ/F54 9.9626 Tf 7.678 0 Td [(or)]TJ/F85 10.3811 Tf -287.387 -11.955 Td [(\050)]TJ/F54 9.9626 Tf 4.149 0 Td [(1)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 19.967 0 Td [(n)-80(p)]TJ/F85 10.3811 Tf 11.566 0 Td [(\051)]TJ/F54 9.9626 Tf 6.984 0 Td [(accor)18(ding)-284(to)-285(the)-284(value)-285(of)]TJ/F59 9.9626 Tf 113.068 0 Td [(flag)]TJ/F54 9.9626 Tf 20.922 0 Td [(.)-413(The)-285(size)]TJ/F52 9.9626 Tf 45.955 0 Td [(m)-47(g)]TJ/F54 9.9626 Tf 16.159 0 Td [(may)-285(be)-284(spec-)]TJ -238.895 -11.955 Td [(i\002ed)-349(via)-349(the)-349(optional)-349(ar)18(gument)]TJ/F59 9.9626 Tf 144.092 0 Td [(mg)]TJ/F54 9.9626 Tf 10.46 0 Td [(;)-399(the)-349(default)-349(is)-349(to)-349(use)-349(the)-349(entir)18(e)]TJ -154.552 -11.956 Td [(vector)]TJ/F59 9.9626 Tf 29.937 0 Td [(vg)]TJ/F54 9.9626 Tf 10.461 0 Td [(,)-250(thus)-250(having)]TJ/F59 9.9626 Tf 59.885 0 Td [(mg=size\050vg\051)]TJ/F54 9.9626 Tf 57.534 0 Td [(.)]TJ + 0 -19.925 Td [(x)]TJ 0 g 0 G -/F51 9.9626 Tf -179.735 -15.593 Td [(vl)]TJ +/F62 9.9626 Tf 9.963 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(fr)18(eed.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-259(as:)-328(a)-258(rank)-259(one)-259(or)-259(two)-259(array)-259(with)-259(the)-259(ALLOCA)74(T)74(ABLE)-258(or)-259(an)-259(ob-)]TJ 0 -11.955 Td [(ject)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 50.55 0 Td [(psb)]TJ +ET +q +1 0 0 1 191.67 565.82 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 194.809 565.621 Td [(T)]TJ +ET +q +1 0 0 1 200.666 565.82 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 203.805 565.621 Td [(vect)]TJ +ET +q +1 0 0 1 225.354 565.82 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 228.492 565.621 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 13.838 0 Td [(In)-383(this)-382(case)-383(we)-383(ar)18(e)-382(specifying)-383(the)-383(list)-382(of)-383(indices)]TJ/F59 9.9626 Tf 220.787 0 Td [(vl\0501:nl\051)]TJ/F54 9.9626 Tf 45.655 0 Td [(assigned)]TJ -258.362 -11.955 Td [(to)-401(the)-400(curr)18(ent)-401(pr)18(ocess;)-476(thus,)-438(the)-400(global)-401(pr)18(oblem)-400(size)]TJ/F52 9.9626 Tf 243.82 0 Td [(m)-47(g)]TJ/F54 9.9626 Tf 17.316 0 Td [(is)-401(given)]TJ -261.136 -11.956 Td [(by)-435(the)-435(range)-435(of)-435(the)-435(aggr)18(egate)-435(of)-435(the)-435(individual)-435(vectors)]TJ/F59 9.9626 Tf 259.368 0 Td [(vl)]TJ/F54 9.9626 Tf 14.794 0 Td [(spec-)]TJ -274.162 -11.955 Td [(i\002ed)-429(in)-429(the)-429(calling)-428(pr)18(ocesses.)-847(The)-429(size)-429(may)-429(be)-428(speci\002ed)-429(via)-429(the)]TJ 0 -11.955 Td [(optional)-438(ar)18(gument)]TJ/F59 9.9626 Tf 88.319 0 Td [(nl)]TJ/F54 9.9626 Tf 10.46 0 Td [(;)-532(the)-438(default)-438(is)-438(to)-438(use)-438(the)-438(entir)18(e)-438(vector)]TJ/F59 9.9626 Tf 185.156 0 Td [(vl)]TJ/F54 9.9626 Tf 10.461 0 Td [(,)]TJ -294.396 -11.955 Td [(thus)-364(having)]TJ/F59 9.9626 Tf 57.178 0 Td [(nl=size\050vl\051)]TJ/F54 9.9626 Tf 57.534 0 Td [(.)-652(If)]TJ/F59 9.9626 Tf 19.294 0 Td [(globalcheck=.true.)]TJ/F54 9.9626 Tf 97.774 0 Td [(the)-364(subr)18(outine)]TJ -231.78 -11.955 Td [(will)-403(check)-403(how)-404(many)-403(times)-403(each)-403(entry)-403(in)-404(the)-403(global)-403(index)-403(space)]TJ/F85 10.3811 Tf 0.125 -11.955 Td [(\050)]TJ/F54 9.9626 Tf 4.149 0 Td [(1)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 19.967 0 Td [(m)-47(g)]TJ/F85 10.3811 Tf 13.449 0 Td [(\051)]TJ/F54 9.9626 Tf 6.245 0 Td [(is)-210(speci\002ed)-211(in)-210(the)-210(input)-210(lists)]TJ/F59 9.9626 Tf 122.836 0 Td [(vl)]TJ/F54 9.9626 Tf 10.461 0 Td [(,)-218(thus)-211(allowin)1(g)-211(for)-210(the)-210(pr)18(es-)]TJ -177.232 -11.956 Td [(ence)-302(of)-302(overlap)-302(in)-302(the)-302(input,)-315(and)-302(checki)1(ng)-302(for)-302(\223orphan\224)-302(indices.)-466(If)]TJ/F59 9.9626 Tf 0 -11.955 Td [(globalcheck=.false.)]TJ/F54 9.9626 Tf 99.377 0 Td [(,)-437(the)-400(subr)18(outine)-400(will)-400(not)-400(check)-400(for)-400(overlap,)]TJ -99.377 -11.955 Td [(and)-255(may)-255(be)-255(signi\002cantly)-255(faster)74(,)-257(but)-255(the)-255(user)-255(is)-255(implicitly)-255(guarantee-)]TJ 0 -11.955 Td [(ing)-250(that)-250(ther)18(e)-250(ar)18(e)-250(neither)-250(orphan)-250(nor)-250(overlap)-250(indices.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(,)-250(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ 0 g 0 G -/F51 9.9626 Tf -21.918 -15.594 Td [(lidx)]TJ +/F59 9.9626 Tf -149.518 -31.881 Td [(desc)]TJ +ET +q +1 0 0 1 120.408 533.94 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 123.397 533.74 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 22.685 0 Td [(The)-377(optional)-376(ar)18(gument)]TJ/F59 9.9626 Tf 107.528 0 Td [(lidx)]TJ/F54 9.9626 Tf 24.674 0 Td [(is)-377(available)-376(for)-377(those)-377(cases)-377(in)-376(which)]TJ -132.969 -11.955 Td [(the)-361(user)-361(has)-361(alr)18(eady)-361(established)-361(a)-361(global-to-local)-361(mapping;)-416(if)-361(it)-361(is)]TJ 0 -11.955 Td [(speci\002ed,)-253(each)-253(index)-253(in)]TJ/F59 9.9626 Tf 105.128 0 Td [(vl\050i\051)]TJ/F54 9.9626 Tf 28.669 0 Td [(will)-253(be)-252(mapped)-253(to)-253(the)-252(corr)18(esponding)]TJ -133.797 -11.955 Td [(local)-317(index)]TJ/F59 9.9626 Tf 51.649 0 Td [(lidx\050i\051)]TJ/F54 9.9626 Tf 36.612 0 Td [(.)-510(When)-317(specifying)-316(the)-317(ar)18(gument)]TJ/F59 9.9626 Tf 148.638 0 Td [(lidx)]TJ/F54 9.9626 Tf 24.076 0 Td [(the)-317(user)]TJ -260.975 -11.956 Td [(would)-329(also)-330(likely)-329(employ)]TJ/F59 9.9626 Tf 117.394 0 Td [(lidx)]TJ/F54 9.9626 Tf 24.203 0 Td [(in)-329(calls)-330(to)]TJ/F59 9.9626 Tf 46.656 0 Td [(psb_cdins)]TJ/F54 9.9626 Tf 50.355 0 Td [(and)]TJ/F59 9.9626 Tf 20.148 0 Td [(local)]TJ/F54 9.9626 Tf 29.433 0 Td [(in)]TJ -288.189 -11.955 Td [(calls)-250(to)]TJ/F59 9.9626 Tf 33.095 0 Td [(psb_spins)]TJ/F54 9.9626 Tf 49.564 0 Td [(and)]TJ/F59 9.9626 Tf 19.358 0 Td [(psb_geins)]TJ/F54 9.9626 Tf 47.073 0 Td [(;)-250(see)-250(also)-250(sec.)]TJ +/F62 9.9626 Tf 9.963 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG - [-250(2.3.1)]TJ -0 g 0 G - [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -171.008 -15.593 Td [(nl)]TJ +/F67 9.9626 Tf 136.327 0 Td [(psb)]TJ +ET +q +1 0 0 1 277.448 486.119 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 280.586 485.92 Td [(desc)]TJ +ET +q +1 0 0 1 302.135 486.119 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 305.273 485.92 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 14.386 0 Td [(If)-411(this)-411(ar)18(gument)-411(is)-411(speci\002ed)-411(alone)-411(\050i.e.)-793(without)]TJ/F59 9.9626 Tf 223.432 0 Td [(vl)]TJ/F54 9.9626 Tf 10.461 0 Td [(\051)-411(the)-411(r)18(esult)-411(is)-411(a)]TJ -226.361 -11.956 Td [(generalized)-280(r)18(ow-block)-280(distribution)-280(in)-280(which)-281(each)-280(pr)18(ocess)]TJ/F52 9.9626 Tf 257.148 0 Td [(I)]TJ/F54 9.9626 Tf 6.642 0 Td [(gets)-280(as-)]TJ -263.79 -11.955 Td [(signed)-250(a)-250(consecutive)-250(chunk)-250(of)]TJ/F52 9.9626 Tf 135.186 0 Td [(N)]TJ/F52 7.5716 Tf 7.851 -1.808 Td [(I)]TJ/F85 10.3811 Tf 6.316 1.808 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-25(l)]TJ/F54 9.9626 Tf 11.472 0 Td [(global)-250(indices.)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -193.829 -15.593 Td [(repl)]TJ +/F59 9.9626 Tf -226.3 -33.873 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 23.243 0 Td [(This)-418(ar)18(guments)-417(speci\002es)-418(to)-417(r)18(eplicate)-418(all)-417(indices)-418(on)-418(all)-417(pr)18(ocesses.)]TJ -1.325 -11.956 Td [(This)-366(is)-367(a)-366(special)-366(purpose)-366(data)-367(allocation)-366(that)-366(is)-366(useful)-367(in)-366(the)-366(con-)]TJ 0 -11.955 Td [(str)8(uction)-250(of)-250(some)-250(multilevel)-250(pr)18(econditioners.)]TJ 0 g 0 G - -34.371 -19.579 Td [(2.)]TJ + 0 -19.925 Td [(info)]TJ 0 g 0 G - [-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(descriptor)-250(is)-250(in)-250(the)-250(build)-250(state.)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 154.421 -29.888 Td [(72)]TJ + 141.968 -293.863 Td [(95)]TJ 0 g 0 G ET endstream endobj -1364 0 obj +1631 0 obj << -/Length 2555 +/Length 3218 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F54 9.9626 Tf 112.349 706.129 Td [(3.)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(6.16)-1000(psb)]TJ +ET +q +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 207.803 706.129 Td [(gelp)-250(\227)-250(Applies)-250(a)-250(left)-250(permutation)-250(to)-250(a)-250(dense)-250(matrix)]TJ 0 g 0 G - [-500(Calling)-260(the)-260(r)18(outine)-260(with)]TJ/F59 9.9626 Tf 120.886 0 Td [(vg)]TJ/F54 9.9626 Tf 13.052 0 Td [(or)]TJ/F59 9.9626 Tf 11.965 0 Td [(parts)]TJ/F54 9.9626 Tf 28.742 0 Td [(implies)-260(that)-260(every)-260(pr)18(ocess)-260(will)-260(scan)]TJ -162.192 -11.955 Td [(the)-250(entir)18(e)-250(index)-250(space)-250(to)-250(\002gur)18(e)-250(out)-250(the)-250(local)-250(indices.)]TJ 0 g 0 G - -12.453 -19.926 Td [(4.)]TJ +/F67 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_gelp\050trans,)-525(iperm,)-525(x,)-525(info\051)]TJ 0 g 0 G - [-500(Overlapped)-250(indices)-250(ar)18(e)-250(possible)-250(with)-250(both)]TJ/F59 9.9626 Tf 201.093 0 Td [(parts)]TJ/F54 9.9626 Tf 28.643 0 Td [(and)]TJ/F59 9.9626 Tf 19.357 0 Td [(vl)]TJ/F54 9.9626 Tf 12.951 0 Td [(invocations.)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G - -262.044 -19.925 Td [(5.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - [-500(When)-190(the)-190(subr)18(outine)-190(is)-190(invoked)-190(with)]TJ/F59 9.9626 Tf 175.161 0 Td [(vl)]TJ/F54 9.9626 Tf 12.354 0 Td [(in)-190(conjunction)-190(with)]TJ/F59 9.9626 Tf 86.235 0 Td [(globalcheck=.true.)]TJ/F54 9.9626 Tf 94.146 0 Td [(,)]TJ -355.443 -11.955 Td [(it)-280(will)-281(perform)-280(a)-280(scan)-281(of)-280(the)-281(index)-280(space)-280(to)-281(sear)18(ch)-280(for)-280(overlap)-281(or)-280(orphan)]TJ 0 -11.955 Td [(indices.)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - -12.453 -19.925 Td [(6.)]TJ 0 g 0 G - [-500(When)-190(the)-190(subr)18(outine)-190(is)-190(invoked)-190(with)]TJ/F59 9.9626 Tf 175.161 0 Td [(vl)]TJ/F54 9.9626 Tf 12.354 0 Td [(in)-190(conjunction)-190(with)]TJ/F59 9.9626 Tf 86.235 0 Td [(globalcheck=.false.)]TJ/F54 9.9626 Tf 99.377 0 Td [(,)]TJ -360.674 -11.956 Td [(no)-338(index)-337(space)-338(scan)-337(will)-338(take)-337(place.)-573(Thus)-337(it)-338(is)-338(the)-337(r)18(esponsibility)-338(of)-337(the)]TJ 0 -11.955 Td [(user)-328(to)-328(make)-328(sur)18(e)-328(that)-328(the)-328(indices)-328(speci\002ed)-328(in)]TJ/F59 9.9626 Tf 209.973 0 Td [(vl)]TJ/F54 9.9626 Tf 13.729 0 Td [(have)-328(neither)-328(orphans)]TJ -223.702 -11.955 Td [(nor)-250(overlaps;)-250(if)-250(this)-250(assumption)-250(fails,)-250(r)18(esults)-250(will)-250(be)-250(unpr)18(edictable.)]TJ + 0 -19.925 Td [(trans)]TJ 0 g 0 G - -12.453 -19.925 Td [(7.)]TJ +/F62 9.9626 Tf 27.666 0 Td [(A)-250(character)-250(that)-250(speci\002es)-250(whether)-250(to)-250(permute)]TJ/F60 9.9626 Tf 203.748 0 Td [(A)]TJ/F62 9.9626 Tf 9.808 0 Td [(or)]TJ/F60 9.9626 Tf 12.488 0 Td [(A)]TJ/F60 7.5716 Tf 7.511 3.616 Td [(T)]TJ/F62 9.9626 Tf 5.401 -3.616 Td [(.)]TJ -241.716 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(single)-250(character)-250(with)-250(value)-250('N')-250(for)]TJ/F60 9.9626 Tf 218.195 0 Td [(A)]TJ/F62 9.9626 Tf 9.808 0 Td [(or)-250('T')-250(for)]TJ/F60 9.9626 Tf 41.807 0 Td [(A)]TJ/F60 7.5716 Tf 7.511 3.616 Td [(T)]TJ/F62 9.9626 Tf 5.401 -3.616 Td [(.)]TJ 0 g 0 G - [-500(Orphan)-417(and)-416(overlap)-417(indices)-416(ar)18(e)-417(impossible)-416(by)-417(constr)8(uction)-417(when)-416(the)]TJ 12.453 -11.955 Td [(subr)18(outine)-250(is)-250(invoked)-250(with)]TJ/F59 9.9626 Tf 121.164 0 Td [(nl)]TJ/F54 9.9626 Tf 12.951 0 Td [(\050alone\051,)-250(or)]TJ/F59 9.9626 Tf 47.372 0 Td [(vg)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)]TJ +/F59 9.9626 Tf -307.628 -31.88 Td [(iperm)]TJ 0 g 0 G - -49.98 -452.304 Td [(73)]TJ +/F62 9.9626 Tf 32.099 0 Td [(An)-250(integer)-250(array)-250(containing)-250(permutation)-250(information.)]TJ -7.193 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(one-dimensional)-250(array)111(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -31.881 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(permuted.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(one)-250(or)-250(two)-250(dimensional)-250(array)111(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -33.873 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 141.968 -226.117 Td [(96)]TJ 0 g 0 G ET endstream endobj -1375 0 obj +1636 0 obj << -/Length 7006 +/Length 6238 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.2)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(6.17)-1000(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm +1 0 0 1 153.407 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(cdins)-250(\227)-250(Communication)-250(descriptor)-250(insert)-250(routine)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_cdins\050nz,)-525(ia,)-525(ja,)-525(desc_a,)-525(info)-525([,ila,jla]\051)]TJ 0 -11.955 Td [(call)-525(psb_cdins\050nz,ja,desc,info[,jla,mask,lidx]\051)]TJ/F54 9.9626 Tf 14.944 -20.366 Td [(This)-336(subr)18(outine)-335(examines)-336(the)-336(edges)-335(of)-336(the)-336(graph)-335(associated)-336(with)-335(the)-336(dis-)]TJ -14.944 -11.955 Td [(cr)18(etization)-260(mesh)-261(\050and)-260(isomorphic)-260(to)-261(the)-260(sparsity)-260(pattern)-261(of)-260(a)-260(linear)-261(system)-260(co-)]TJ 0 -11.955 Td [(ef)18(\002cient)-238(matrix\051,)-241(storing)-238(them)-239(as)-238(necessary)-238(into)-239(the)-238(communication)-238(descriptor)74(.)]TJ 0 -11.955 Td [(In)-259(the)-260(\002rst)-259(form)-260(the)-259(edges)-259(ar)18(e)-260(speci\002ed)-259(as)-260(pairs)-259(of)-260(indices)]TJ/F52 9.9626 Tf 255.974 0 Td [(i)-47(a)]TJ/F85 10.3811 Tf 7.91 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(,)]TJ/F52 9.9626 Tf 4.624 0 Td [(j)-40(a)]TJ/F85 10.3811 Tf 7.841 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(;)-264(the)-260(start)1(-)]TJ -299.233 -11.956 Td [(ing)-299(index)]TJ/F52 9.9626 Tf 44.948 0 Td [(i)-47(a)]TJ/F85 10.3811 Tf 7.91 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 7.13 0 Td [(should)-299(belong)-299(to)-299(the)-299(curr)18(ent)-299(pr)18(ocess.)-458(In)-299(the)-299(second)-299(form)-299(only)]TJ -67.28 -11.955 Td [(the)-250(r)18(emote)-250(indices)]TJ/F52 9.9626 Tf 83.65 0 Td [(j)-40(a)]TJ/F85 10.3811 Tf 7.841 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 6.64 0 Td [(ar)18(e)-250(speci\002ed.)]TJ -0 g 0 G -/F51 9.9626 Tf -105.423 -20.366 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.304 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.305 Td [(nz)]TJ -0 g 0 G -/F54 9.9626 Tf 16.05 0 Td [(the)-250(number)-250(of)-250(points)-250(being)-250(inserted.)]TJ 8.857 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.305 Td [(ia)]TJ -0 g 0 G -/F54 9.9626 Tf 13.28 0 Td [(the)-250(indices)-250(of)-250(the)-250(starting)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 11.627 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F52 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(.)]TJ +/F59 11.9552 Tf 156.993 706.129 Td [(glob)]TJ +ET +q +1 0 0 1 182.29 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 185.877 706.129 Td [(to)]TJ +ET +q +1 0 0 1 197.222 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 200.809 706.129 Td [(loc)-250(\227)-250(Global)-250(to)-250(local)-250(indices)-250(convertion)]TJ 0 g 0 G -/F51 9.9626 Tf -207.221 -19.304 Td [(ja)]TJ 0 g 0 G -/F54 9.9626 Tf 13.28 0 Td [(the)-250(indices)-250(of)-250(the)-250(end)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 11.627 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F52 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(.)]TJ +/F67 9.9626 Tf -100.914 -18.964 Td [(call)-525(psb_glob_to_loc\050x,)-525(y,)-525(desc_a,)-525(info,)-525(iact,owned\051)]TJ 0 -11.955 Td [(call)-525(psb_glob_to_loc\050x,)-525(desc_a,)-525(info,)-525(iact,owned\051)]TJ 0 g 0 G -/F51 9.9626 Tf -207.221 -19.304 Td [(mask)]TJ +/F59 9.9626 Tf 0 -21.109 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.33 0 Td [(Mask)-247(entries)-248(in)]TJ/F59 9.9626 Tf 69.91 0 Td [(ja)]TJ/F54 9.9626 Tf 10.461 0 Td [(,)-248(they)-247(ar)18(e)-248(inserted)-247(only)-248(when)-247(the)-247(corr)18(esponding)]TJ/F59 9.9626 Tf 213.089 0 Td [(mask)]TJ/F54 9.9626 Tf -297.883 -11.956 Td [(entries)-250(ar)18(e)]TJ/F59 9.9626 Tf 48.139 0 Td [(.true.)]TJ/F54 9.9626 Tf -48.139 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(array)-250(of)-250(length)]TJ/F52 9.9626 Tf 164.297 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(,)-250(default)]TJ/F59 9.9626 Tf 38.784 0 Td [(.true.)]TJ/F54 9.9626 Tf 31.382 0 Td [(.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -269.706 -19.305 Td [(lidx)]TJ +/F59 9.9626 Tf -29.828 -19.602 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 22.685 0 Td [(User)-250(de\002ned)-250(local)-250(indices)-250(for)]TJ/F59 9.9626 Tf 131.117 0 Td [(ja)]TJ/F54 9.9626 Tf 10.46 0 Td [(.)]TJ -139.355 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F52 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -207.221 -20.366 Td [(On)-250(Return)]TJ + 0 -19.601 Td [(x)]TJ 0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(An)-250(integer)-250(vector)-250(of)-250(indices)-250(to)-250(be)-250(converted.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in,)-250(inout)]TJ/F62 9.9626 Tf 38.735 0 Td [(.)]TJ -70.535 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ 0 g 0 G - 0 -19.305 Td [(desc)]TJ +/F59 9.9626 Tf -24.907 -31.557 Td [(desc)]TJ ET q -1 0 0 1 171.218 168.346 cm +1 0 0 1 120.408 535.72 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 168.146 Td [(a)]TJ +/F59 9.9626 Tf 123.397 535.52 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(updated)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 360.068 120.525 cm +1 0 0 1 309.258 487.899 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 363.206 120.326 Td [(desc)]TJ +/F67 9.9626 Tf 312.397 487.7 Td [(desc)]TJ ET q -1 0 0 1 384.755 120.525 cm +1 0 0 1 333.945 487.899 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 387.893 120.326 Td [(type)]TJ +/F67 9.9626 Tf 337.084 487.7 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - -91.236 -29.888 Td [(74)]TJ +/F59 9.9626 Tf -258.11 -19.602 Td [(iact)]TJ 0 g 0 G -ET - -endstream -endobj -1380 0 obj -<< -/Length 3007 ->> -stream +/F62 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F59 9.9626 Tf 253.796 0 Td [(global)]TJ/F62 9.9626 Tf -249.91 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-190(as:)-280(a)-190(character)-190(variable)]TJ/F67 9.9626 Tf 143.341 0 Td [(I)]TJ/F62 9.9626 Tf 5.23 0 Td [(gnor)18(e,)]TJ/F67 9.9626 Tf 29.808 0 Td [(W)]TJ/F62 9.9626 Tf 5.231 0 Td [(arning)-190(or)]TJ/F67 9.9626 Tf 42.111 0 Td [(A)]TJ/F62 9.9626 Tf 5.231 0 Td [(bort,)-202(default)]TJ/F67 9.9626 Tf 55.839 0 Td [(I)]TJ/F62 9.9626 Tf 5.231 0 Td [(gnor)18(e.)]TJ 0 g 0 G +/F59 9.9626 Tf -316.929 -19.601 Td [(owned)]TJ 0 g 0 G +/F62 9.9626 Tf 35.975 0 Td [(Spec\002es)-250(valid)-250(range)-250(of)-250(input)-250(Scope:)]TJ/F59 9.9626 Tf 159.54 0 Td [(global)]TJ/F62 9.9626 Tf -170.608 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(If)-320(tr)8(ue,)-337(then)-320(only)-320(indices)-320(strictly)-320(owned)-320(by)-320(the)-319(curr)18(ent)-320(pr)18(ocess)-320(ar)18(e)-320(con-)]TJ 0 -11.955 Td [(sider)18(ed)-250(valid,)-250(if)-250(false)-250(then)-250(halo)-250(indices)-250(ar)18(e)-250(also)-250(accepted.)-310(Default:)-310(false.)]TJ 0 g 0 G -BT -/F51 9.9626 Tf 99.895 706.129 Td [(info)]TJ +/F59 9.9626 Tf -24.907 -21.109 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(ila)]TJ + 0 -19.601 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 16.598 0 Td [(the)-250(local)-250(indices)-250(of)-250(the)-250(starting)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 8.309 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F52 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(.)]TJ +/F62 9.9626 Tf 9.963 0 Td [(If)]TJ/F60 9.9626 Tf 9.727 0 Td [(y)]TJ/F62 9.9626 Tf 8.032 0 Td [(is)-294(not)-294(pr)18(esent,)-304(then)]TJ/F60 9.9626 Tf 88.385 0 Td [(x)]TJ/F62 9.9626 Tf 8.132 0 Td [(is)-294(overwritten)-294(with)-293(the)-294(translated)-294(integer)-294(indices.)]TJ -99.332 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ 0 g 0 G -/F51 9.9626 Tf -207.221 -19.925 Td [(jla)]TJ +/F59 9.9626 Tf -24.907 -19.602 Td [(y)]TJ 0 g 0 G -/F54 9.9626 Tf 16.598 0 Td [(the)-250(local)-250(indices)-250(of)-250(the)-250(end)-250(vertex)-250(of)-250(the)-250(edges)-250(being)-250(inserted.)]TJ 8.309 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(length)]TJ/F52 9.9626 Tf 171.978 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(.)]TJ/F51 11.9552 Tf -207.221 -21.918 Td [(Notes)]TJ +/F62 9.9626 Tf 10.521 0 Td [(If)]TJ/F60 9.9626 Tf 9.521 0 Td [(y)]TJ/F62 9.9626 Tf 7.827 0 Td [(is)-273(pr)18(esent,)-279(then)]TJ/F60 9.9626 Tf 70.133 0 Td [(y)]TJ/F62 9.9626 Tf 7.827 0 Td [(is)-273(overwritten)-273(with)-273(the)-273(translated)-274(integer)-273(indices,)-279(and)]TJ/F60 9.9626 Tf -80.628 -11.955 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(is)-250(left)-250(unchanged.)-310(Scope:)]TJ/F59 9.9626 Tf 112.557 0 Td [(global)]TJ/F62 9.9626 Tf -120.547 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +/F59 9.9626 Tf -24.907 -19.602 Td [(info)]TJ 0 g 0 G - [-500(This)-250(r)18(outine)-250(may)-250(only)-250(be)-250(called)-250(if)-250(the)-250(descriptor)-250(is)-250(in)-250(the)-250(build)-250(state;)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -21.108 Td [(Notes)]TJ 0 g 0 G - 0 -19.925 Td [(2.)]TJ +/F62 9.9626 Tf 166.875 -29.888 Td [(97)]TJ 0 g 0 G - [-500(This)-370(r)18(outine)-370(automatically)-370(ignor)18(es)-370(edges)-370(that)-370(do)-370(not)-370(insist)-370(on)-370(the)-370(cur)18(-)]TJ 12.453 -11.955 Td [(r)18(ent)-288(pr)18(ocess,)-298(i.e.)-424(edges)-288(for)-288(which)-288(neither)-288(the)-288(starting)-288(nor)-288(the)-288(end)-288(vertex)]TJ 0 -11.955 Td [(belong)-250(to)-250(the)-250(curr)18(ent)-250(pr)18(ocess.)]TJ +ET + +endstream +endobj +1641 0 obj +<< +/Length 672 +>> +stream 0 g 0 G - -12.453 -19.926 Td [(3.)]TJ 0 g 0 G - [-500(The)-322(second)-323(form)-322(of)-323(this)-322(r)18(outine)-323(will)-322(be)-322(useful)-323(when)-322(dealing)-323(with)-322(user)18(-)]TJ 12.453 -11.955 Td [(speci\002ed)-250(index)-250(mappings;)-250(see)-250(also)]TJ -0 0 1 rg 0 0 1 RG - [-250(2.3.1)]TJ 0 g 0 G - [(.)]TJ +BT +/F62 9.9626 Tf 163.158 706.129 Td [(1.)]TJ 0 g 0 G - 141.968 -314.819 Td [(75)]TJ + [-500(If)-272(an)-273(input)-272(index)-272(is)-273(out)-272(of)-273(range)1(,)-278(then)-273(the)-272(corr)18(esponding)-272(output)-273(index)-272(is)]TJ 12.453 -11.955 Td [(set)-250(to)-250(a)-250(negative)-250(number;)]TJ +0 g 0 G + -12.453 -19.926 Td [(2.)]TJ +0 g 0 G + [-500(The)-416(default)]TJ/F67 9.9626 Tf 68.74 0 Td [(I)]TJ/F62 9.9626 Tf 5.23 0 Td [(gnor)18(e)-416(means)-417(that)-416(the)-417(negative)-416(output)-416(is)-417(the)-416(only)-416(action)]TJ -61.517 -11.955 Td [(taken)-250(on)-250(an)-250(out-of-range)-250(input.)]TJ +0 g 0 G + 141.968 -571.855 Td [(98)]TJ 0 g 0 G ET endstream endobj -1389 0 obj +1648 0 obj << -/Length 5969 +/Length 5458 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.3)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(6.18)-1000(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 156.993 706.129 Td [(loc)]TJ +ET +q +1 0 0 1 173.646 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(cdasb)-250(\227)-250(Communication)-250(descriptor)-250(assembly)-250(routine)]TJ +/F59 11.9552 Tf 177.233 706.129 Td [(to)]TJ +ET +q +1 0 0 1 188.578 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 192.165 706.129 Td [(glob)-250(\227)-250(Local)-250(to)-250(global)-250(indices)-250(conversion)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_cdasb\050desc_a,)-525(info)-525([,)-525(mold]\051)]TJ +/F67 9.9626 Tf -92.27 -18.964 Td [(call)-525(psb_loc_to_glob\050x,)-525(y,)-525(desc_a,)-525(info,)-525(iact\051)]TJ 0 -11.955 Td [(call)-525(psb_loc_to_glob\050x,)-525(desc_a,)-525(info,)-525(iact\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(desc)]TJ + 0 -19.925 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(An)-250(integer)-250(vector)-250(of)-250(indices)-250(to)-250(be)-250(converted.)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in,)-250(inout)]TJ/F62 9.9626 Tf 38.735 0 Td [(.)]TJ -70.535 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -31.881 Td [(desc)]TJ ET q -1 0 0 1 171.218 625.596 cm +1 0 0 1 120.408 533.94 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 625.397 Td [(a)]TJ +/F59 9.9626 Tf 123.397 533.74 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 360.068 577.775 cm +1 0 0 1 309.258 486.119 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 363.206 577.576 Td [(desc)]TJ +/F67 9.9626 Tf 312.397 485.92 Td [(desc)]TJ ET q -1 0 0 1 384.755 577.775 cm +1 0 0 1 333.945 486.119 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 387.893 577.576 Td [(type)]TJ +/F67 9.9626 Tf 337.084 485.92 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -19.925 Td [(mold)]TJ +/F59 9.9626 Tf -258.11 -19.926 Td [(iact)]TJ +0 g 0 G +/F62 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F59 9.9626 Tf 253.796 0 Td [(global)]TJ/F62 9.9626 Tf -249.91 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(character)-190(variable)]TJ/F67 9.9626 Tf 143.341 0 Td [(I)]TJ/F62 9.9626 Tf 5.23 0 Td [(gnor)18(e,)]TJ/F67 9.9626 Tf 29.808 0 Td [(W)]TJ/F62 9.9626 Tf 5.231 0 Td [(arning)-190(or)]TJ/F67 9.9626 Tf 42.111 0 Td [(A)]TJ/F62 9.9626 Tf 5.231 0 Td [(bort,)-202(default)]TJ/F67 9.9626 Tf 55.839 0 Td [(I)]TJ/F62 9.9626 Tf 5.231 0 Td [(gnor)18(e.)]TJ +0 g 0 G +/F59 9.9626 Tf -316.929 -21.918 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(If)]TJ/F60 9.9626 Tf 9.727 0 Td [(y)]TJ/F62 9.9626 Tf 8.032 0 Td [(is)-294(not)-294(pr)18(esent,)-304(then)]TJ/F60 9.9626 Tf 88.385 0 Td [(x)]TJ/F62 9.9626 Tf 8.132 0 Td [(is)-294(overwritten)-294(with)-293(the)-294(translated)-294(integer)-294(indices.)]TJ -99.332 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(y)]TJ +0 g 0 G +/F62 9.9626 Tf 10.521 0 Td [(If)]TJ/F60 9.9626 Tf 9.705 0 Td [(y)]TJ/F62 9.9626 Tf 8.011 0 Td [(is)-292(not)-291(pr)18(esent,)-302(then)]TJ/F60 9.9626 Tf 88.122 0 Td [(y)]TJ/F62 9.9626 Tf 8.011 0 Td [(is)-292(overwritten)-291(with)-292(the)-291(translated)-292(integer)-292(indice)1(s,)]TJ -99.463 -11.955 Td [(and)]TJ/F60 9.9626 Tf 19.651 0 Td [(x)]TJ/F62 9.9626 Tf 7.696 0 Td [(is)-250(left)-250(unchanged.)-310(Scope:)]TJ/F59 9.9626 Tf 112.557 0 Td [(global)]TJ/F62 9.9626 Tf -139.904 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 141.968 -114.535 Td [(99)]TJ 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(index)-250(storage.)]TJ -3.875 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(object)-190(of)-190(type)-190(derived)-190(fr)18(om)-190(\050integer\051)]TJ/F59 9.9626 Tf 221.926 0 Td [(psb)]TJ -ET -q -1 0 0 1 413.855 510.029 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 416.994 509.83 Td [(T)]TJ ET -q -1 0 0 1 422.851 510.029 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q + +endstream +endobj +1653 0 obj +<< +/Length 3169 +>> +stream +0 g 0 G +0 g 0 G BT -/F59 9.9626 Tf 425.99 509.83 Td [(base)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(6.19)-1000(psb)]TJ ET q -1 0 0 1 447.539 510.029 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 450.677 509.83 Td [(vect)]TJ +/F59 11.9552 Tf 207.803 706.129 Td [(is)]TJ ET q -1 0 0 1 472.226 510.029 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 217.809 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 475.364 509.83 Td [(type)]TJ/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F59 11.9552 Tf 221.396 706.129 Td [(owned)-250(\227)]TJ 0 g 0 G -/F51 9.9626 Tf -345.58 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G +/F67 9.9626 Tf -70.691 -18.964 Td [(call)-525(psb_is_owned\050x,)-525(desc_a\051)]TJ 0 g 0 G - 0 -19.925 Td [(desc)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(Integer)-250(index.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(integer)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -31.88 Td [(desc)]TJ ET q -1 0 0 1 171.218 468.186 cm +1 0 0 1 171.218 545.895 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 467.987 Td [(a)]TJ +/F59 9.9626 Tf 174.207 545.696 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 360.068 420.366 cm +1 0 0 1 360.068 498.074 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 363.206 420.166 Td [(desc)]TJ +/F67 9.9626 Tf 363.206 497.875 Td [(desc)]TJ ET q -1 0 0 1 384.755 420.366 cm +1 0 0 1 384.755 498.074 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 387.893 420.166 Td [(type)]TJ +/F67 9.9626 Tf 387.893 497.875 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F59 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -19.925 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.906 -21.917 Td [(Notes)]TJ + 0 -19.925 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 12.453 -19.926 Td [(1.)]TJ +/F62 9.9626 Tf 72.777 0 Td [(A)-261(logical)-260(mask)-261(which)-261(is)-261(tr)8(ue)-261(if)]TJ/F60 9.9626 Tf 137.304 0 Td [(x)]TJ/F62 9.9626 Tf 7.803 0 Td [(is)-261(owned)-261(by)-260(the)-261(curr)18(ent)-261(pr)18(o-)]TJ -192.978 -11.955 Td [(cess)-250(Scope:)]TJ/F59 9.9626 Tf 51.567 0 Td [(local)]TJ/F62 9.9626 Tf -51.567 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ/F59 11.9552 Tf -71.651 -33.873 Td [(Notes)]TJ 0 g 0 G - [-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(descriptor)-250(is)-250(in)-250(the)-250(assembled)-250(state.)]TJ -12.453 -19.925 Td [(This)-274(call)-275(will)-274(set)-275(up)-274(all)-275(the)-274(necessary)-275(information)-274(for)-275(the)-274(halo)-275(data)-274(exchanges.)]TJ 0 -11.955 Td [(In)-337(doing)-337(so,)-358(the)-337(library)-337(will)-337(need)-336(to)-337(identify)-337(the)-337(set)-337(of)-337(pr)18(ocesse)1(s)-337(owning)-337(the)]TJ 0 -11.955 Td [(halo)-381(indices)-381(thr)18(ough)-380(the)-381(use)-381(of)-381(the)]TJ/F59 9.9626 Tf 163.76 0 Td [(desc%fnd_owner\050\051)]TJ/F54 9.9626 Tf 87.479 0 Td [(method;)-446(the)-381(owning)]TJ -251.239 -11.956 Td [(pr)18(ocesses)-273(ar)18(e)-273(the)-273(topological)-272(neighbours)-273(of)-273(the)-273(calling)-273(pr)18(ocess.)-379(If)-272(the)-273(user)-273(has)]TJ 0 -11.955 Td [(some)-248(backgr)18(ound)-248(information)-248(on)-248(the)-248(pr)18(ocesses)-248(that)-248(ar)18(e)-248(neighbours)-248(of)-248(the)-248(cur)18(-)]TJ 0 -11.955 Td [(r)18(ent)-274(one,)-281(it)-274(is)-275(possible)-274(to)-274(specify)-275(explicitly)-274(the)-274(list)-275(of)-274(adjacent)-274(pr)18(ocesses)-275(with)-274(a)]TJ 0 -11.955 Td [(call)-327(to)]TJ/F59 9.9626 Tf 30.401 0 Td [(desc%set_p_adjcncy\050list\051)]TJ/F54 9.9626 Tf 125.529 0 Td [(;)-365(this)-327(will)-327(speed)-327(u)1(p)-327(the)-327(subsequent)-327(call)-327(to)]TJ/F59 9.9626 Tf -155.93 -11.955 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 47.073 0 Td [(.)]TJ +/F62 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G - 119.801 -116.528 Td [(76)]TJ + [-500(This)-240(r)18(outine)-239(r)18(eturns)-240(a)]TJ/F67 9.9626 Tf 109.67 0 Td [(.true.)]TJ/F62 9.9626 Tf 33.769 0 Td [(value)-240(for)-239(an)-240(index)-239(that)-240(is)-239(strictly)-240(owned)-239(by)]TJ -130.986 -11.955 Td [(the)-250(curr)18(ent)-250(pr)18(ocess,)-250(excluding)-250(the)-250(halo)-250(indices)]TJ +0 g 0 G + 139.477 -263.975 Td [(100)]TJ 0 g 0 G ET endstream endobj -1396 0 obj +1659 0 obj << -/Length 3168 +/Length 4795 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.4)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(6.20)-1000(psb)]TJ ET q -1 0 0 1 147.429 706.328 cm +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 156.993 706.129 Td [(owned)]TJ +ET +q +1 0 0 1 194.903 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(cdcpy)-250(\227)-250(Copies)-250(a)-250(communication)-250(descriptor)]TJ +/F59 11.9552 Tf 198.489 706.129 Td [(index)-250(\227)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -51.121 -18.964 Td [(call)-525(psb_cdcpy\050desc_in,)-525(desc_out,)-525(info\051)]TJ +/F67 9.9626 Tf -98.594 -18.964 Td [(call)-525(psb_owned_index\050y,)-525(x,)-525(desc_a,)-525(info\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(desc)]TJ + 0 -19.925 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(Integer)-250(indices.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in,)-250(inout)]TJ/F62 9.9626 Tf 38.735 0 Td [(.)]TJ -70.535 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -31.88 Td [(desc)]TJ ET q -1 0 0 1 120.408 625.596 cm +1 0 0 1 120.408 545.895 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 625.397 Td [(in)]TJ +/F59 9.9626 Tf 123.397 545.696 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 14.386 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -12.981 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 309.258 577.775 cm +1 0 0 1 309.258 498.074 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 312.397 577.576 Td [(desc)]TJ +/F67 9.9626 Tf 312.397 497.875 Td [(desc)]TJ ET q -1 0 0 1 333.945 577.775 cm +1 0 0 1 333.945 498.074 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 337.084 577.576 Td [(type)]TJ +/F67 9.9626 Tf 337.084 497.875 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -258.11 -19.925 Td [(iact)]TJ 0 g 0 G +/F62 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F59 9.9626 Tf 253.796 0 Td [(global)]TJ/F62 9.9626 Tf -249.91 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(character)-190(variable)]TJ/F67 9.9626 Tf 143.341 0 Td [(I)]TJ/F62 9.9626 Tf 5.23 0 Td [(gnor)18(e,)]TJ/F67 9.9626 Tf 29.808 0 Td [(W)]TJ/F62 9.9626 Tf 5.231 0 Td [(arning)-190(or)]TJ/F67 9.9626 Tf 42.111 0 Td [(A)]TJ/F62 9.9626 Tf 5.231 0 Td [(bort,)-202(default)]TJ/F67 9.9626 Tf 55.839 0 Td [(I)]TJ/F62 9.9626 Tf 5.231 0 Td [(gnor)18(e.)]TJ 0 g 0 G - 0 -19.925 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 535.932 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 535.733 Td [(out)]TJ +/F59 9.9626 Tf -316.929 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 19.925 0 Td [(the)-250(communication)-250(descriptor)-250(copy)111(.)]TJ -18.52 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 488.112 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 487.912 Td [(desc)]TJ -ET -q -1 0 0 1 333.945 488.112 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 337.084 487.912 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ + 0 -19.925 Td [(y)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -19.925 Td [(info)]TJ +/F62 9.9626 Tf 10.521 0 Td [(A)-200(logical)-200(mask)-200(which)-201(is)-200(tr)8(ue)-200(for)-200(all)-200(corr)18(esponding)-200(entries)-200(of)]TJ/F60 9.9626 Tf 260.812 0 Td [(x)]TJ/F62 9.9626 Tf 7.2 0 Td [(that)-200(ar)18(e)-200(owned)]TJ -253.626 -11.955 Td [(by)-250(the)-250(curr)18(ent)-250(pr)18(ocess)-250(Scope:)]TJ/F59 9.9626 Tf 131.027 0 Td [(local)]TJ/F62 9.9626 Tf -131.027 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(rank)-250(one)-250(logical)-250(array)111(.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(info)]TJ 0 g 0 G - 141.968 -329.728 Td [(77)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -21.917 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.454 -19.926 Td [(1.)]TJ +0 g 0 G + [-500(This)-429(r)18(outine)-428(r)18(eturns)-429(a)]TJ/F67 9.9626 Tf 117.209 0 Td [(.true.)]TJ/F62 9.9626 Tf 35.654 0 Td [(value)-429(for)-428(those)-429(indices)-429(that)-429(ar)18(e)-428(strictly)]TJ -140.41 -11.955 Td [(owned)-250(by)-250(the)-250(curr)18(ent)-250(pr)18(ocess,)-250(excluding)-250(the)-250(halo)-250(indices)]TJ +0 g 0 G + 139.477 -140.438 Td [(101)]TJ 0 g 0 G ET endstream endobj -1401 0 obj +1665 0 obj << -/Length 2167 +/Length 3147 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.5)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(6.21)-1000(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 207.803 706.129 Td [(is)]TJ +ET +q +1 0 0 1 217.809 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(cdfree)-250(\227)-250(Frees)-250(a)-250(communication)-250(descriptor)]TJ +/F59 11.9552 Tf 221.396 706.129 Td [(local)-250(\227)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_cdfree\050desc_a,)-525(info\051)]TJ +/F67 9.9626 Tf -70.691 -18.964 Td [(call)-525(psb_is_local\050x,)-525(desc_a\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(desc)]TJ + 0 -19.925 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(Integer)-250(index.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(integer)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -31.88 Td [(desc)]TJ ET q -1 0 0 1 171.218 625.596 cm +1 0 0 1 171.218 545.895 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 625.397 Td [(a)]TJ +/F59 9.9626 Tf 174.207 545.696 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)-250(to)-250(be)-250(fr)18(eed.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 360.068 577.775 cm +1 0 0 1 360.068 498.074 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 363.206 577.576 Td [(desc)]TJ +/F67 9.9626 Tf 363.206 497.875 Td [(desc)]TJ ET q -1 0 0 1 384.755 577.775 cm +1 0 0 1 384.755 498.074 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 387.893 577.576 Td [(type)]TJ +/F67 9.9626 Tf 387.893 497.875 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(info)]TJ + 0 -19.925 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F62 9.9626 Tf 72.777 0 Td [(A)-244(logical)-244(mask)-243(which)-244(is)-244(tr)8(ue)-244(if)]TJ/F60 9.9626 Tf 136.118 0 Td [(x)]TJ/F62 9.9626 Tf 7.635 0 Td [(is)-244(local)-244(to)-243(the)-244(curr)18(ent)-244(pr)18(ocess)]TJ -191.623 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ/F59 11.9552 Tf -71.651 -33.873 Td [(Notes)]TJ 0 g 0 G - 141.968 -397.474 Td [(78)]TJ +/F62 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(This)-199(r)18(outine)-200(r)18(eturns)-199(a)]TJ/F67 9.9626 Tf 108.069 0 Td [(.true.)]TJ/F62 9.9626 Tf 33.369 0 Td [(value)-199(for)-200(an)-199(index)-199(that)-200(is)-199(local)-200(to)-199(the)-199(curr)18(ent)]TJ -128.984 -11.955 Td [(pr)18(ocess,)-250(including)-250(the)-250(halo)-250(indices)]TJ +0 g 0 G + 139.476 -263.975 Td [(102)]TJ 0 g 0 G ET endstream endobj -1407 0 obj +1671 0 obj << -/Length 5710 +/Length 4785 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.6)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(6.22)-1000(psb)]TJ ET q -1 0 0 1 147.429 706.328 cm +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 156.993 706.129 Td [(local)]TJ +ET +q +1 0 0 1 183.605 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(cdbldext)-190(\227)-190(Build)-190(an)-190(extended)-190(communication)-190(descrip-)]TJ -24.221 -13.948 Td [(tor)]TJ +/F59 11.9552 Tf 187.192 706.129 Td [(index)-250(\227)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -26.9 -19.693 Td [(call)-525(psb_cdbldext\050a,desc_a,nl,desc_out,)-525(info,)-525(extype\051)]TJ/F54 9.9626 Tf 14.944 -23.422 Td [(This)-379(subr)18(outine)-379(builds)-379(an)-379(extended)-379(communication)-379(descriptor)74(,)-411(based)-379(on)]TJ -14.944 -11.955 Td [(the)-428(input)-428(descriptor)]TJ/F59 9.9626 Tf 95.499 0 Td [(desc_a)]TJ/F54 9.9626 Tf 35.646 0 Td [(and)-428(on)-428(the)-428(stencil)-428(speci\002ed)-428(thr)18(ough)-428(the)-427(input)]TJ -131.145 -11.955 Td [(sparse)-250(matrix)]TJ/F59 9.9626 Tf 62.107 0 Td [(a)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ +/F67 9.9626 Tf -87.297 -18.964 Td [(call)-525(psb_local_index\050y,)-525(x,)-525(desc_a,)-525(info\051)]TJ 0 g 0 G -/F51 9.9626 Tf -67.337 -21.054 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -21.429 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -21.43 Td [(a)]TJ + 0 -19.925 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(A)-250(sparse)-250(matrix)-250(Scope:)]TJ/F51 9.9626 Tf 100.691 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -107.326 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(type.)]TJ +/F62 9.9626 Tf 9.963 0 Td [(Integer)-250(indices.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in,)-250(inout)]TJ/F62 9.9626 Tf 38.735 0 Td [(.)]TJ -70.535 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.429 Td [(desc)]TJ +/F59 9.9626 Tf -24.907 -31.88 Td [(desc)]TJ ET q -1 0 0 1 120.408 504.147 cm +1 0 0 1 120.408 545.895 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 123.397 503.948 Td [(a)]TJ +/F59 9.9626 Tf 123.397 545.696 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 309.258 456.326 cm +1 0 0 1 309.258 498.074 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 312.397 456.127 Td [(Tspmat)]TJ +/F67 9.9626 Tf 312.397 497.875 Td [(desc)]TJ ET q -1 0 0 1 344.406 456.326 cm +1 0 0 1 333.945 498.074 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 347.544 456.127 Td [(type)]TJ +/F67 9.9626 Tf 337.084 497.875 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -268.571 -21.43 Td [(nl)]TJ +/F59 9.9626 Tf -258.11 -19.925 Td [(iact)]TJ 0 g 0 G -/F54 9.9626 Tf 14.386 0 Td [(the)-250(number)-250(of)-250(additional)-250(layers)-250(desir)18(ed.)]TJ 10.521 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F52 9.9626 Tf 131.102 0 Td [(n)-25(l)]TJ/F83 10.3811 Tf 11.873 0 Td [(\025)]TJ/F54 9.9626 Tf 10.961 0 Td [(0.)]TJ +/F62 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F59 9.9626 Tf 253.796 0 Td [(global)]TJ/F62 9.9626 Tf -249.91 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(character)-190(variable)]TJ/F67 9.9626 Tf 143.341 0 Td [(I)]TJ/F62 9.9626 Tf 5.23 0 Td [(gnor)18(e,)]TJ/F67 9.9626 Tf 29.808 0 Td [(W)]TJ/F62 9.9626 Tf 5.231 0 Td [(arning)-190(or)]TJ/F67 9.9626 Tf 42.111 0 Td [(A)]TJ/F62 9.9626 Tf 5.231 0 Td [(bort,)-202(default)]TJ/F67 9.9626 Tf 55.839 0 Td [(I)]TJ/F62 9.9626 Tf 5.231 0 Td [(gnor)18(e.)]TJ 0 g 0 G -/F51 9.9626 Tf -178.843 -21.43 Td [(extype)]TJ +/F59 9.9626 Tf -316.929 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 34.869 0 Td [(the)-250(kind)-250(of)-250(estension)-250(r)18(equir)18(ed.)]TJ -9.962 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 40.677 0 Td [(.)]TJ -64.677 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-345(as:)-501(an)-345(integer)-346(value)]TJ/F59 9.9626 Tf 136.676 0 Td [(psb_ovt_xhal_)]TJ/F54 9.9626 Tf 67.994 0 Td [(,)]TJ/F59 9.9626 Tf 6.169 0 Td [(psb_ovt_asov_)]TJ/F54 9.9626 Tf 67.994 0 Td [(,)-369(default:)]TJ/F59 9.9626 Tf -278.833 -11.955 Td [(psb_ovt_xhal_)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -23.422 Td [(On)-250(Return)]TJ + 0 -19.925 Td [(y)]TJ 0 g 0 G +/F62 9.9626 Tf 10.521 0 Td [(A)-270(logical)-270(mask)-270(which)-271(is)-270(tr)8(ue)-270(for)-270(all)-270(corr)18(esponding)-270(entries)-270(of)]TJ/F60 9.9626 Tf 268.484 0 Td [(x)]TJ/F62 9.9626 Tf 7.897 0 Td [(that)-270(ar)18(e)-270(local)]TJ -261.995 -11.955 Td [(to)-250(the)-250(curr)18(ent)-250(pr)18(ocess)-250(Scope:)]TJ/F59 9.9626 Tf 128.666 0 Td [(local)]TJ/F62 9.9626 Tf -128.666 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(rank)-250(one)-250(logical)-250(array)111(.)]TJ 0 g 0 G - 0 -21.43 Td [(desc)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -21.917 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.454 -19.926 Td [(1.)]TJ +0 g 0 G + [-500(This)-264(r)18(outine)-265(r)18(eturns)-264(a)]TJ/F67 9.9626 Tf 110.663 0 Td [(.true.)]TJ/F62 9.9626 Tf 34.017 0 Td [(value)-264(for)-265(those)-264(indices)-265(that)-264(ar)18(e)-265(local)-264(to)-265(the)]TJ -132.227 -11.955 Td [(curr)18(ent)-250(pr)18(ocess,)-250(including)-250(the)-250(halo)-250(indices.)]TJ +0 g 0 G + 139.477 -140.438 Td [(103)]TJ +0 g 0 G +ET + +endstream +endobj +1678 0 obj +<< +/Length 3647 +>> +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 150.705 706.129 Td [(6.23)-1000(psb)]TJ ET q -1 0 0 1 120.408 261.018 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 207.803 706.129 Td [(get)]TJ +ET +q +1 0 0 1 225.126 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 123.397 260.819 Td [(out)]TJ +/F59 11.9552 Tf 228.712 706.129 Td [(boundary)-250(\227)-250(Extract)-250(list)-250(of)-250(boundary)-250(elements)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -78.007 -18.964 Td [(call)-525(psb_get_boundary\050bndel,)-525(desc,)-525(info\051)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(desc)]TJ 0 g 0 G -/F54 9.9626 Tf 19.925 0 Td [(the)-250(extended)-250(communication)-250(descriptor)74(.)]TJ -18.52 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 309.258 213.198 cm +1 0 0 1 360.068 577.775 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 312.397 212.998 Td [(desc)]TJ +/F67 9.9626 Tf 363.206 577.576 Td [(desc)]TJ ET q -1 0 0 1 333.945 213.198 cm +1 0 0 1 384.755 577.775 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 337.084 212.998 Td [(type)]TJ +/F67 9.9626 Tf 387.893 577.576 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -21.429 Td [(info)]TJ +/F59 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -23.422 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 166.875 -29.888 Td [(79)]TJ + 0 -19.925 Td [(bndel)]TJ 0 g 0 G -ET - -endstream -endobj -1412 0 obj -<< -/Length 1484 ->> -stream +/F62 9.9626 Tf 31.541 0 Td [(The)-307(list)-307(of)-307(boundary)-307(elements)-307(on)-306(the)-307(calling)-307(pr)18(ocess,)-321(in)-307(local)-307(number)18(-)]TJ -6.635 -11.955 Td [(ing.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(Speci\002ed)-234(as:)-302(a)-234(rank)-234(one)-234(a)-1(r)1(ray)-235(with)-234(the)-234(ALLOCA)74(T)74(ABLE)-234(attribute,)-237(of)-234(type)]TJ 0 -11.955 Td [(integer)74(.)]TJ 0 g 0 G +/F59 9.9626 Tf -24.906 -31.881 Td [(info)]TJ 0 g 0 G +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 163.158 706.129 Td [(1.)]TJ +/F62 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G - [-500(Specifying)]TJ/F59 9.9626 Tf 61.745 0 Td [(psb_ovt_xhal_)]TJ/F54 9.9626 Tf 70.881 0 Td [(for)-290(the)]TJ/F59 9.9626 Tf 32.282 0 Td [(extype)]TJ/F54 9.9626 Tf 34.269 0 Td [(ar)18(gument)-290(the)-289(user)-290(will)-290(obtain)]TJ -186.724 -11.955 Td [(a)-400(descriptor)-400(for)-400(a)-400(domain)-400(partition)-400(in)-400(which)-400(the)-400(additional)-400(layers)-400(ar)18(e)]TJ 0 -11.955 Td [(fetched)-222(as)-221(part)-222(of)-221(an)-222(\050extended\051)-221(halo;)-232(however)-221(the)-222(index-to-pr)18(ocess)-221(map-)]TJ 0 -11.956 Td [(ping)-250(is)-250(identical)-250(to)-250(that)-250(of)-250(the)-250(base)-250(descriptor;)]TJ + [-500(If)-219(ther)18(e)-220(ar)18(e)-219(no)-220(boundary)-219(elements)-219(\050i.e.,)-226(if)-219(the)-220(local)-219(part)-219(of)-220(the)-219(connectivity)]TJ 12.453 -11.956 Td [(graph)-391(is)-392(self-contained\051)-391(the)-392(output)-391(vector)-391(is)-392(set)-391(to)-391(the)-392(\223not)-391(allocated\224)]TJ 0 -11.955 Td [(state.)]TJ 0 g 0 G -12.453 -19.925 Td [(2.)]TJ 0 g 0 G - [-500(Specifying)]TJ/F59 9.9626 Tf 61.745 0 Td [(psb_ovt_asov_)]TJ/F54 9.9626 Tf 70.881 0 Td [(for)-290(the)]TJ/F59 9.9626 Tf 32.282 0 Td [(extype)]TJ/F54 9.9626 Tf 34.269 0 Td [(ar)18(gument)-290(the)-289(user)-290(will)-290(obtain)]TJ -186.724 -11.955 Td [(a)-330(descriptor)-331(with)-330(an)-330(overlapped)-331(decomposition:)-470(the)-331(additional)-330(layer)-330(is)]TJ 0 -11.955 Td [(aggr)18(egated)-326(to)-326(the)-326(local)-326(subdomain)-326(\050and)-326(thus)-326(is)-325(an)-326(overlap\051,)-345(and)-326(a)-326(new)]TJ 0 -11.955 Td [(halo)-250(extending)-250(beyond)-250(the)-250(last)-250(additional)-250(layer)-250(is)-250(formed.)]TJ + [-500(Otherwise)-206(the)-205(size)-206(of)]TJ/F67 9.9626 Tf 105.891 0 Td [(bndel)]TJ/F62 9.9626 Tf 28.201 0 Td [(will)-206(be)-205(exactly)-206(equal)-206(to)-206(the)-205(number)-206(of)-206(bound-)]TJ -121.639 -11.955 Td [(ary)-250(elements.)]TJ 0 g 0 G - 141.968 -524.035 Td [(80)]TJ + 139.477 -196.229 Td [(104)]TJ 0 g 0 G ET endstream endobj -1420 0 obj +1685 0 obj << -/Length 5699 +/Length 3458 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.7)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(6.24)-1000(psb)]TJ ET q -1 0 0 1 147.429 706.328 cm +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 156.993 706.129 Td [(get)]TJ +ET +q +1 0 0 1 174.316 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(spall)-250(\227)-250(Allocates)-250(a)-250(sparse)-250(matrix)]TJ +/F59 11.9552 Tf 177.903 706.129 Td [(overlap)-250(\227)-250(Extract)-250(list)-250(of)-250(overlap)-250(elements)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -51.121 -19.277 Td [(call)-525(psb_spall\050a,)-525(desc_a,)-525(info)-525([,)-525(nnz,)-525(dupl,)-525(bldmode]\051)]TJ +/F67 9.9626 Tf -78.008 -18.964 Td [(call)-525(psb_get_overlap\050ovrel,)-525(desc,)-525(info\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.403 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.571 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -20.572 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 623.505 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 623.306 Td [(a)]TJ + 0 -19.925 Td [(desc)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 309.258 575.684 cm +1 0 0 1 309.258 577.775 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 312.397 575.485 Td [(desc)]TJ +/F67 9.9626 Tf 312.397 577.576 Td [(desc)]TJ ET q -1 0 0 1 333.945 575.684 cm +1 0 0 1 333.945 577.775 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 337.084 575.485 Td [(type)]TJ +/F67 9.9626 Tf 337.084 577.576 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -20.572 Td [(nnz)]TJ +/F59 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 22.137 0 Td [(An)-230(estimate)-230(of)-230(the)-230(number)-230(of)-231(nonzer)18(oes)-230(in)-230(the)-230(local)-230(part)-230(of)-230(the)-230(assembled)]TJ 2.77 -11.955 Td [(matrix.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.572 Td [(dupl)]TJ + 0 -19.925 Td [(ovrel)]TJ 0 g 0 G -/F54 9.9626 Tf 26.561 0 Td [(How)-250(to)-250(handle)-250(duplicate)-250(coef)18(\002cients.)]TJ -1.654 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-243(as:)-306(integer)74(,)-244(possible)-243(values:)]TJ/F59 9.9626 Tf 164.941 0 Td [(psb_dupl_ovwrt_)]TJ/F54 9.9626 Tf 78.455 0 Td [(,)]TJ/F59 9.9626 Tf 4.923 0 Td [(psb_dupl_add_)]TJ/F54 9.9626 Tf 67.995 0 Td [(,)]TJ/F59 9.9626 Tf -316.314 -11.955 Td [(psb_dupl_err_)]TJ/F54 9.9626 Tf 67.994 0 Td [(.)]TJ +/F62 9.9626 Tf 28.234 0 Td [(The)-250(list)-250(of)-250(overlap)-250(elements)-250(on)-250(the)-250(calling)-250(pr)18(ocess,)-250(in)-250(local)-250(numbering.)]TJ -3.327 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-234(as:)-302(a)-234(rank)-234(one)-234(array)-235(with)-234(the)-234(ALLOCA)74(T)74(ABLE)-234(attribute,)-237(of)-234(type)]TJ 0 -11.955 Td [(integer)74(.)]TJ 0 g 0 G -/F51 9.9626 Tf -92.901 -20.572 Td [(bldmode)]TJ +/F59 9.9626 Tf -24.907 -31.88 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 45.938 0 Td [(Whether)-372(to)-372(kee)1(p)-372(track)-372(of)-372(matrix)-372(entries)-371(that)-372(do)-372(not)-372(belong)-371(to)-372(the)]TJ -21.031 -11.955 Td [(curr)18(ent)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-190(as:)-280(an)-190(integer)-190(value)]TJ/F59 9.9626 Tf 128.287 0 Td [(psb_matbld_noremote_)]TJ/F54 9.9626 Tf 104.607 0 Td [(,)]TJ/F59 9.9626 Tf 4.503 0 Td [(psb_matbld_remote_)]TJ/F54 9.9626 Tf 94.146 0 Td [(.)]TJ -331.543 -11.955 Td [(Default:)]TJ/F59 9.9626 Tf 38.515 0 Td [(psb_matbld_noremote_)]TJ/F54 9.9626 Tf 104.607 0 Td [(.)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ 0 g 0 G -/F51 9.9626 Tf -168.029 -22.402 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G + [-500(If)-287(ther)18(e)-287(ar)18(e)-287(no)-287(overlap)-287(elements)-287(the)-287(output)-287(vector)-287(is)-287(set)-287(to)-287(the)-287(\223not)-287(allo-)]TJ 12.453 -11.955 Td [(cated\224)-250(state.)]TJ 0 g 0 G - 0 -20.572 Td [(a)]TJ + -12.453 -19.926 Td [(2.)]TJ +0 g 0 G + [-500(Otherwise)-194(the)-194(size)-195(of)]TJ/F67 9.9626 Tf 105.434 0 Td [(ovrel)]TJ/F62 9.9626 Tf 28.087 0 Td [(will)-194(be)-194(exactly)-195(equal)-194(to)-194(the)-194(number)-195(of)-194(overlap)]TJ -121.068 -11.955 Td [(elements.)]TJ +0 g 0 G + 139.477 -220.139 Td [(105)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(to)-250(be)-250(allocated.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf -28.343 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf -24 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET + +endstream +endobj +1692 0 obj +<< +/Length 5480 +>> +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 150.705 706.129 Td [(6.25)-1000(psb)]TJ ET q -1 0 0 1 309.258 231.892 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 312.397 231.692 Td [(Tspmat)]TJ +/F59 11.9552 Tf 207.803 706.129 Td [(sp)]TJ ET q -1 0 0 1 344.406 231.892 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 221.133 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 347.544 231.692 Td [(type)]TJ +/F59 11.9552 Tf 224.719 706.129 Td [(getrow)-250(\227)-250(Extract)-250(row\050s\051)-250(from)-250(a)-250(sparse)-250(matrix)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -268.571 -20.571 Td [(info)]TJ +/F67 9.9626 Tf -74.014 -19.204 Td [(call)-525(psb_sp_getrow\050row,)-525(a,)-525(nz,)-525(ia,)-525(ja,)-525(val,)-525(info,)-525(&)]TJ 73.225 -11.955 Td [(&)-525(append,)-525(nzin,)-525(lrw\051)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -22.564 Td [(Notes)]TJ +/F59 9.9626 Tf -73.225 -22.29 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -20.41 Td [(1.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - [-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(sparse)-250(matrix)-250(is)-250(in)-250(the)-250(build)-250(state.)]TJ +/F59 9.9626 Tf -29.828 -20.42 Td [(On)-250(Entry)]TJ 0 g 0 G - 154.421 -29.888 Td [(81)]TJ 0 g 0 G + 0 -20.421 Td [(row)]TJ +0 g 0 G +/F62 9.9626 Tf 22.695 0 Td [(The)-250(\050\002rst\051)-250(r)18(ow)-250(to)-250(be)-250(extracted.)]TJ 2.212 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf -28.343 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(required)]TJ/F62 9.9626 Tf -23.999 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)]TJ/F69 10.3811 Tf 104.322 0 Td [(>)]TJ/F62 9.9626 Tf 10.962 0 Td [(0.)]TJ +0 g 0 G +/F59 9.9626 Tf -140.191 -20.42 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(fr)18(om)-250(which)-250(to)-250(get)-250(r)18(ows.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf -28.343 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(required)]TJ/F62 9.9626 Tf -23.999 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.137 0 Td [(psb)]TJ ET - -endstream -endobj -1425 0 obj -<< -/Length 1141 ->> -stream +q +1 0 0 1 360.068 495.976 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 363.206 495.777 Td [(Tspmat)]TJ +ET +q +1 0 0 1 395.216 495.976 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 398.354 495.777 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -268.57 -20.421 Td [(append)]TJ +0 g 0 G +/F62 9.9626 Tf 39.292 0 Td [(Whether)-250(to)-250(append)-250(or)-250(overwrite)-250(existing)-250(output.)]TJ -14.386 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf -24 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value)-250(default:)-310(false)-250(\050overwrite\051.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -20.421 Td [(nzin)]TJ 0 g 0 G +/F62 9.9626 Tf 25.454 0 Td [(Input)-250(size)-250(to)-250(be)-250(appended)-250(to.)]TJ -0.548 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf -24 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-304(as:)-417(an)-303(integer)]TJ/F69 10.3811 Tf 106.988 0 Td [(>)]TJ/F62 9.9626 Tf 11.949 0 Td [(0.)-471(When)-303(append)-303(is)-304(tr)8(ue,)-317(speci\002es)-303(how)-304(many)]TJ -118.937 -11.955 Td [(entries)-250(in)-250(the)-250(output)-250(vectors)-250(ar)18(e)-250(alr)18(eady)-250(\002lled.)]TJ 0 g 0 G +/F59 9.9626 Tf -24.906 -20.421 Td [(lrw)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 163.158 706.129 Td [(2.)]TJ +/F62 9.9626 Tf 20.473 0 Td [(The)-250(last)-250(r)18(ow)-250(to)-250(be)-250(extracted.)]TJ 4.433 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf -24 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)]TJ/F69 10.3811 Tf 104.323 0 Td [(>)]TJ/F62 9.9626 Tf 10.962 0 Td [(0,)-250(default:)]TJ/F60 9.9626 Tf 46.878 0 Td [(r)-17(o)-35(w)]TJ/F62 9.9626 Tf 16.134 0 Td [(.)]TJ 0 g 0 G - [-500(The)-250(descriptor)-250(may)-250(be)-250(in)-250(either)-250(the)-250(build)-250(or)-250(assembled)-250(state.)]TJ +/F59 9.9626 Tf -203.203 -22.29 Td [(On)-250(Return)]TJ 0 g 0 G - 0 -19.925 Td [(3.)]TJ 0 g 0 G - [-500(Pr)18(oviding)-219(a)-219(good)-219(estimate)-218(for)-219(the)-219(number)-219(of)-219(nonzer)18(oes)]TJ/F52 9.9626 Tf 255.761 0 Td [(n)-25(n)-25(z)]TJ/F54 9.9626 Tf 18.305 0 Td [(in)-219(the)-219(assem-)]TJ -261.613 -11.956 Td [(bled)-295(matri)1(x)-295(may)-294(substantially)-295(impr)18(ove)-294(performance)-295(in)-294(the)-295(matrix)-294(build)]TJ 0 -11.955 Td [(phase,)-370(as)-346(it)-346(will)-345(r)18(educe)-346(or)-346(eliminate)-346(the)-346(need)-346(for)-345(\050potentially)-346(multiple\051)]TJ 0 -11.955 Td [(data)-250(r)18(eallocations;)]TJ + 0 -20.42 Td [(nz)]TJ +0 g 0 G +/F62 9.9626 Tf 16.05 0 Td [(the)-250(number)-250(of)-250(elements)-250(r)18(eturned)-250(by)-250(this)-250(call.)]TJ 8.856 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(Returned)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ 0 g 0 G - -12.453 -19.925 Td [(4.)]TJ +/F59 9.9626 Tf -24.906 -20.421 Td [(ia)]TJ 0 g 0 G - [-500(Using)]TJ/F59 9.9626 Tf 41.798 0 Td [(psb_matbld_remote_)]TJ/F54 9.9626 Tf 97.28 0 Td [(is)-315(likel)1(y)-315(to)-315(cause)-314(a)-315(r)8(untime)-314(over)18(head)-315(at)-314(as-)]TJ -126.625 -11.955 Td [(sembly)-250(time;)]TJ +/F62 9.9626 Tf 13.28 0 Td [(the)-250(r)18(ow)-250(indices.)]TJ 11.626 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.344 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(with)-250(the)]TJ/F67 9.9626 Tf 169.114 0 Td [(ALLOCATABLE)]TJ/F62 9.9626 Tf 60.025 0 Td [(attribute.)]TJ 0 g 0 G - 141.968 -528.02 Td [(82)]TJ + -89.662 -29.887 Td [(106)]TJ 0 g 0 G ET endstream endobj -1433 0 obj +1696 0 obj << -/Length 5375 +/Length 3529 >> stream 0 g 0 G 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.8)-1000(psb)]TJ -ET -q -1 0 0 1 147.429 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 151.016 706.129 Td [(spins)-233(\227)-233(Insert)-233(a)-233(set)-233(of)-234(coef)18(\002cients)-233(into)-233(a)-233(sparse)-233(matrix)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -51.121 -20.373 Td [(call)-525(psb_spins\050nz,)-525(ia,)-525(ja,)-525(val,)-525(a,)-525(desc_a,)-525(info)-525([,local]\051)]TJ 0 -11.956 Td [(call)-525(psb_spins\050nr,)-525(irw,)-525(irp,)-525(ja,)-525(val,)-525(a,)-525(desc_a,)-525(info)-525([,local]\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -24.099 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -22.835 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -22.834 Td [(nz)]TJ -0 g 0 G -/F54 9.9626 Tf 16.05 0 Td [(the)-250(number)-250(of)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 8.857 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.834 Td [(nr)]TJ +BT +/F59 9.9626 Tf 99.895 706.129 Td [(ja)]TJ 0 g 0 G -/F54 9.9626 Tf 14.944 0 Td [(the)-250(number)-250(of)-250(r)18(ows)-250(to)-250(be)-250(inserted.)]TJ 9.963 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ +/F62 9.9626 Tf 13.281 0 Td [(the)-250(column)-250(indices)-250(of)-250(the)-250(elements)-250(to)-250(be)-250(inserted.)]TJ 11.626 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(with)-250(the)]TJ/F67 9.9626 Tf 169.114 0 Td [(ALLOCATABLE)]TJ/F62 9.9626 Tf 60.024 0 Td [(attribute.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.834 Td [(irw)]TJ +/F59 9.9626 Tf -254.045 -19.925 Td [(val)]TJ 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(the)-250(\002rst)-250(r)18(ow)-250(to)-250(be)-250(inserted.)]TJ 4.434 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ +/F62 9.9626 Tf 18.82 0 Td [(the)-250(elements)-250(to)-250(be)-250(inserted.)]TJ 6.087 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(r)18(eal)-250(array)-250(with)-250(the)]TJ/F67 9.9626 Tf 148.761 0 Td [(ALLOCATABLE)]TJ/F62 9.9626 Tf 60.024 0 Td [(attribute.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.834 Td [(ia)]TJ +/F59 9.9626 Tf -233.692 -19.925 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 13.281 0 Td [(the)-250(r)18(ow)-250(indices)-250(of)-250(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 11.626 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(size)]TJ/F52 9.9626 Tf 160.8 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(.)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ 0 g 0 G -/F51 9.9626 Tf -196.043 -22.834 Td [(irp)]TJ +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 18.262 0 Td [(the)-250(r)18(ow)-250(pointers)-250(of)-250(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 6.645 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(size)]TJ/F52 9.9626 Tf 160.8 0 Td [(n)-15(r)]TJ/F85 10.3811 Tf 11.85 0 Td [(+)]TJ/F54 9.9626 Tf 10.131 0 Td [(1.)]TJ + [-500(The)-307(output)]TJ/F60 9.9626 Tf 65.308 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 13.394 0 Td [(is)-307(always)-307(the)-307(size)-307(of)-307(the)-307(output)-307(generated)-307(by)-306(the)-307(curr)18(ent)]TJ -66.249 -11.955 Td [(call;)-283(thus,)-277(if)]TJ/F67 9.9626 Tf 53.971 0 Td [(append=.true.)]TJ/F62 9.9626 Tf 67.994 0 Td [(,)-278(the)-271(total)-272(output)-272(size)-272(will)-272(be)]TJ/F60 9.9626 Tf 129.372 0 Td [(n)-25(z)-18(i)-32(n)]TJ/F93 10.3811 Tf 21.286 0 Td [(+)]TJ/F60 9.9626 Tf 10.336 0 Td [(n)-25(z)]TJ/F62 9.9626 Tf 10.337 0 Td [(,)-277(with)]TJ -293.296 -11.955 Td [(the)-292(newly)-293(extracted)-292(coef)18(\002cients)-293(stor)18(ed)-292(in)-293(entries)]TJ/F67 9.9626 Tf 217.177 0 Td [(nzin+1:nzin+nz)]TJ/F62 9.9626 Tf 76.139 0 Td [(of)-292(the)]TJ -293.316 -11.955 Td [(array)-250(ar)18(guments;)]TJ 0 g 0 G -/F51 9.9626 Tf -207.688 -22.835 Td [(ja)]TJ + -12.453 -19.926 Td [(2.)]TJ 0 g 0 G -/F54 9.9626 Tf 13.28 0 Td [(the)-250(column)-250(indices)-250(of)-250(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 11.627 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(size)]TJ/F52 9.9626 Tf 160.8 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.336 0 Td [(.)]TJ + [-500(When)]TJ/F67 9.9626 Tf 41.275 0 Td [(append=.true.)]TJ/F62 9.9626 Tf 70.485 0 Td [(the)-250(output)-250(arrays)-250(ar)18(e)-250(r)18(eallocated)-250(as)-250(necessary;)]TJ 0 g 0 G -/F51 9.9626 Tf -196.043 -22.835 Td [(val)]TJ + -111.76 -19.925 Td [(3.)]TJ 0 g 0 G -/F54 9.9626 Tf 18.82 0 Td [(the)-250(coef)18(\002cients)-250(to)-250(be)-250(inserted.)]TJ 6.087 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-315(as:)-439(an)-314(array)-315(of)-315(size)]TJ/F52 9.9626 Tf 131.853 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.337 0 Td [(.)-504(Must)-314(be)-315(of)-315(the)-314(same)-315(type)-315(and)-314(kind)-315(of)]TJ -142.19 -11.956 Td [(the)-250(coef)18(\002cients)-250(of)-250(the)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 157.901 0 Td [(a)]TJ/F54 9.9626 Tf 4.548 0 Td [(.)]TJ + [-500(The)-218(r)18(ow)-218(and)-219(column)-218(indices)-218(ar)18(e)-218(r)18(eturned)-218(in)-219(the)-218(local)-218(numbering)-218(scheme;)]TJ 12.453 -11.955 Td [(if)-190(the)-190(global)-190(numbering)-190(is)-190(desir)18(ed,)-202(the)-190(user)-190(may)-190(employ)-190(the)]TJ/F67 9.9626 Tf 258.836 0 Td [(psb_loc_to_glob)]TJ/F62 9.9626 Tf -258.836 -11.955 Td [(r)18(outine)-250(on)-250(the)-250(output.)]TJ 0 g 0 G - -20.481 -29.887 Td [(83)]TJ + 139.477 -290.909 Td [(107)]TJ 0 g 0 G ET endstream endobj -1439 0 obj +1706 0 obj << -/Length 6861 +/Length 3995 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 150.705 706.129 Td [(desc)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(6.26)-1000(psb)]TJ ET q -1 0 0 1 171.218 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 174.207 706.129 Td [(a)]TJ +/F59 11.9552 Tf 207.803 706.129 Td [(sizeof)-250(\227)-250(Memory)-250(occupation)]TJ/F62 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(memory)-250(occupation)-250(of)-250(a)-250(PSBLAS)-250(object.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.381 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 136.328 0 Td [(psb)]TJ -ET -q -1 0 0 1 328.257 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 331.395 658.308 Td [(desc)]TJ -ET -q -1 0 0 1 352.944 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 356.083 658.308 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F67 9.9626 Tf 0 -21.918 Td [(isz)-525(=)-525(psb_sizeof\050a\051)]TJ 0 -11.955 Td [(isz)-525(=)-525(psb_sizeof\050desc_a\051)]TJ 0 -11.955 Td [(isz)-525(=)-525(psb_sizeof\050prec\051)]TJ 0 g 0 G -/F51 9.9626 Tf -226.299 -33.398 Td [(local)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 26.56 0 Td [(Whether)-207(the)-207(entries)-207(in)-207(the)-208(indices)-207(vectors)]TJ/F59 9.9626 Tf 181.487 0 Td [(ia)]TJ/F54 9.9626 Tf 10.46 0 Td [(,)]TJ/F59 9.9626 Tf 4.64 0 Td [(ja)]TJ/F54 9.9626 Tf 12.524 0 Td [(ar)18(e)-207(alr)18(eady)-207(in)-207(local)-208(num-)]TJ -210.765 -11.956 Td [(bering.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.187 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value;)-250(default:)]TJ/F59 9.9626 Tf 162.678 0 Td [(.false.)]TJ/F54 9.9626 Tf 36.613 0 Td [(.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -224.197 -23.056 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -21.444 Td [(a)]TJ + 0 -19.925 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(matrix)-250(into)-250(which)-250(coef)18(\002cients)-250(will)-250(be)-250(inserted.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf -24 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.963 0 Td [(A)-250(sparse)-250(matrix)]TJ/F60 9.9626 Tf 72.97 0 Td [(A)]TJ/F62 9.9626 Tf 7.318 0 Td [(.)]TJ -65.344 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.137 0 Td [(psb)]TJ ET q -1 0 0 1 360.068 484.968 cm +1 0 0 1 360.068 531.947 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 363.206 484.768 Td [(Tspmat)]TJ +/F67 9.9626 Tf 363.206 531.748 Td [(Tspmat)]TJ ET q -1 0 0 1 395.216 484.968 cm +1 0 0 1 395.216 531.947 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 398.354 484.768 Td [(type)]TJ +/F67 9.9626 Tf 398.354 531.748 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -268.57 -21.443 Td [(desc)]TJ +/F59 9.9626 Tf -268.57 -19.925 Td [(desc)]TJ ET q -1 0 0 1 171.218 463.524 cm +1 0 0 1 171.218 512.022 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 463.325 Td [(a)]TJ +/F59 9.9626 Tf 174.207 511.823 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.381 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ +/F62 9.9626 Tf 9.962 0 Td [(Communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ 0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 136.328 0 Td [(psb)]TJ +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 328.257 415.704 cm +1 0 0 1 360.068 464.201 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 331.395 415.504 Td [(desc)]TJ +/F67 9.9626 Tf 363.206 464.002 Td [(desc)]TJ ET q -1 0 0 1 352.944 415.704 cm +1 0 0 1 384.755 464.201 cm []0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F59 9.9626 Tf 356.083 415.504 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F67 9.9626 Tf 387.893 464.002 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -226.299 -33.398 Td [(info)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.906 -23.436 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.453 -21.064 Td [(1.)]TJ -0 g 0 G - [-500(On)-312(entry)-312(to)-312(this)-312(r)18(out)1(ine)-312(the)-312(descriptor)-312(may)-312(be)-312(in)-312(either)-312(the)-311(build)-312(or)-312(as-)]TJ 12.453 -11.955 Td [(sembled)-250(state.)]TJ -0 g 0 G - -12.453 -21.443 Td [(2.)]TJ -0 g 0 G - [-500(On)-314(entry)-315(to)-314(this)-315(r)18(ou)1(tine)-315(the)-314(sparse)-315(matrix)-314(may)-314(be)-315(in)-314(either)-314(the)-315(build)-314(or)]TJ 12.453 -11.955 Td [(update)-250(state.)]TJ -0 g 0 G - -12.453 -21.444 Td [(3.)]TJ -0 g 0 G - [-500(If)-263(the)-263(descriptor)-263(is)-262(in)-263(the)-263(build)-263(state,)-266(then)-263(the)-263(sparse)-263(matrix)-262(must)-263(also)-263(be)]TJ 12.453 -11.955 Td [(in)-212(the)-212(build)-213(state;)-224(the)-213(action)-212(of)-212(the)-212(r)18(outine)-212(is)-213(to)-212(\050implicitly\051)-212(call)]TJ/F59 9.9626 Tf 271.732 0 Td [(psb_cdins)]TJ/F54 9.9626 Tf -271.732 -11.955 Td [(to)-259(add)-259(entries)-259(to)-259(the)-259(sparsity)-259(pattern;)-263(each)-259(sparse)-259(matrix)-259(entry)-259(implicitly)]TJ 0 -11.955 Td [(de\002nes)-288(a)-288(graph)-288(edge,)-297(that)-288(is)-288(passed)-288(to)-288(the)-288(descriptor)-288(r)18(outine)-288(for)-288(the)-288(ap-)]TJ 0 -11.955 Td [(pr)18(opriate)-250(pr)18(ocessing;)]TJ -0 g 0 G - -12.453 -21.444 Td [(4.)]TJ -0 g 0 G - [-500(The)-250(input)-250(data)-250(can)-250(be)-250(passed)-250(in)-250(either)-250(COO)-250(or)-250(CSR)-250(formats;)]TJ -0 g 0 G - 0 -21.443 Td [(5.)]TJ -0 g 0 G - [-500(In)-307(COO)-307(format)-307(the)-306(coef)18(\002cients)-307(to)-307(be)-307(inserted)-307(ar)18(e)-307(r)18(epr)18(esented)-306(by)-307(the)-307(or)18(-)]TJ 12.453 -11.955 Td [(der)18(ed)-194(triples)]TJ/F52 9.9626 Tf 57.352 0 Td [(i)-47(a)]TJ/F85 10.3811 Tf 7.911 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(,)]TJ/F52 9.9626 Tf 4.624 0 Td [(j)-40(a)]TJ/F85 10.3811 Tf 7.84 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(,)]TJ/F52 9.9626 Tf 4.276 0 Td [(v)-40(a)-25(l)]TJ/F85 10.3811 Tf 13.37 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(,)-205(for)]TJ/F52 9.9626 Tf 19.208 0 Td [(i)]TJ/F85 10.3811 Tf 5.856 0 Td [(=)]TJ/F54 9.9626 Tf 10.961 0 Td [(1,)-179(.)-192(.)-191(.)-180(,)]TJ/F52 9.9626 Tf 26.608 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.337 0 Td [(;)-212(these)-194(triples)-194(ar)18(e)-193(arbitrary;)]TJ -0 g 0 G - -60.701 -29.888 Td [(84)]TJ +/F59 9.9626 Tf -258.11 -19.925 Td [(prec)]TJ 0 g 0 G +/F62 9.9626 Tf 24.348 0 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -30.874 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 196.511 0 Td [(psb)]TJ ET - -endstream -endobj -1448 0 obj -<< -/Length 4535 ->> -stream -0 g 0 G -0 g 0 G -0 g 0 G +q +1 0 0 1 388.441 408.41 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F54 9.9626 Tf 112.349 706.129 Td [(6.)]TJ -0 g 0 G - [-500(In)-272(CSR)-271(format)-272(the)-271(coef)18(\002cients)-272(to)-271(be)-272(inserted)-272(for)-271(each)-272(input)-271(r)18(ow)]TJ/F52 9.9626 Tf 294.598 0 Td [(i)]TJ/F85 10.3811 Tf 6.254 0 Td [(=)]TJ/F54 9.9626 Tf 11.36 0 Td [(1,)]TJ/F52 9.9626 Tf 9.257 0 Td [(n)-15(r)]TJ/F54 9.9626 Tf -309.016 -11.955 Td [(ar)18(e)-311(r)18(epr)18(esented)-312(by)-311(the)-311(or)18(der)18(ed)-312(triples)]TJ/F85 10.3811 Tf 171.689 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 5.251 0 Td [(+)]TJ/F52 9.9626 Tf 10.413 0 Td [(i)-22(r)-35(w)]TJ/F83 10.3811 Tf 16.818 0 Td [(\000)]TJ/F54 9.9626 Tf 10.358 0 Td [(1)]TJ/F85 10.3811 Tf 5.106 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(,)]TJ/F52 9.9626 Tf 4.624 0 Td [(j)-40(a)]TJ/F85 10.3811 Tf 7.841 0 Td [(\050)]TJ/F52 9.9626 Tf 4.622 0 Td [(j)]TJ/F85 10.3811 Tf 3.019 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(,)]TJ/F52 9.9626 Tf 4.276 0 Td [(v)-40(a)-25(l)]TJ/F85 10.3811 Tf 13.37 0 Td [(\050)]TJ/F52 9.9626 Tf 4.622 0 Td [(j)]TJ/F85 10.3811 Tf 3.019 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(,)-327(for)]TJ/F52 9.9626 Tf 22.013 0 Td [(j)]TJ/F85 10.3811 Tf 6.917 0 Td [(=)]TJ/F52 9.9626 Tf -310.555 -11.955 Td [(i)-22(r)-90(p)]TJ/F85 10.3811 Tf 12.991 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(,)-179(.)-192(.)-191(.)-180(,)]TJ/F52 9.9626 Tf 21.557 0 Td [(i)-22(r)-90(p)]TJ/F85 10.3811 Tf 12.991 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 5.301 0 Td [(+)]TJ/F54 9.9626 Tf 10.407 0 Td [(1)]TJ/F85 10.3811 Tf 5.106 0 Td [(\051)]TJ/F83 10.3811 Tf 6.486 0 Td [(\000)]TJ/F54 9.9626 Tf 10.407 0 Td [(1;)-362(these)-325(triples)-324(should)-325(belong)-325(to)-324(the)-325(curr)18(ent)-325(pr)18(o-)]TJ -100.947 -11.956 Td [(cess,)-276(i.e.)]TJ/F52 9.9626 Tf 39.307 0 Td [(i)]TJ/F85 10.3811 Tf 5.103 0 Td [(+)]TJ/F52 9.9626 Tf 10.263 0 Td [(i)-22(r)-35(w)]TJ/F83 10.3811 Tf 16.669 0 Td [(\000)]TJ/F54 9.9626 Tf 10.209 0 Td [(1)-271(should)-271(be)-271(one)-271(of)-271(the)-271(local)-270(indices,)-277(but)-271(ar)18(e)-270(otherwise)]TJ -81.551 -11.955 Td [(arbitrary;)]TJ -0 g 0 G - -12.453 -19.925 Td [(7.)]TJ -0 g 0 G - [-500(Ther)18(e)-315(is)-314(no)-315(r)18(equir)18(ement)-314(that)-315(a)-315(given)-314(r)18(ow)-315(must)-315(be)-314(passed)-315(in)-315(its)-314(entir)18(ety)]TJ 12.453 -11.955 Td [(to)-298(a)-299(single)-298(call)-298(to)-299(thi)1(s)-299(r)18(outine:)-406(the)-299(buildup)-298(of)-298(a)-299(r)18(ow)-298(may)-298(be)-299(split)-298(into)-298(as)]TJ 0 -11.955 Td [(many)-250(calls)-250(as)-250(desir)18(ed)-250(\050even)-250(in)-250(the)-250(CSR)-250(format\051;)]TJ -0 g 0 G - -12.453 -19.926 Td [(8.)]TJ -0 g 0 G - [-500(Coef)18(\002cients)-288(fr)18(om)-289(dif)18(fer)18(ent)-288(r)18(ows)-288(may)-289(also)-288(be)-288(mixed)-289(up)-288(fr)18(eely)-288(in)-289(a)-288(single)]TJ 12.453 -11.955 Td [(call,)-250(accor)18(ding)-250(to)-250(the)-250(application)-250(needs;)]TJ -0 g 0 G - -12.453 -19.925 Td [(9.)]TJ +/F67 9.9626 Tf 391.579 408.211 Td [(prec)]TJ +ET +q +1 0 0 1 413.128 408.41 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 416.266 408.211 Td [(type)]TJ 0 g 0 G - [-500(Coef)18(\002cients)-190(fr)18(om)-190(matrix)-190(r)18(ows)-190(not)-190(owned)-190(by)-190(the)-190(calling)-190(pr)18(ocess)-190(ar)18(e)-190(tr)18(eated)]TJ 12.453 -11.955 Td [(accor)18(ding)-254(to)-254(the)-253(value)-254(of)]TJ/F59 9.9626 Tf 111.539 0 Td [(bldmode)]TJ/F54 9.9626 Tf 39.141 0 Td [(speci\002ed)-254(at)-253(allocation)-254(time;)-256(if)]TJ/F59 9.9626 Tf 131.512 0 Td [(bldmode)]TJ/F54 9.9626 Tf -282.192 -11.956 Td [(was)-300(chosen)-300(as)]TJ/F59 9.9626 Tf 66.146 0 Td [(psb_matbld_remote_)]TJ/F54 9.9626 Tf 97.136 0 Td [(the)-300(library)-300(will)-300(keep)-300(track)-301(of)-300(them,)]TJ -163.282 -11.955 Td [(otherwise)-250(they)-250(ar)18(e)-250(silently)-250(ignor)18(ed;)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G - -17.435 -19.925 Td [(10.)]TJ +/F59 9.9626 Tf -286.483 -19.925 Td [(On)-250(Return)]TJ 0 g 0 G - [-500(If)-295(the)-294(descriptor)-295(is)-295(i)1(n)-295(the)-295(assembled)-294(state,)-306(then)-295(any)-294(entries)-295(in)-295(the)-294(sparse)]TJ 17.435 -11.955 Td [(matrix)-284(that)-284(would)-284(generate)-284(additional)-284(communication)-284(r)18(equir)18(ements)-284(ar)18(e)]TJ 0 -11.955 Td [(ignor)18(ed;)]TJ 0 g 0 G - -17.435 -19.926 Td [(11.)]TJ + 0 -19.925 Td [(Function)-250(value)]TJ 0 g 0 G - [-500(If)-268(the)-268(matrix)-268(is)-268(in)-268(the)-268(update)-268(state,)-273(any)-268(entries)-268(in)-268(positions)-268(that)-268(wer)18(e)-268(not)]TJ 17.435 -11.955 Td [(pr)18(esent)-250(in)-250(the)-250(original)-250(matrix)-250(ar)18(e)-250(ignor)18(ed.)]TJ +/F62 9.9626 Tf 72.776 0 Td [(The)-322(memory)-322(occupation)-322(of)-323(the)-322(object)-322(speci\002ed)-322(in)-322(the)-322(calling)]TJ -47.87 -11.956 Td [(sequence,)-250(in)-250(bytes.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(Returned)-250(as:)-310(an)]TJ/F67 9.9626 Tf 71.82 0 Td [(integer\050psb_long_int_k_\051)]TJ/F62 9.9626 Tf 128.019 0 Td [(number)74(.)]TJ 0 g 0 G - 141.968 -360.647 Td [(85)]TJ + -60.362 -242.057 Td [(108)]TJ 0 g 0 G ET endstream endobj -1348 0 obj +1607 0 obj << /Type /ObjStm /N 100 -/First 977 -/Length 10176 ->> -stream -1347 0 308 59 312 117 1344 174 1351 307 1349 446 1353 593 1354 651 1350 709 1357 829 -1355 968 1359 1126 1361 1185 1356 1244 1363 1391 1365 1509 1366 1567 1367 1625 1368 1683 1369 1741 -1370 1799 1362 1857 1374 1938 1372 2077 1376 2222 316 2281 1373 2339 1379 2459 1377 2598 1381 2756 -1382 2814 1383 2872 1384 2930 1378 2988 1388 3082 1385 3230 1386 3375 1390 3521 320 3580 1391 3638 -1387 3697 1395 3791 1392 3939 1393 4084 1397 4231 324 4289 1394 4346 1400 4440 1398 4579 1402 4724 -328 4783 1399 4841 1406 4935 1403 5083 1404 5228 1408 5375 332 5433 1405 5490 1411 5610 1413 5728 -1414 5787 1415 5846 1410 5905 1419 5986 1416 6134 1417 6281 1421 6426 336 6484 1422 6541 1418 6599 -1424 6693 1426 6811 1427 6870 1428 6929 1429 6988 1423 7047 1432 7141 1434 7259 340 7317 1431 7374 -1438 7494 1430 7651 1435 7794 1436 7939 1440 8082 1441 8141 1442 8199 1443 8258 1444 8317 1445 8376 -1437 8435 1447 8555 1449 8673 1450 8731 1451 8789 1452 8847 1453 8905 1454 8963 1455 9021 1446 9079 -% 1347 0 obj -<< -/D [1345 0 R /XYZ 149.705 753.953 null] ->> -% 308 0 obj -<< -/D [1345 0 R /XYZ 150.705 716.092 null] ->> -% 312 0 obj -<< -/D [1345 0 R /XYZ 150.705 691.48 null] ->> -% 1344 0 obj -<< -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R /F85 814 0 R >> -/ProcSet [ /PDF /Text ] +/First 973 +/Length 10526 >> -% 1351 0 obj +stream +1604 0 1601 148 1602 294 1606 438 360 497 1603 555 1609 675 1611 793 1612 851 1613 909 +1608 967 1618 1048 1614 1205 1615 1346 1616 1489 1620 1633 364 1692 1621 1750 1622 1809 1617 1868 +1626 1975 1623 2123 1624 2266 1628 2413 368 2471 1625 2528 1630 2622 1632 2740 372 2799 1629 2857 +1635 2964 1633 3103 1637 3250 376 3308 1634 3365 1640 3472 1642 3590 1643 3649 1644 3708 1639 3767 +1647 3848 1645 3987 1649 4134 380 4192 1646 4249 1652 4356 1650 4495 1654 4642 384 4701 1655 4759 +1651 4818 1658 4925 1656 5064 1660 5211 388 5269 1661 5326 1657 5384 1664 5491 1662 5630 1666 5777 +392 5836 1667 5894 1663 5953 1670 6060 1668 6199 1672 6346 396 6404 1673 6461 1669 6519 1677 6626 +1675 6765 1679 6910 400 6969 1680 7027 1681 7086 1676 7145 1684 7239 1682 7378 1686 7523 405 7581 +1687 7638 1688 7696 1683 7754 1691 7848 1689 7987 1693 8132 409 8191 1690 8249 1695 8370 1697 8488 +1698 8546 1699 8604 1700 8662 1694 8720 1705 8840 1701 8997 1702 9142 1703 9289 1707 9436 413 9495 +% 1604 0 obj << /Type /Page -/Contents 1352 0 R -/Resources 1350 0 R +/Contents 1605 0 R +/Resources 1603 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1330 0 R -/Annots [ 1349 0 R ] +/Parent 1600 0 R +/Annots [ 1601 0 R 1602 0 R ] >> -% 1349 0 obj +% 1601 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 324.687 359.001 336.746] +/Rect [342.753 362.621 409.811 374.68] /A << /S /GoTo /D (descdata) >> >> -% 1353 0 obj +% 1602 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [417.183 207.637 493.237 219.697] +/A << /S /GoTo /D (vdata) >> +>> +% 1606 0 obj << -/D [1351 0 R /XYZ 98.895 753.953 null] +/D [1604 0 R /XYZ 149.705 753.953 null] >> -% 1354 0 obj +% 360 0 obj << -/D [1351 0 R /XYZ 99.895 234.157 null] +/D [1604 0 R /XYZ 150.705 716.092 null] >> -% 1350 0 obj +% 1603 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R /F85 814 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R >> /ProcSet [ /PDF /Text ] >> -% 1357 0 obj +% 1609 0 obj << /Type /Page -/Contents 1358 0 R -/Resources 1356 0 R +/Contents 1610 0 R +/Resources 1608 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1330 0 R -/Annots [ 1355 0 R ] +/Parent 1600 0 R >> -% 1355 0 obj +% 1611 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [405.298 215.702 427.216 227.166] -/A << /S /GoTo /D (subsubsection.2.3.1) >> +/D [1609 0 R /XYZ 98.895 753.953 null] >> -% 1359 0 obj +% 1612 0 obj << -/D [1357 0 R /XYZ 149.705 753.953 null] +/D [1609 0 R /XYZ 99.895 701.929 null] >> -% 1361 0 obj +% 1613 0 obj << -/D [1357 0 R /XYZ 150.705 133.283 null] +/D [1609 0 R /XYZ 99.895 680.684 null] >> -% 1356 0 obj +% 1608 0 obj << -/Font << /F59 812 0 R /F54 586 0 R /F51 584 0 R /F83 813 0 R /F52 585 0 R /F85 814 0 R /F61 1360 0 R >> +/Font << /F59 665 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1363 0 obj +% 1618 0 obj << /Type /Page -/Contents 1364 0 R -/Resources 1362 0 R +/Contents 1619 0 R +/Resources 1617 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1371 0 R +/Parent 1600 0 R +/Annots [ 1614 0 R 1615 0 R 1616 0 R ] >> -% 1365 0 obj +% 1614 0 obj << -/D [1363 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [310.942 573.77 378 585.83] +/A << /S /GoTo /D (descdata) >> >> -% 1366 0 obj +% 1615 0 obj << -/D [1363 0 R /XYZ 99.895 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [322.33 482.114 398.384 494.174] +/A << /S /GoTo /D (vdata) >> >> -% 1367 0 obj +% 1616 0 obj << -/D [1363 0 R /XYZ 99.895 687.379 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [225.165 380.495 301.219 392.555] +/A << /S /GoTo /D (vdata) >> >> -% 1368 0 obj +% 1620 0 obj << -/D [1363 0 R /XYZ 99.895 667.454 null] +/D [1618 0 R /XYZ 149.705 753.953 null] >> -% 1369 0 obj +% 364 0 obj << -/D [1363 0 R /XYZ 99.895 626.268 null] +/D [1618 0 R /XYZ 150.705 716.092 null] >> -% 1370 0 obj +% 1621 0 obj +<< +/D [1618 0 R /XYZ 150.705 278.482 null] +>> +% 1622 0 obj << -/D [1363 0 R /XYZ 99.895 567.828 null] +/D [1618 0 R /XYZ 150.705 244.007 null] >> -% 1362 0 obj +% 1617 0 obj << -/Font << /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1374 0 obj +% 1626 0 obj << /Type /Page -/Contents 1375 0 R -/Resources 1373 0 R +/Contents 1627 0 R +/Resources 1625 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1371 0 R -/Annots [ 1372 0 R ] +/Parent 1600 0 R +/Annots [ 1623 0 R 1624 0 R ] >> -% 1372 0 obj +% 1623 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 116.52 409.811 128.58] +/Rect [174.355 561.815 250.41 573.875] +/A << /S /GoTo /D (vdata) >> +>> +% 1624 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [260.133 482.114 327.191 494.174] /A << /S /GoTo /D (descdata) >> >> -% 1376 0 obj +% 1628 0 obj << -/D [1374 0 R /XYZ 149.705 753.953 null] +/D [1626 0 R /XYZ 98.895 753.953 null] >> -% 316 0 obj +% 368 0 obj << -/D [1374 0 R /XYZ 150.705 716.092 null] +/D [1626 0 R /XYZ 99.895 716.092 null] >> -% 1373 0 obj +% 1625 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1379 0 obj +% 1630 0 obj << /Type /Page -/Contents 1380 0 R -/Resources 1378 0 R +/Contents 1631 0 R +/Resources 1629 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1600 0 R +>> +% 1632 0 obj +<< +/D [1630 0 R /XYZ 149.705 753.953 null] +>> +% 372 0 obj +<< +/D [1630 0 R /XYZ 150.705 716.092 null] +>> +% 1629 0 obj +<< +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1635 0 obj +<< +/Type /Page +/Contents 1636 0 R +/Resources 1634 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1371 0 R -/Annots [ 1377 0 R ] +/Parent 1638 0 R +/Annots [ 1633 0 R ] >> -% 1377 0 obj +% 1633 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [278.165 401.451 300.083 413.511] -/A << /S /GoTo /D (subsubsection.2.3.1) >> +/Rect [291.943 483.894 359.001 495.954] +/A << /S /GoTo /D (descdata) >> >> -% 1381 0 obj +% 1637 0 obj << -/D [1379 0 R /XYZ 98.895 753.953 null] +/D [1635 0 R /XYZ 98.895 753.953 null] +>> +% 376 0 obj +<< +/D [1635 0 R /XYZ 99.895 716.092 null] +>> +% 1634 0 obj +<< +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1640 0 obj +<< +/Type /Page +/Contents 1641 0 R +/Resources 1639 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1638 0 R >> -% 1382 0 obj +% 1642 0 obj << -/D [1379 0 R /XYZ 99.895 496.698 null] +/D [1640 0 R /XYZ 149.705 753.953 null] >> -% 1383 0 obj +% 1643 0 obj << -/D [1379 0 R /XYZ 99.895 474.179 null] +/D [1640 0 R /XYZ 150.705 716.092 null] >> -% 1384 0 obj +% 1644 0 obj << -/D [1379 0 R /XYZ 99.895 430.343 null] +/D [1640 0 R /XYZ 150.705 687.379 null] >> -% 1378 0 obj +% 1639 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1388 0 obj +% 1647 0 obj << /Type /Page -/Contents 1389 0 R -/Resources 1387 0 R +/Contents 1648 0 R +/Resources 1646 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1371 0 R -/Annots [ 1385 0 R 1386 0 R ] ->> -% 1385 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 573.77 409.811 585.83] -/A << /S /GoTo /D (descdata) >> +/Parent 1638 0 R +/Annots [ 1645 0 R ] >> -% 1386 0 obj +% 1645 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 416.361 409.811 428.42] +/Rect [291.943 482.114 359.001 494.174] /A << /S /GoTo /D (descdata) >> >> -% 1390 0 obj -<< -/D [1388 0 R /XYZ 149.705 753.953 null] ->> -% 320 0 obj +% 1649 0 obj << -/D [1388 0 R /XYZ 150.705 716.092 null] +/D [1647 0 R /XYZ 98.895 753.953 null] >> -% 1391 0 obj +% 380 0 obj << -/D [1388 0 R /XYZ 150.705 326.302 null] +/D [1647 0 R /XYZ 99.895 716.092 null] >> -% 1387 0 obj +% 1646 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1395 0 obj +% 1652 0 obj << /Type /Page -/Contents 1396 0 R -/Resources 1394 0 R +/Contents 1653 0 R +/Resources 1651 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1371 0 R -/Annots [ 1392 0 R 1393 0 R ] +/Parent 1638 0 R +/Annots [ 1650 0 R ] >> -% 1392 0 obj +% 1650 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 573.77 359.001 585.83] +/Rect [342.753 494.069 409.811 506.129] /A << /S /GoTo /D (descdata) >> >> -% 1393 0 obj +% 1654 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 484.107 359.001 496.166] -/A << /S /GoTo /D (descdata) >> +/D [1652 0 R /XYZ 149.705 753.953 null] >> -% 1397 0 obj +% 384 0 obj << -/D [1395 0 R /XYZ 98.895 753.953 null] +/D [1652 0 R /XYZ 150.705 716.092 null] >> -% 324 0 obj +% 1655 0 obj << -/D [1395 0 R /XYZ 99.895 716.092 null] +/D [1652 0 R /XYZ 150.705 382.093 null] >> -% 1394 0 obj +% 1651 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1400 0 obj +% 1658 0 obj << /Type /Page -/Contents 1401 0 R -/Resources 1399 0 R +/Contents 1659 0 R +/Resources 1657 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1371 0 R -/Annots [ 1398 0 R ] +/Parent 1638 0 R +/Annots [ 1656 0 R ] >> -% 1398 0 obj +% 1656 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 573.77 409.811 585.83] +/Rect [291.943 494.069 359.001 506.129] /A << /S /GoTo /D (descdata) >> >> -% 1402 0 obj +% 1660 0 obj << -/D [1400 0 R /XYZ 149.705 753.953 null] +/D [1658 0 R /XYZ 98.895 753.953 null] >> -% 328 0 obj +% 388 0 obj << -/D [1400 0 R /XYZ 150.705 716.092 null] +/D [1658 0 R /XYZ 99.895 716.092 null] >> -% 1399 0 obj +% 1661 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/D [1658 0 R /XYZ 99.895 258.556 null] +>> +% 1657 0 obj +<< +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1406 0 obj +% 1664 0 obj << /Type /Page -/Contents 1407 0 R -/Resources 1405 0 R +/Contents 1665 0 R +/Resources 1663 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1409 0 R -/Annots [ 1403 0 R 1404 0 R ] +/Parent 1638 0 R +/Annots [ 1662 0 R ] >> -% 1403 0 obj +% 1662 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 452.321 369.462 464.381] -/A << /S /GoTo /D (spdata) >> +/Rect [342.753 494.069 409.811 506.129] +/A << /S /GoTo /D (descdata) >> >> -% 1404 0 obj +% 1666 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 209.193 359.001 221.252] -/A << /S /GoTo /D (descdata) >> +/D [1664 0 R /XYZ 149.705 753.953 null] >> -% 1408 0 obj +% 392 0 obj << -/D [1406 0 R /XYZ 98.895 753.953 null] +/D [1664 0 R /XYZ 150.705 716.092 null] >> -% 332 0 obj +% 1667 0 obj << -/D [1406 0 R /XYZ 99.895 716.092 null] +/D [1664 0 R /XYZ 150.705 382.093 null] >> -% 1405 0 obj +% 1663 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1411 0 obj +% 1670 0 obj << /Type /Page -/Contents 1412 0 R -/Resources 1410 0 R +/Contents 1671 0 R +/Resources 1669 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1409 0 R +/Parent 1674 0 R +/Annots [ 1668 0 R ] >> -% 1413 0 obj +% 1668 0 obj << -/D [1411 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 494.069 359.001 506.129] +/A << /S /GoTo /D (descdata) >> >> -% 1414 0 obj +% 1672 0 obj << -/D [1411 0 R /XYZ 150.705 716.092 null] +/D [1670 0 R /XYZ 98.895 753.953 null] >> -% 1415 0 obj +% 396 0 obj << -/D [1411 0 R /XYZ 150.705 663.469 null] +/D [1670 0 R /XYZ 99.895 716.092 null] >> -% 1410 0 obj +% 1673 0 obj +<< +/D [1670 0 R /XYZ 99.895 258.556 null] +>> +% 1669 0 obj << -/Font << /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1419 0 obj +% 1677 0 obj << /Type /Page -/Contents 1420 0 R -/Resources 1418 0 R +/Contents 1678 0 R +/Resources 1676 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1409 0 R -/Annots [ 1416 0 R 1417 0 R ] +/Parent 1674 0 R +/Annots [ 1675 0 R ] >> -% 1416 0 obj +% 1675 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 571.679 359.001 583.739] +/Rect [342.753 573.77 409.811 585.83] /A << /S /GoTo /D (descdata) >> >> -% 1417 0 obj +% 1679 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 227.887 369.462 239.946] -/A << /S /GoTo /D (spdata) >> +/D [1677 0 R /XYZ 149.705 753.953 null] >> -% 1421 0 obj +% 400 0 obj << -/D [1419 0 R /XYZ 98.895 753.953 null] +/D [1677 0 R /XYZ 150.705 716.092 null] >> -% 336 0 obj +% 1680 0 obj << -/D [1419 0 R /XYZ 99.895 716.092 null] +/D [1677 0 R /XYZ 150.705 358.183 null] >> -% 1422 0 obj +% 1681 0 obj << -/D [1419 0 R /XYZ 99.895 136.374 null] +/D [1677 0 R /XYZ 150.705 314.403 null] >> -% 1418 0 obj +% 1676 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1424 0 obj +% 1684 0 obj << /Type /Page -/Contents 1425 0 R -/Resources 1423 0 R +/Contents 1685 0 R +/Resources 1683 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1409 0 R ->> -% 1426 0 obj -<< -/D [1424 0 R /XYZ 149.705 753.953 null] ->> -% 1427 0 obj -<< -/D [1424 0 R /XYZ 150.705 716.092 null] ->> -% 1428 0 obj -<< -/D [1424 0 R /XYZ 150.705 699.334 null] +/Parent 1674 0 R +/Annots [ 1682 0 R ] >> -% 1429 0 obj +% 1682 0 obj << -/D [1424 0 R /XYZ 150.705 644.819 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 573.77 359.001 585.83] +/A << /S /GoTo /D (descdata) >> >> -% 1423 0 obj +% 1686 0 obj << -/Font << /F54 586 0 R /F52 585 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/D [1684 0 R /XYZ 98.895 753.953 null] >> -% 1432 0 obj +% 405 0 obj << -/Type /Page -/Contents 1433 0 R -/Resources 1431 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1409 0 R +/D [1684 0 R /XYZ 99.895 716.092 null] >> -% 1434 0 obj +% 1687 0 obj << -/D [1432 0 R /XYZ 98.895 753.953 null] +/D [1684 0 R /XYZ 99.895 370.138 null] >> -% 340 0 obj +% 1688 0 obj << -/D [1432 0 R /XYZ 99.895 716.092 null] +/D [1684 0 R /XYZ 99.895 338.313 null] >> -% 1431 0 obj +% 1683 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1438 0 obj +% 1691 0 obj << /Type /Page -/Contents 1439 0 R -/Resources 1437 0 R +/Contents 1692 0 R +/Resources 1690 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1409 0 R -/Annots [ 1430 0 R 1435 0 R 1436 0 R ] +/Parent 1674 0 R +/Annots [ 1689 0 R ] >> -% 1430 0 obj +% 1689 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [310.942 654.503 378 666.562] -/A << /S /GoTo /D (descdata) >> +/Rect [342.753 491.971 420.271 504.031] +/A << /S /GoTo /D (spdata) >> >> -% 1435 0 obj +% 1693 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 480.963 420.271 493.022] -/A << /S /GoTo /D (spdata) >> +/D [1691 0 R /XYZ 149.705 753.953 null] >> -% 1436 0 obj +% 409 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [310.942 411.699 378 423.758] -/A << /S /GoTo /D (descdata) >> +/D [1691 0 R /XYZ 150.705 716.092 null] >> -% 1440 0 obj +% 1690 0 obj << -/D [1438 0 R /XYZ 149.705 753.953 null] +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F69 1460 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1441 0 obj +% 1695 0 obj << -/D [1438 0 R /XYZ 150.705 306.27 null] +/Type /Page +/Contents 1696 0 R +/Resources 1694 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1674 0 R >> -% 1442 0 obj +% 1697 0 obj << -/D [1438 0 R /XYZ 150.705 272.927 null] +/D [1695 0 R /XYZ 98.895 753.953 null] >> -% 1443 0 obj +% 1698 0 obj << -/D [1438 0 R /XYZ 150.705 236.878 null] +/D [1695 0 R /XYZ 99.895 496.698 null] >> -% 1444 0 obj +% 1699 0 obj << -/D [1438 0 R /XYZ 150.705 167.614 null] +/D [1695 0 R /XYZ 99.895 438.313 null] >> -% 1445 0 obj +% 1700 0 obj << -/D [1438 0 R /XYZ 150.705 146.171 null] +/D [1695 0 R /XYZ 99.895 418.388 null] >> -% 1437 0 obj +% 1694 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R /F85 814 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F60 666 0 R /F93 915 0 R >> /ProcSet [ /PDF /Text ] >> -% 1447 0 obj +% 1705 0 obj << /Type /Page -/Contents 1448 0 R -/Resources 1446 0 R +/Contents 1706 0 R +/Resources 1704 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1456 0 R +/Parent 1674 0 R +/Annots [ 1701 0 R 1702 0 R 1703 0 R ] >> -% 1449 0 obj -<< -/D [1447 0 R /XYZ 98.895 753.953 null] ->> -% 1450 0 obj +% 1701 0 obj << -/D [1447 0 R /XYZ 99.895 716.092 null] ->> -% 1451 0 obj -<< -/D [1447 0 R /XYZ 99.895 651.514 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 527.942 420.271 540.002] +/A << /S /GoTo /D (spdata) >> >> -% 1452 0 obj +% 1702 0 obj << -/D [1447 0 R /XYZ 99.895 607.678 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 460.196 409.811 472.256] +/A << /S /GoTo /D (descdata) >> >> -% 1453 0 obj +% 1703 0 obj << -/D [1447 0 R /XYZ 99.895 575.798 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [371.126 404.405 438.184 416.465] +/A << /S /GoTo /D (precdata) >> >> -% 1454 0 obj +% 1707 0 obj << -/D [1447 0 R /XYZ 99.895 520.007 null] +/D [1705 0 R /XYZ 149.705 753.953 null] >> -% 1455 0 obj +% 413 0 obj << -/D [1447 0 R /XYZ 99.895 476.171 null] +/D [1705 0 R /XYZ 150.705 716.092 null] >> -% 1446 0 obj + +endstream +endobj +1711 0 obj << -/Font << /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/Length 5626 >> +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 99.895 706.129 Td [(6.27)-1000(Sorting)-250(utilities)-250(\227)]TJ 0 -20.164 Td [(psb)]TJ +ET +q +1 0 0 1 120.53 686.164 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 124.116 685.965 Td [(msort)-250(\227)-250(Sorting)-250(by)-250(the)-250(Merge-sort)-250(algorithm)]TJ -24.221 -12.574 Td [(psb)]TJ +ET +q +1 0 0 1 120.53 673.59 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 124.116 673.391 Td [(qsort)-250(\227)-250(Sorting)-250(by)-250(the)-250(Quicksort)-250(algorithm)]TJ -24.221 -12.575 Td [(psb)]TJ +ET +q +1 0 0 1 120.53 661.016 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 124.116 660.816 Td [(hsort)-250(\227)-250(Sorting)-250(by)-250(the)-250(Heapsort)-250(algorithm)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -24.221 -22.402 Td [(call)-525(psb_msort\050x,ix,dir,flag\051)]TJ 0 -11.955 Td [(call)-525(psb_qsort\050x,ix,dir,flag\051)]TJ 0 -11.955 Td [(call)-525(psb_hsort\050x,ix,dir,flag\051)]TJ/F62 9.9626 Tf 14.944 -21.783 Td [(These)-236(serial)-235(r)18(outines)-236(sort)-236(a)-235(sequence)]TJ/F60 9.9626 Tf 162.066 0 Td [(X)]TJ/F62 9.9626 Tf 9.884 0 Td [(into)-236(ascending)-235(or)-236(descending)-236(or)18(der)74(.)]TJ -186.894 -11.955 Td [(The)-243(ar)18(gument)-243(meaning)-243(is)-242(identical)-243(for)-243(the)-243(thr)18(ee)-243(calls;)-245(the)-243(only)-243(dif)18(fer)18(ence)-242(is)-243(the)]TJ 0 -11.955 Td [(algorithm)-250(used)-250(to)-250(accomplish)-250(the)-250(task)-250(\050see)-250(Usage)-250(Notes)-250(below\051.)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.783 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -22.402 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -22.402 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(The)-250(sequence)-250(to)-250(be)-250(sorted.)]TJ 14.944 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(,)-250(r)18(eal)-250(or)-250(complex)-250(array)-250(of)-250(rank)-250(1.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.402 Td [(ix)]TJ +0 g 0 G +/F62 9.9626 Tf 13.281 0 Td [(A)-250(vector)-250(of)-250(indices.)]TJ 11.626 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(\050at)-250(least\051)-250(the)-250(same)-250(size)-250(as)]TJ/F60 9.9626 Tf 254.189 0 Td [(X)]TJ/F62 9.9626 Tf 7.537 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -286.633 -22.402 Td [(dir)]TJ +0 g 0 G +/F62 9.9626 Tf 18.262 0 Td [(The)-250(desir)18(ed)-250(or)18(dering.)]TJ 6.645 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value:)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -22.402 Td [(Integer)-250(and)-250(real)-250(data:)]TJ +0 g 0 G +/F67 9.9626 Tf 101.28 0 Td [(psb_sort_up_)]TJ/F62 9.9626 Tf 62.764 0 Td [(,)]TJ/F67 9.9626 Tf 5.525 0 Td [(psb_sort_down_)]TJ/F62 9.9626 Tf 73.225 0 Td [(,)]TJ/F67 9.9626 Tf 5.525 0 Td [(psb_asort_up_)]TJ/F62 9.9626 Tf 67.995 0 Td [(,)]TJ/F67 9.9626 Tf -294.396 -11.955 Td [(psb_asort_down_)]TJ/F62 9.9626 Tf 78.455 0 Td [(;)-250(default)]TJ/F67 9.9626 Tf 38.784 0 Td [(psb_sort_up_)]TJ/F62 9.9626 Tf 62.764 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -201.921 -17.178 Td [(Complex)-250(data:)]TJ +0 g 0 G +/F67 9.9626 Tf 70.286 0 Td [(psb_lsort_up_)]TJ/F62 9.9626 Tf 67.995 0 Td [(,)]TJ/F67 9.9626 Tf 4.503 0 Td [(psb_lsort_down_)]TJ/F62 9.9626 Tf 78.455 0 Td [(,)]TJ/F67 9.9626 Tf 4.503 0 Td [(psb_asort_up_)]TJ/F62 9.9626 Tf 67.994 0 Td [(,)]TJ/F67 9.9626 Tf 4.504 0 Td [(psb_asort_down_)]TJ/F62 9.9626 Tf 78.455 0 Td [(;)]TJ -354.777 -11.956 Td [(default)]TJ/F67 9.9626 Tf 33.803 0 Td [(psb_lsort_up_)]TJ/F62 9.9626 Tf 67.994 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -148.622 -22.402 Td [(\003ag)]TJ +0 g 0 G +/F62 9.9626 Tf 21.589 0 Td [(Whether)-250(to)-250(keep)-250(the)-250(original)-250(values)-250(in)]TJ/F60 9.9626 Tf 171.52 0 Td [(I)-81(X)]TJ/F62 9.9626 Tf 11.661 0 Td [(.)]TJ -179.863 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-190(as:)-280(an)-190(integer)-190(value)]TJ/F67 9.9626 Tf 128.287 0 Td [(psb_sort_ovw_idx_)]TJ/F62 9.9626 Tf 90.809 0 Td [(or)]TJ/F67 9.9626 Tf 11.268 0 Td [(psb_sort_keep_idx_)]TJ/F62 9.9626 Tf 94.146 0 Td [(;)]TJ -324.51 -11.955 Td [(default)]TJ/F67 9.9626 Tf 33.803 0 Td [(psb_sort_ovw_idx_)]TJ/F62 9.9626 Tf 88.916 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -147.626 -24.395 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -22.402 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(The)-250(sequence)-250(of)-250(values,)-250(in)-250(the)-250(chosen)-250(or)18(dering.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(,)-250(r)18(eal)-250(or)-250(complex)-250(array)-250(of)-250(rank)-250(1.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.402 Td [(ix)]TJ +0 g 0 G +/F62 9.9626 Tf 13.281 0 Td [(A)-250(vector)-250(of)-250(indices.)]TJ 11.626 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(An)-238(integer)-237(array)-238(of)-237(rank)-238(1,)-240(whose)-238(entries)-237(ar)18(e)-238(moved)-237(to)-238(the)-238(same)-237(position)]TJ 0 -11.955 Td [(as)-250(the)-250(corr)18(esponding)-250(entries)-250(in)]TJ/F60 9.9626 Tf 138.215 0 Td [(x)]TJ/F62 9.9626 Tf 5.205 0 Td [(.)]TJ +0 g 0 G + -3.943 -44.517 Td [(109)]TJ +0 g 0 G +ET endstream endobj -1462 0 obj +1716 0 obj << -/Length 6789 +/Length 7300 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.9)-1000(psb)]TJ -ET -q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 201.825 706.129 Td [(spasb)-250(\227)-250(Sparse)-250(matrix)-250(assembly)-250(routine)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G + [-500(For)-370(integer)-370(or)-370(r)18(eal)-370(data)-370(the)-370(sorting)-370(can)-370(be)-370(performed)-370(in)-370(the)-370(up/down)]TJ 12.453 -11.956 Td [(dir)18(ection,)-250(on)-250(the)-250(natural)-250(or)-250(absolute)-250(values;)]TJ 0 g 0 G -/F59 9.9626 Tf -51.12 -19.204 Td [(call)-525(psb_spasb\050a,)-525(desc_a,)-525(info)-525([,)-525(afmt,)-525(upd,)-1050(mold]\051)]TJ + -12.453 -19.925 Td [(2.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -22.289 Td [(T)90(ype:)]TJ + [-500(For)-329(complex)-330(data)-329(the)-330(sorting)-329(can)-330(be)-329(done)-329(in)-330(a)-329(lexicographic)-330(or)18(der)-329(\050i.e.:)]TJ 12.453 -11.955 Td [(sort)-263(on)-263(the)-263(r)18(eal)-263(part)-263(with)-263(ties)-263(br)18(oken)-263(accor)18(ding)-263(to)-263(the)-263(imaginary)-263(part\051)-263(or)]TJ 0 -11.955 Td [(on)-250(the)-250(absolute)-250(values;)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ + -12.453 -19.925 Td [(3.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -20.421 Td [(On)-250(Entry)]TJ + [-500(The)-325(r)18(outines)-325(r)18(eturn)-325(the)-325(items)-324(in)-325(the)-325(chosen)-325(or)18(dering;)-362(the)-325(output)-325(dif)18(fer)18(-)]TJ 12.453 -11.956 Td [(ence)-244(is)-245(the)-244(handling)-244(of)-244(ties)-244(\050i.e.)-309(items)-244(with)-244(an)-244(equal)-245(v)1(alue\051)-245(in)-244(the)-244(original)]TJ 0 -11.955 Td [(input.)-316(W)55(ith)-252(the)-252(mer)18(ge-sort)-252(algorithm)-252(ties)-252(ar)18(e)-252(pr)18(eserved)-252(in)-252(the)-252(same)-252(r)18(ela-)]TJ 0 -11.955 Td [(tive)-278(or)18(der)-278(as)-278(they)-278(had)-278(in)-278(the)-278(original)-278(sequence,)-285(while)-278(this)-278(is)-278(not)-278(guaran-)]TJ 0 -11.955 Td [(teed)-250(for)-250(quicksort)-250(or)-250(heapsort;)]TJ 0 g 0 G + -12.453 -19.925 Td [(4.)]TJ 0 g 0 G - 0 -20.421 Td [(desc)]TJ + [-500(If)]TJ/F60 9.9626 Tf 22.66 0 Td [(f)-160(l)-70(a)-47(g)]TJ/F93 10.3811 Tf 20.72 0 Td [(=)]TJ/F60 9.9626 Tf 11.634 0 Td [(p)-25(s)-25(b)]TJ ET q -1 0 0 1 171.218 623.994 cm +1 0 0 1 232.862 542.941 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 174.207 623.794 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in/out)]TJ/F54 9.9626 Tf 27.297 0 Td [(.)]TJ -59.098 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F60 9.9626 Tf 235.975 542.742 Td [(s)-25(o)-25(r)-35(t)]TJ ET q -1 0 0 1 360.068 576.173 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 253.036 542.941 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 363.206 575.974 Td [(desc)]TJ +/F60 9.9626 Tf 256.149 542.742 Td [(o)-35(v)-25(w)]TJ ET q -1 0 0 1 384.755 576.173 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 274.067 542.941 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 387.893 575.974 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -258.11 -20.421 Td [(afmt)]TJ -0 g 0 G -/F54 9.9626 Tf 26.012 0 Td [(the)-250(storage)-250(format)-250(for)-250(the)-250(sparse)-250(matrix.)]TJ -1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(array)-250(of)-250(characters.)-310(Defalt:)-310('CSR'.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -20.42 Td [(upd)]TJ -0 g 0 G -/F54 9.9626 Tf 23.243 0 Td [(Pr)18(ovide)-250(for)-250(updates)-250(to)-250(the)-250(matrix)-250(coef)18(\002cients.)]TJ 1.663 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(integer)74(,)-250(possible)-250(values:)]TJ/F59 9.9626 Tf 165.219 0 Td [(psb_upd_srch_)]TJ/F54 9.9626 Tf 67.994 0 Td [(,)]TJ/F59 9.9626 Tf 4.981 0 Td [(psb_upd_perm_)]TJ +/F60 9.9626 Tf 277.11 542.742 Td [(i)-32(d)-42(x)]TJ +ET +q +1 0 0 1 291.402 542.941 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 296.503 542.742 Td [(then)-212(the)-212(entries)-212(in)]TJ/F60 9.9626 Tf 80.283 0 Td [(i)-49(x)]TJ/F93 10.3811 Tf 8.588 0 Td [(\050)]TJ/F62 9.9626 Tf 4.149 0 Td [(1)-290(:)]TJ/F60 9.9626 Tf 13.381 0 Td [(n)]TJ/F93 10.3811 Tf 5.788 0 Td [(\051)]TJ/F62 9.9626 Tf 6.262 0 Td [(wher)18(e)]TJ/F60 9.9626 Tf 29.644 0 Td [(n)]TJ/F62 9.9626 Tf 7.776 0 Td [(is)-212(the)-212(size)]TJ -276.762 -11.956 Td [(of)]TJ/F60 9.9626 Tf 12.255 0 Td [(x)]TJ/F62 9.9626 Tf 8.411 0 Td [(ar)18(e)-322(initialized)-321(to)]TJ/F60 9.9626 Tf 76.228 0 Td [(i)-49(x)]TJ/F93 10.3811 Tf 8.588 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F91 10.3811 Tf 8.364 0 Td [(\040)]TJ/F60 9.9626 Tf 14.651 0 Td [(i)]TJ/F62 9.9626 Tf 2.963 0 Td [(;)-358(thus,)-339(upon)-322(r)18(eturn)-321(fr)18(om)-322(the)-322(subr)18(outine,)]TJ -138.753 -11.955 Td [(for)-270(each)-271(index)]TJ/F60 9.9626 Tf 65.501 0 Td [(i)]TJ/F62 9.9626 Tf 5.657 0 Td [(we)-270(have)-271(in)]TJ/F60 9.9626 Tf 51.095 0 Td [(i)-49(x)]TJ/F93 10.3811 Tf 8.587 0 Td [(\050)]TJ/F60 9.9626 Tf 4.205 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 6.843 0 Td [(the)-270(position)-271(that)-270(the)-270(item)]TJ/F60 9.9626 Tf 114.324 0 Td [(x)]TJ/F93 10.3811 Tf 5.33 0 Td [(\050)]TJ/F60 9.9626 Tf 4.204 0 Td [(i)]TJ/F93 10.3811 Tf 3.088 0 Td [(\051)]TJ/F62 9.9626 Tf 6.844 0 Td [(occupied)]TJ -278.766 -11.955 Td [(in)-250(the)-250(original)-250(data)-250(sequence;)]TJ 0 g 0 G -/F51 9.9626 Tf -263.1 -20.421 Td [(mold)]TJ + -12.453 -19.925 Td [(5.)]TJ 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(matrix)-250(storage.)]TJ -3.876 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(a)-250(class)-250(derived)-250(fr)18(om)]TJ/F59 9.9626 Tf 201.393 0 Td [(psb)]TJ + [-500(If)]TJ/F60 9.9626 Tf 24.08 0 Td [(f)-160(l)-70(a)-47(g)]TJ/F93 10.3811 Tf 22.648 0 Td [(=)]TJ/F60 9.9626 Tf 13.563 0 Td [(p)-25(s)-25(b)]TJ ET q -1 0 0 1 393.323 371.449 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 238.138 487.15 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 396.461 371.249 Td [(T)]TJ +/F60 9.9626 Tf 241.252 486.951 Td [(s)-25(o)-25(r)-35(t)]TJ ET q -1 0 0 1 402.319 371.449 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 258.312 487.15 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 405.457 371.249 Td [(base)]TJ +/F60 9.9626 Tf 261.426 486.951 Td [(k)-30(e)-25(e)-80(p)]TJ ET q -1 0 0 1 427.006 371.449 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 280.648 487.15 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 430.144 371.249 Td [(sparse)]TJ +/F60 9.9626 Tf 283.692 486.951 Td [(i)-32(d)-42(x)]TJ ET q -1 0 0 1 462.154 371.449 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 297.983 487.15 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 465.292 371.249 Td [(mat)]TJ/F54 9.9626 Tf 15.691 0 Td [(.)]TJ +/F62 9.9626 Tf 304.504 486.951 Td [(the)-355(r)18(ou)1(tine)-355(will)-354(assume)-355(that)-355(th)1(e)-355(entries)-355(i)1(n)]TJ/F60 9.9626 Tf -128.838 -11.955 Td [(i)-49(x)]TJ/F93 10.3811 Tf 8.588 0 Td [(\050)]TJ/F62 9.9626 Tf 4.274 0 Td [(:)]TJ/F93 10.3811 Tf 2.74 0 Td [(\051)]TJ/F62 9.9626 Tf 6.64 0 Td [(have)-250(alr)18(eady)-250(been)-250(initialized)-250(by)-250(the)-250(user;)]TJ +0 g 0 G + -34.75 -19.926 Td [(6.)]TJ 0 g 0 G -/F51 9.9626 Tf -330.278 -22.289 Td [(On)-250(Return)]TJ + [-500(The)-270(thr)18(ee)-269(sorting)-270(algorithms)-269(have)-270(a)-269(similar)]TJ/F60 9.9626 Tf 205.79 0 Td [(O)]TJ/F93 10.3811 Tf 8 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(n)]TJ/F62 9.9626 Tf 7.324 0 Td [(log)]TJ/F60 9.9626 Tf 15.663 0 Td [(n)]TJ/F93 10.3811 Tf 5.788 0 Td [(\051)]TJ/F62 9.9626 Tf 6.835 0 Td [(expected)-270(r)8(unning)]TJ -241.221 -11.955 Td [(time;)-278(in)-268(the)-268(average)-269(case)-268(quicksort)-268(will)-269(be)-268(the)-268(fastest)-269(and)-268(mer)18(ge-sort)-268(the)]TJ 0 -11.955 Td [(slowest.)-310(However)-250(note)-250(that:)]TJ 0 g 0 G + 5.321 -19.925 Td [(\050a\051)]TJ 0 g 0 G - 0 -20.421 Td [(a)]TJ + [-500(The)-336(worst)-336(case)-336(r)8(unning)-336(time)-336(for)-337(quicksort)-336(is)]TJ/F60 9.9626 Tf 220.017 0 Td [(O)]TJ/F93 10.3811 Tf 8 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(n)]TJ/F62 7.5716 Tf 5.664 3.616 Td [(2)]TJ/F93 10.3811 Tf 4.408 -3.616 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(;)-379(the)-336(algorithm)]TJ -229.916 -11.955 Td [(implemented)-293(her)18(e)-293(follows)-293(the)-292(well-known)-293(median-of-thr)18(ee)-293(heuris-)]TJ 0 -11.956 Td [(tics,)-250(but)-250(the)-250(worst)-250(case)-250(may)-250(still)-250(apply;)]TJ +0 g 0 G + -17.125 -15.94 Td [(\050b\051)]TJ +0 g 0 G + [-500(The)-190(worst)-190(case)-190(r)8(unning)-190(time)-190(for)-190(mer)18(ge-sort)-190(and)-190(heap-sort)-190(is)]TJ/F60 9.9626 Tf 277.76 0 Td [(O)]TJ/F93 10.3811 Tf 8 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(n)]TJ/F62 9.9626 Tf 7.324 0 Td [(log)]TJ/F60 9.9626 Tf 15.663 0 Td [(n)]TJ/F93 10.3811 Tf 5.788 0 Td [(\051)]TJ/F62 9.9626 Tf -301.684 -11.955 Td [(as)-250(the)-250(average)-250(case;)]TJ +0 g 0 G + -16.04 -15.94 Td [(\050c\051)]TJ +0 g 0 G + [-500(The)-244(mer)18(ge-sort)-244(algorithm)-243(is)-244(implemented)-244(to)-244(take)-244(advantage)-243(of)-244(sub-)]TJ 16.04 -11.955 Td [(sequences)-314(that)-313(may)-314(be)-314(alr)18(eady)-314(in)-313(the)-314(desir)18(ed)-314(or)18(dering)-314(prior)-313(to)-314(the)]TJ 0 -11.956 Td [(subr)18(outine)-390(call;)-459(this)-390(situation)-389(is)-390(r)18(elatively)-390(common)-389(when)-390(dealing)]TJ 0 -11.955 Td [(with)-335(gr)18(oups)-335(of)-335(indices)-335(of)-335(sparse)-336(matrix)-335(entries,)-356(thus)-335(mer)18(ge-sort)-335(is)]TJ 0 -11.955 Td [(the)-319(pr)18(eferr)18(ed)-318(choice)-319(when)-319(a)-318(sorting)-319(is)-319(needed)-318(by)-319(other)-319(r)18(outi)1(nes)-319(in)]TJ 0 -11.955 Td [(the)-250(library)111(.)]TJ +0 g 0 G + 117.559 -193.275 Td [(110)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(matrix)-250(to)-250(be)-250(assembled.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf -24 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ ET -q -1 0 0 1 360.068 280.918 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q + +endstream +endobj +1729 0 obj +<< +/Length 171 +>> +stream +0 g 0 G +0 g 0 G BT -/F59 9.9626 Tf 363.206 280.719 Td [(Tspmat)]TJ +/F59 14.3462 Tf 99.895 705.784 Td [(7)-1000(Parallel)-250(environment)-250(routines)]TJ +0 g 0 G +/F62 9.9626 Tf 164.384 -615.346 Td [(111)]TJ +0 g 0 G ET -q -1 0 0 1 395.216 280.918 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 398.354 280.719 Td [(type)]TJ + +endstream +endobj +1733 0 obj +<< +/Length 5510 +>> +stream 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -268.57 -20.421 Td [(desc)]TJ +BT +/F59 11.9552 Tf 150.705 706.129 Td [(7.1)-1000(psb)]TJ ET q -1 0 0 1 171.218 260.497 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 174.207 260.298 Td [(a)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(init)-250(\227)-250(Initializes)-250(PSBLAS)-250(parallel)-250(environment)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in/out)]TJ/F54 9.9626 Tf 27.297 0 Td [(.)]TJ -59.098 -11.956 Td [(Speci\002ed)-290(as:)-389(a)-290(str)8(uctur)18(ed)-290(data)-289(of)-290(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 171.305 0 Td [(psb)]TJ +0 g 0 G +/F67 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_init\050ctxt,)-525(np,)-525(basectxt,)-525(ids\051)]TJ/F62 9.9626 Tf 14.944 -21.918 Td [(This)-214(subr)18(outine)-215(initializes)-214(the)-215(PSBLAS)-214(parallel)-215(envir)18(onment,)-221(de\002ning)-215(a)-214(vir)18(-)]TJ -14.944 -11.955 Td [(tual)-250(parallel)-250(machine.)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(np)]TJ +0 g 0 G +/F62 9.9626 Tf 17.156 0 Td [(Number)-250(of)-250(pr)18(ocesses)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 7.751 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-560(Default:)-310(use)-250(all)-250(available)-250(pr)18(ocesses.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.926 Td [(basectxt)]TJ +0 g 0 G +/F62 9.9626 Tf 41.494 0 Td [(the)-321(initial)-321(communication)-321(context.)-524(The)-321(new)-321(context)-321(will)-321(be)-321(de\002ned)]TJ -16.587 -11.955 Td [(fr)18(om)-250(the)-250(pr)18(ocesses)-250(participating)-250(in)-250(the)-250(initial)-250(one.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-560(Default:)-310(use)-250(MPI)]TJ ET q -1 0 0 1 363.235 212.677 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 387.574 466.194 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 366.373 212.477 Td [(desc)]TJ +/F62 9.9626 Tf 390.563 465.994 Td [(COMM)]TJ ET q -1 0 0 1 387.922 212.677 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 424.904 466.194 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F59 9.9626 Tf 391.06 212.477 Td [(type)]TJ +/F62 9.9626 Tf 427.893 465.994 Td [(WORLD.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)-429(If)-290(the)-290(matrix)-289(was)]TJ -236.371 -11.955 Td [(allocated)-209(with)]TJ/F59 9.9626 Tf 64.153 0 Td [(bldmode=psb_matbld_remote_)]TJ/F54 9.9626 Tf 135.988 0 Td [(,)-217(then)-210(the)-209(descriptor)-209(will)-209(be)]TJ -200.141 -11.955 Td [(r)18(eassembled.)]TJ +/F59 9.9626 Tf -277.188 -19.925 Td [(ids)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -20.421 Td [(info)]TJ +/F62 9.9626 Tf 18.809 0 Td [(Identities)-306(of)-307(the)-306(pr)18(ocesses)-307(to)-306(use)-306(for)-307(the)-306(new)-306(context;)-335(the)-306(ar)18(gument)-307(is)-306(ig-)]TJ 6.097 -11.955 Td [(nor)18(ed)-388(when)]TJ/F67 9.9626 Tf 58.258 0 Td [(np)]TJ/F62 9.9626 Tf 14.324 0 Td [(is)-388(not)-388(speci\002ed.)-723(This)-388(allows)-388(the)-387(pr)18(ocesses)-388(in)-388(the)-388(new)]TJ -72.582 -11.955 Td [(envir)18(onment)-250(to)-250(be)-250(in)-250(an)-250(or)18(der)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(original)-250(one.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)-560(Default:)-310(use)-250(the)-250(indices)]TJ/F93 10.3811 Tf 240.57 0 Td [(\050)]TJ/F62 9.9626 Tf 4.15 0 Td [(0)-179(.)-192(.)-191(.)]TJ/F60 9.9626 Tf 19.966 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1)]TJ/F93 10.3811 Tf 5.106 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F59 9.9626 Tf -322.483 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G - 141.968 -29.888 Td [(86)]TJ +0 g 0 G + 0 -19.925 Td [(ctxt)]TJ +0 g 0 G +/F62 9.9626 Tf 21.021 0 Td [(the)-285(communication)-284(context)-285(identifying)-285(the)-284(virtual)-285(parallel)-285(machine,)-293(type)]TJ/F67 9.9626 Tf 3.885 -11.955 Td [(psb_ctxt_type)]TJ/F62 9.9626 Tf 67.995 0 Td [(.)-327(Note)-256(that)-256(this)-256(is)-256(always)-256(a)-255(duplicate)-256(of)]TJ/F67 9.9626 Tf 174.426 0 Td [(basectxt)]TJ/F62 9.9626 Tf 41.842 0 Td [(,)-257(so)-256(that)]TJ -284.263 -11.955 Td [(library)-296(communications)-297(ar)18(e)-296(completely)-297(separated)-296(fr)18(om)-297(other)-296(communi-)]TJ 0 -11.955 Td [(cation)-250(operations.)]TJ 0 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -66.381 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ/F59 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(A)-250(call)-250(to)-250(this)-250(r)18(outine)-250(must)-250(pr)18(ecede)-250(any)-250(other)-250(PSBLAS)-250(call.)]TJ +0 g 0 G + 0 -19.926 Td [(2.)]TJ +0 g 0 G + [-500(It)-194(is)-195(an)-194(err)18(or)-194(to)-194(specify)-195(a)-194(value)-194(for)]TJ/F60 9.9626 Tf 158.156 0 Td [(n)-80(p)]TJ/F62 9.9626 Tf 13.378 0 Td [(gr)18(eater)-194(than)-195(the)-194(number)-194(of)-194(pr)18(ocesses)]TJ -159.081 -11.955 Td [(available)-250(in)-250(the)-250(underlying)-250(base)-250(parallel)-250(envir)18(onment.)]TJ +0 g 0 G + 139.477 -84.647 Td [(112)]TJ 0 g 0 G ET endstream endobj -1467 0 obj +1739 0 obj << -/Length 3146 +/Length 4457 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(Notes)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(7.2)-1000(psb)]TJ +ET +q +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 151.016 706.129 Td [(info)-211(\227)-211(Return)-211(information)-210(about)-211(PSBLAS)-211(parallel)-211(en-)]TJ -24.221 -13.948 Td [(vironment)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G - [-500(On)-226(entry)-227(to)-226(this)-227(r)18(outine)-226(the)-227(descriptor)-226(must)-227(be)-226(in)-227(the)-226(assembled)-227(state,)-231(i.e.)]TJ/F59 9.9626 Tf 12.453 -11.956 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 49.564 0 Td [(must)-250(alr)18(eady)-250(have)-250(been)-250(called.)]TJ +/F67 9.9626 Tf -26.9 -18.964 Td [(call)-525(psb_info\050ctxt,)-525(iam,)-525(np\051)]TJ/F62 9.9626 Tf 14.944 -21.917 Td [(This)-397(subr)18(outine)-396(r)18(eturns)-397(information)-397(about)-396(the)-397(PSBLAS)-397(parallel)-396(envir)18(on-)]TJ -14.944 -11.956 Td [(ment,)-250(de\002ning)-250(a)-250(virtual)-250(parallel)-250(machine.)]TJ 0 g 0 G - -62.017 -19.925 Td [(2.)]TJ +/F59 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G - [-500(The)-250(sparse)-250(matrix)-250(may)-250(be)-250(in)-250(either)-250(the)-250(build)-250(or)-250(update)-250(state;)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - 0 -19.925 Td [(3.)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - [-500(Duplicate)-421(entries)-422(ar)18(e)-421(detected)-421(and)-422(handled)-421(in)-421(both)-421(build)-422(and)-421(update)]TJ 12.453 -11.955 Td [(state,)-244(wit)1(h)-242(the)-242(exception)-242(of)-242(the)-242(err)18(or)-242(action)-242(that)-242(is)-242(only)-241(taken)-242(in)-242(the)-242(build)]TJ 0 -11.955 Td [(state,)-250(i.e.)-310(on)-250(the)-250(\002rst)-250(assembly;)]TJ 0 g 0 G - -12.453 -19.926 Td [(4.)]TJ + 0 -19.925 Td [(ctxt)]TJ 0 g 0 G - [-500(If)-190(the)-190(update)-190(choice)-190(is)]TJ/F59 9.9626 Tf 108.372 0 Td [(psb_upd_perm_)]TJ/F54 9.9626 Tf 67.995 0 Td [(,)-202(then)-190(subsequent)-190(calls)-190(to)]TJ/F59 9.9626 Tf 109.946 0 Td [(psb_spins)]TJ/F54 9.9626 Tf -273.86 -11.955 Td [(to)-309(update)-309(the)-308(matrix)-309(must)-309(be)-309(arranged)-309(in)-308(such)-309(a)-309(way)-309(as)-309(to)-308(pr)18(oduce)-309(ex-)]TJ 0 -11.955 Td [(actly)-319(the)-320(same)-319(sequence)-320(of)-319(coef)18(\002cient)-319(values)-320(as)-319(encounter)18(ed)-319(at)-320(the)-319(\002rst)]TJ 0 -11.955 Td [(assembly;)]TJ +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G - -12.453 -19.926 Td [(5.)]TJ +/F59 9.9626 Tf -24.907 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G - [-500(The)-250(output)-250(storage)-250(format)-250(need)-250(not)-250(be)-250(the)-250(same)-250(on)-250(all)-250(pr)18(ocesses;)]TJ 0 g 0 G - 0 -19.925 Td [(6.)]TJ + 0 -19.925 Td [(iam)]TJ 0 g 0 G - [-500(On)-249(exit)-249(fr)18(om)-250(this)-249(r)18(outine)-249(the)-249(matrix)-249(is)-250(in)-249(the)-249(assembled)-249(state,)-250(and)-249(thus)-249(is)]TJ 12.453 -11.955 Td [(suitable)-250(for)-250(the)-250(computational)-250(r)18(outines;)]TJ +/F62 9.9626 Tf 22.137 0 Td [(Identi\002er)-250(of)-250(curr)18(ent)-250(pr)18(ocess)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 2.77 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ/F91 10.3811 Tf 134.19 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1)]TJ/F91 10.3811 Tf 7.873 0 Td [(\024)]TJ/F60 9.9626 Tf 11.017 0 Td [(i)-47(a)-25(m)]TJ/F91 10.3811 Tf 18.677 0 Td [(\024)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1)]TJ 0 g 0 G - -12.453 -19.925 Td [(7.)]TJ +/F59 9.9626 Tf -239.579 -19.925 Td [(np)]TJ 0 g 0 G - [-500(If)-431(the)]TJ/F59 9.9626 Tf 41.543 0 Td [(bldmode=psb_matbld_remote_)]TJ/F54 9.9626 Tf 140.288 0 Td [(value)-431(was)-432(speci\002ed)-431(at)-432(allocation)]TJ -169.378 -11.955 Td [(time,)-278(contributions)-272(de\002ned)-272(on)-272(the)-273(curr)18(ent)-272(pr)18(ocess)-272(but)-272(belonging)-273(to)-272(a)-272(r)18(e-)]TJ 0 -11.956 Td [(mote)-267(pr)18(ocess)-266(will)-267(be)-267(handled)-267(accor)18(dingly)111(.)-360(This)-267(is)-266(most)-267(likely)-267(to)-266(occur)-267(in)]TJ 0 -11.955 Td [(\002nite)-288(element)-288(applications,)-297(with)]TJ/F59 9.9626 Tf 145.88 0 Td [(dupl=psb_dupl_add_)]TJ/F54 9.9626 Tf 94.147 0 Td [(;)-307(it)-288(is)-287(necessary)-288(to)]TJ -240.027 -11.955 Td [(check)-236(for)-235(possible)-236(updates)-236(needed)-235(in)-236(the)-236(descriptor)74(,)-238(hence)-236(ther)18(e)-236(will)-235(be)-236(a)]TJ 0 -11.955 Td [(r)8(untime)-250(over)18(head.)]TJ +/F62 9.9626 Tf 17.156 0 Td [(Number)-250(of)-250(pr)18(ocesses)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 7.751 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ 0 g 0 G - 141.968 -332.752 Td [(87)]TJ +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(For)-396(pr)18(ocesses)-396(in)-395(the)-396(virtual)-396(parallel)-396(machine)-396(the)-396(ident)1(i\002er)-396(will)-396(satisfy)]TJ 12.453 -11.955 Td [(0)]TJ/F91 10.3811 Tf 7.873 0 Td [(\024)]TJ/F60 9.9626 Tf 11.016 0 Td [(i)-47(a)-25(m)]TJ/F91 10.3811 Tf 18.678 0 Td [(\024)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1;)]TJ +0 g 0 G + -84.741 -19.925 Td [(2.)]TJ +0 g 0 G + [-500(If)-349(the)-349(user)-350(has)-349(r)18(equested)-349(on)]TJ/F67 9.9626 Tf 142.217 0 Td [(psb_init)]TJ/F62 9.9626 Tf 45.321 0 Td [(a)-349(number)-349(of)-350(pr)18(ocesses)-349(less)-349(than)]TJ -175.085 -11.956 Td [(the)-321(total)-322(available)-321(in)-321(the)-322(parallel)-321(execution)-322(envir)18(onment,)-339(the)-321(r)18(emaining)]TJ 0 -11.955 Td [(pr)18(ocesses)-229(will)-228(have)-229(on)-229(r)18(eturn)]TJ/F60 9.9626 Tf 130.21 0 Td [(i)-47(a)-25(m)]TJ/F93 10.3811 Tf 18.678 0 Td [(=)]TJ/F91 10.3811 Tf 11.086 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1;)-236(the)-229(on)1(ly)-229(call)-229(involving)]TJ/F67 9.9626 Tf 110.162 0 Td [(ctxt)]TJ/F62 9.9626 Tf 23.2 0 Td [(that)]TJ -301.53 -11.955 Td [(any)-250(such)-250(pr)18(ocess)-250(may)-250(execute)-250(is)-250(to)]TJ/F67 9.9626 Tf 155.296 0 Td [(psb_exit)]TJ/F62 9.9626 Tf 41.843 0 Td [(.)]TJ +0 g 0 G + -57.662 -174.311 Td [(113)]TJ 0 g 0 G ET endstream endobj -1480 0 obj +1745 0 obj << -/Length 2987 +/Length 4180 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.10)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(7.3)-1000(psb)]TJ ET q -1 0 0 1 204.216 706.328 cm +1 0 0 1 198.238 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(spfree)-250(\227)-250(Frees)-250(a)-250(sparse)-250(matrix)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(exit)-250(\227)-250(Exit)-250(from)-250(PSBLAS)-250(parallel)-250(environment)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_spfree\050a,)-525(desc_a,)-525(info\051)]TJ +/F67 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_exit\050ctxt\051)]TJ 0 -11.955 Td [(call)-525(psb_exit\050ctxt,close\051)]TJ/F62 9.9626 Tf 14.944 -21.918 Td [(This)-250(subr)18(outine)-250(exits)-250(fr)18(om)-250(the)-250(PSBLAS)-250(parallel)-250(virtual)-250(machine.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(a)]TJ + 0 -19.926 Td [(ctxt)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(to)-250(be)-250(fr)18(eed.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf -28.343 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(required)]TJ/F54 9.9626 Tf -23.999 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.137 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 577.576 Td [(Tspmat)]TJ -ET -q -1 0 0 1 395.216 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 398.354 577.576 Td [(type)]TJ +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F59 9.9626 Tf -24.907 -19.926 Td [(close)]TJ 0 g 0 G -/F51 9.9626 Tf -268.57 -19.925 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 557.85 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 557.651 Td [(a)]TJ +/F62 9.9626 Tf 27.666 0 Td [(Whether)-369(to)-368(close)-369(all)-369(data)-368(str)8(uctur)18(es)-369(r)18(elated)-369(to)-368(the)-369(virtual)-369(parall)1(el)-369(ma-)]TJ -2.759 -11.955 Td [(chine,)-250(besides)-250(those)-250(associated)-250(with)-250(ctxt.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(variable,)-250(default)-250(value:)-310(tr)8(ue.)]TJ/F59 11.9552 Tf -24.907 -19.925 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 510.029 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 509.83 Td [(desc)]TJ -ET -q -1 0 0 1 384.755 510.029 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 387.893 509.83 Td [(type)]TJ +/F62 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ + [-500(This)-376(r)18(outine)-376(may)-377(be)-376(called)-376(even)-376(if)-377(a)-376(pr)18(evious)-376(call)-376(to)]TJ/F67 9.9626 Tf 255.069 0 Td [(psb_info)]TJ/F62 9.9626 Tf 45.591 0 Td [(has)-376(r)18(e-)]TJ -288.206 -11.955 Td [(turned)-251(with)]TJ/F60 9.9626 Tf 55.156 0 Td [(i)-47(a)-25(m)]TJ/F93 10.3811 Tf 18.696 0 Td [(=)]TJ/F91 10.3811 Tf 11.104 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1;)-251(indeed,)-252(it)-251(it)-251(i)1(s)-251(the)-251(only)-251(r)18(outine)-251(that)-251(may)-251(be)-251(called)]TJ -93.15 -11.955 Td [(with)-250(ar)18(gument)]TJ/F67 9.9626 Tf 68.133 0 Td [(ctxt)]TJ/F62 9.9626 Tf 23.412 0 Td [(in)-250(this)-250(situation.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ + -103.999 -19.926 Td [(2.)]TJ 0 g 0 G + [-500(A)-269(call)-269(to)-268(this)-269(r)18(outine)-269(with)]TJ/F67 9.9626 Tf 128.502 0 Td [(close=.true.)]TJ/F62 9.9626 Tf 65.442 0 Td [(implies)-269(a)-268(call)-269(to)]TJ/F67 9.9626 Tf 72.059 0 Td [(MPI_Finalize)]TJ/F62 9.9626 Tf 62.764 0 Td [(,)]TJ -316.313 -11.955 Td [(after)-250(which)-250(no)-250(parallel)-250(r)18(outine)-250(may)-250(be)-250(called.)]TJ 0 g 0 G - 0 -19.925 Td [(info)]TJ + -12.454 -19.925 Td [(3.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + [-500(If)-289(the)-288(user)-288(whishes)-289(to)-288(use)-289(multiple)-288(communication)-289(contexts)-288(in)-289(the)-288(same)]TJ 12.454 -11.955 Td [(pr)18(ogram,)-401(or)-371(to)-371(enter)-371(and)-371(exit)-371(multiple)-371(times)-370(into)-371(the)-371(parallel)-371(envir)18(on-)]TJ 0 -11.956 Td [(ment,)-425(this)-389(r)18(outine)-390(may)-390(be)-390(called)-389(to)-390(selectively)-390(close)-390(the)-389(contexts)-390(with)]TJ/F67 9.9626 Tf 0 -11.955 Td [(close=.false.)]TJ/F62 9.9626 Tf 67.994 0 Td [(,)-202(while)-190(on)-190(the)-190(last)-190(call)-190(it)-190(should)-190(be)-190(called)-190(with)]TJ/F67 9.9626 Tf 196.919 0 Td [(close=.true.)]TJ/F62 9.9626 Tf -264.913 -11.955 Td [(to)-250(shutdown)-250(in)-250(a)-250(clean)-250(way)-250(the)-250(entir)18(e)-250(parallel)-250(envir)18(onment.)]TJ 0 g 0 G - 141.968 -329.728 Td [(88)]TJ + 139.477 -212.169 Td [(114)]TJ 0 g 0 G ET endstream endobj -1486 0 obj +1752 0 obj << -/Length 3858 +/Length 2476 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.11)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(7.4)-1000(psb)]TJ ET q -1 0 0 1 153.407 706.328 cm +1 0 0 1 147.429 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 156.993 706.129 Td [(sprn)-254(\227)-255(Reinit)-254(sparse)-255(matrix)-254(structure)-254(for)-255(psblas)-254(rou-)]TJ -24.221 -13.948 Td [(tines.)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -32.877 -18.964 Td [(call)-525(psb_sprn\050a,)-525(decsc_a,)-525(info,)-525(clear\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.917 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.926 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(to)-250(be)-250(r)18(einitialized.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf -28.343 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf -24 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +/F59 11.9552 Tf 151.016 706.129 Td [(get)]TJ ET q -1 0 0 1 309.258 563.828 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 168.338 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 312.397 563.628 Td [(Tspmat)]TJ +/F59 11.9552 Tf 171.925 706.129 Td [(mpi)]TJ ET q -1 0 0 1 344.406 563.828 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 194.556 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 347.544 563.628 Td [(type)]TJ +/F59 11.9552 Tf 198.143 706.129 Td [(comm)-250(\227)-250(Get)-250(the)-250(MPI)-250(communicator)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -268.571 -19.925 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 543.902 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 543.703 Td [(a)]TJ +/F67 9.9626 Tf -98.248 -18.964 Td [(icomm)-525(=)-525(psb_get_mpi_comm\050ctxt\051)]TJ/F62 9.9626 Tf 14.944 -21.918 Td [(This)-417(func)1(tion)-417(r)18(eturns)-416(the)-417(MPI)-416(communicator)-417(associated)-416(with)-417(a)-416(PSBLAS)]TJ -14.944 -11.955 Td [(context)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 496.082 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 495.882 Td [(desc)]TJ -ET -q -1 0 0 1 333.945 496.082 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 337.084 495.882 Td [(type)]TJ +/F59 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -19.925 Td [(clear)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 26.561 0 Td [(Choose)-250(whether)-250(to)-250(zer)18(o)-250(out)-250(matrix)-250(coef)18(\002cients)]TJ -1.654 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Default:)-310(tr)8(ue.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.917 Td [(On)-250(Return)]TJ + 0 -19.926 Td [(ctxt)]TJ 0 g 0 G +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G - 0 -19.926 Td [(info)]TJ +/F59 9.9626 Tf -24.907 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ + 0 -19.926 Td [(Function)-250(value)]TJ 0 g 0 G - [-500(On)-250(exit)-250(fr)18(om)-250(this)-250(r)18(outine)-250(the)-250(sparse)-250(matrix)-250(is)-250(in)-250(the)-250(update)-250(state.)]TJ +/F62 9.9626 Tf 72.777 0 Td [(The)-372(MPI)-371(communicator)-372(associated)-371(with)-372(the)-372(PSBLAS)-371(virtual)]TJ -47.87 -11.955 Td [(parallel)-250(machine.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ/F59 11.9552 Tf -71.651 -33.873 Td [(Notes)]TJ/F62 9.9626 Tf 34.165 0 Td [(The)-230(subr)18(outine)-230(version)]TJ/F67 9.9626 Tf 103.913 0 Td [(psb_get_mpicomm)]TJ/F62 9.9626 Tf 80.748 0 Td [(is)-230(still)-230(available)-230(but)-230(is)-230(depr)18(e-)]TJ -218.826 -11.955 Td [(cated.)]TJ 0 g 0 G - 154.421 -206.192 Td [(89)]TJ + 164.384 -319.766 Td [(115)]TJ 0 g 0 G ET endstream endobj -1493 0 obj +1757 0 obj << -/Length 6166 +/Length 3337 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.12)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(7.5)-1000(psb)]TJ ET q -1 0 0 1 204.216 706.328 cm +1 0 0 1 198.238 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(geall)-250(\227)-250(Allocates)-250(a)-250(dense)-250(matrix)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_geall\050x,)-525(desc_a,)-525(info[,)-525(dupl,)-525(bldmode,)-525(n,)-525(lb]\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -19.627 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.01 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.009 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 629.719 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 629.519 Td [(a)]TJ -0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 136.328 0 Td [(psb)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(get)]TJ ET q -1 0 0 1 328.257 581.898 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 219.148 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 331.395 581.699 Td [(desc)]TJ +/F59 11.9552 Tf 222.735 706.129 Td [(mpi)]TJ ET q -1 0 0 1 352.944 581.898 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 245.365 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 356.083 581.699 Td [(type)]TJ +/F59 11.9552 Tf 248.952 706.129 Td [(rank)-250(\227)-250(Get)-250(the)-250(MPI)-250(rank)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -226.299 -30.965 Td [(n)]TJ +/F67 9.9626 Tf -98.247 -18.964 Td [(rank)-525(=)-525(psb_get_mpi_rank\050ctxt,)-525(id\051)]TJ/F62 9.9626 Tf 14.944 -21.918 Td [(This)-250(function)-250(r)18(eturns)-250(the)-250(MPI)-250(rank)-250(of)-250(the)-250(PSBLAS)-250(pr)18(ocess)]TJ/F60 9.9626 Tf 257.337 0 Td [(i)-32(d)]TJ 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(The)-250(number)-250(of)-250(columns)-250(of)-250(the)-250(dense)-250(matrix)-250(to)-250(be)-250(allocated.)]TJ 13.838 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-297(as:)-404(Integer)-297(scalar)74(,)-309(default)-297(1.)-450(It)-297(is)-297(not)-297(a)-297(valid)-297(ar)18(gument)-297(if)]TJ/F52 9.9626 Tf 295.578 0 Td [(x)]TJ/F54 9.9626 Tf 8.164 0 Td [(is)-297(a)]TJ -303.742 -11.956 Td [(rank-1)-250(array)111(.)]TJ +/F59 9.9626 Tf -272.281 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.009 Td [(lb)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 14.386 0 Td [(The)-237(lower)-238(bound)-237(for)-238(the)-237(column)-238(index)-237(range)-237(of)-238(the)-237(dense)-238(matrix)-237(to)-238(be)-237(allo-)]TJ 10.52 -11.955 Td [(cated.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-297(as:)-404(Integer)-297(scalar)74(,)-309(default)-297(1.)-450(It)-297(is)-297(not)-297(a)-297(valid)-297(ar)18(gument)-297(if)]TJ/F52 9.9626 Tf 295.578 0 Td [(x)]TJ/F54 9.9626 Tf 8.164 0 Td [(is)-297(a)]TJ -303.742 -11.955 Td [(rank-1)-250(array)111(.)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.009 Td [(dupl)]TJ 0 g 0 G -/F54 9.9626 Tf 26.56 0 Td [(How)-250(to)-250(handle)-250(duplicate)-250(coef)18(\002cients.)]TJ -1.654 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-243(as:)-306(integer)74(,)-244(possible)-243(values:)]TJ/F59 9.9626 Tf 164.942 0 Td [(psb_dupl_ovwrt_)]TJ/F54 9.9626 Tf 78.455 0 Td [(,)]TJ/F59 9.9626 Tf 4.923 0 Td [(psb_dupl_add_)]TJ/F54 9.9626 Tf 67.994 0 Td [(;)]TJ/F59 9.9626 Tf -316.314 -11.955 Td [(psb_dupl_err_)]TJ/F54 9.9626 Tf 70.485 0 Td [(has)-250(no)-250(ef)18(fect.)]TJ + 0 -19.926 Td [(ctxt)]TJ 0 g 0 G -/F51 9.9626 Tf -95.391 -19.009 Td [(bldmode)]TJ +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F54 9.9626 Tf 45.937 0 Td [(Whether)-372(to)-372(keep)-371(track)-372(of)-372(matrix)-372(entries)-371(that)-372(do)-372(not)-372(belong)-371(to)-372(the)]TJ -21.031 -11.955 Td [(curr)18(ent)-250(pr)18(ocess.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(an)-190(integer)-190(value)]TJ/F59 9.9626 Tf 128.288 0 Td [(psb_matbld_noremote_)]TJ/F54 9.9626 Tf 104.607 0 Td [(,)]TJ/F59 9.9626 Tf 4.503 0 Td [(psb_matbld_remote_)]TJ/F54 9.9626 Tf 94.146 0 Td [(.)]TJ -331.544 -11.955 Td [(Default:)]TJ/F59 9.9626 Tf 38.515 0 Td [(psb_matbld_noremote_)]TJ/F54 9.9626 Tf 104.607 0 Td [(.)]TJ +/F59 9.9626 Tf -24.907 -19.926 Td [(id)]TJ 0 g 0 G -/F51 9.9626 Tf -168.028 -19.627 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 14.386 0 Td [(Identi\002er)-250(of)-250(a)-250(pr)18(ocess)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 10.521 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(0)]TJ/F91 10.3811 Tf 141.938 0 Td [(\024)]TJ/F60 9.9626 Tf 11.017 0 Td [(i)-32(d)]TJ/F91 10.3811 Tf 11.086 0 Td [(\024)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.503 0 Td [(\000)]TJ/F62 9.9626 Tf 10.132 0 Td [(1)]TJ 0 g 0 G +/F59 9.9626 Tf -223.669 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G - 0 -19.009 Td [(x)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(allocated.)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.943 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-273(as:)-357(a)-273(rank)-274(one)-273(or)-274(two)-273(array)-273(with)-274(the)-273(ALLOCA)74(T)74(ABLE)-273(attribute)]TJ 0 -11.955 Td [(or)-250(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 86.634 0 Td [(psb)]TJ -ET -q -1 0 0 1 278.564 132.48 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 281.702 132.281 Td [(T)]TJ -ET -q -1 0 0 1 287.56 132.48 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 290.699 132.281 Td [(vect)]TJ -ET -q -1 0 0 1 312.247 132.48 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 315.386 132.281 Td [(type)]TJ + 0 -19.925 Td [(Funciton)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(,)-250(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ +/F62 9.9626 Tf 72.777 0 Td [(The)-250(MPI)-250(rank)-250(associated)-250(with)-250(the)-250(PSBLAS)-250(pr)18(ocess)]TJ/F60 9.9626 Tf 224.291 0 Td [(i)-32(d)]TJ/F62 9.9626 Tf 8.195 0 Td [(.)]TJ -280.356 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ/F59 11.9552 Tf -71.651 -33.873 Td [(Notes)]TJ/F62 9.9626 Tf 35.734 0 Td [(The)-388(subr)18(outine)-387(version)]TJ/F67 9.9626 Tf 108.62 0 Td [(psb_get_rank)]TJ/F62 9.9626 Tf 66.626 0 Td [(is)-388(still)-387(available)-388(but)-387(is)-388(depr)18(e-)]TJ -210.98 -11.955 Td [(cated.)]TJ 0 g 0 G - -18.728 -41.843 Td [(90)]TJ + 164.384 -275.93 Td [(116)]TJ 0 g 0 G ET endstream endobj -1497 0 obj +1761 0 obj << -/Length 925 +/Length 1155 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(info)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(7.6)-1000(psb)]TJ +ET +q +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 151.016 706.129 Td [(wtime)-250(\227)-250(W)74(all)-250(clock)-250(timing)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +/F67 9.9626 Tf -51.121 -18.964 Td [(time)-525(=)-525(psb_wtime\050\051)]TJ/F62 9.9626 Tf 14.944 -21.918 Td [(This)-298(function)-298(r)18(eturns)-299(a)-298(wall)-298(clock)-298(timer)74(.)-455(The)-298(r)18(esolution)-298(of)-299(the)-298(timer)-298(is)-298(de-)]TJ -14.944 -11.955 Td [(pendent)-250(on)-250(the)-250(underlying)-250(parallel)-250(envir)18(onment)-250(implementation.)]TJ 0 g 0 G - [-500(Using)]TJ/F59 9.9626 Tf 41.798 0 Td [(psb_matbld_remote_)]TJ/F54 9.9626 Tf 97.28 0 Td [(is)-314(likely)-315(to)-315(cau)1(se)-315(a)-315(r)8(untime)-314(over)18(head)-315(at)-314(as-)]TJ -126.625 -11.955 Td [(sembly)-250(time;)]TJ +/F59 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G - 141.968 -514.072 Td [(91)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Exit)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(Function)-250(value)]TJ +0 g 0 G +/F62 9.9626 Tf 72.777 0 Td [(the)-250(elapsed)-250(time)-250(in)-250(seconds.)]TJ -47.87 -11.955 Td [(Returned)-250(as:)-310(a)]TJ/F67 9.9626 Tf 66.022 0 Td [(real\050psb_dpk_\051)]TJ/F62 9.9626 Tf 75.715 0 Td [(variable.)]TJ +0 g 0 G + -2.26 -491.123 Td [(117)]TJ 0 g 0 G ET endstream endobj -1505 0 obj +1765 0 obj << -/Length 6336 +/Length 1388 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.13)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(7.7)-1000(psb)]TJ ET q -1 0 0 1 204.216 706.328 cm +1 0 0 1 198.238 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(geins)-250(\227)-250(Dense)-250(matrix)-250(insertion)-250(routine)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_geins\050m,)-525(irw,)-525(val,)-525(x,)-525(desc_a,)-525(info)-525([,local]\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -20.57 Td [(T)90(ype:)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(barrier)-240(\227)-240(Sinchronization)-239(point)-240(parallel)-240(environment)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.386 Td [(On)-250(Entry)]TJ +/F67 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_barrier\050ctxt\051)]TJ/F62 9.9626 Tf 14.944 -21.918 Td [(This)-358(subr)18(outine)-357(acts)-358(as)-358(an)-358(explicit)-357(synchr)18(onization)-358(point)-358(for)-357(the)-358(PSBLAS)]TJ -14.944 -11.955 Td [(parallel)-250(virtual)-250(machine.)]TJ 0 g 0 G +/F59 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G - 0 -19.386 Td [(m)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 13.838 0 Td [(Number)-250(of)-250(r)18(ows)-250(in)]TJ/F52 9.9626 Tf 86.569 0 Td [(v)-40(a)-25(l)]TJ/F54 9.9626 Tf 15.736 0 Td [(to)-250(be)-250(inserted.)]TJ -91.237 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.386 Td [(irw)]TJ 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(Indices)-381(of)-382(the)-382(r)18(ows)-381(to)-382(be)-381(inserted.)-705(Speci\002cally)111(,)-414(r)18(ow)]TJ/F52 9.9626 Tf 239.84 0 Td [(i)]TJ/F54 9.9626 Tf 6.765 0 Td [(of)]TJ/F52 9.9626 Tf 12.683 0 Td [(v)-40(a)-25(l)]TJ/F54 9.9626 Tf 17.046 0 Td [(will)-381(be)-382(in-)]TJ -271.901 -11.955 Td [(serted)-344(into)-344(the)-344(local)-344(r)18(ow)-344(corr)18(esponding)-344(to)-344(the)-344(global)-344(r)18(ow)-344(index)]TJ/F52 9.9626 Tf 290.218 0 Td [(i)-22(r)-35(w)]TJ/F85 10.3811 Tf 14.654 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 4.149 0 Td [(.)]TJ -316.314 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)]TJ + 0 -19.926 Td [(ctxt)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.386 Td [(val)]TJ +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F54 9.9626 Tf 18.819 0 Td [(the)-250(dense)-250(submatrix)-250(to)-250(be)-250(inserted.)]TJ 6.087 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(1)-250(or)-250(2)-250(array)111(.)-310(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ + 139.477 -455.258 Td [(118)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.386 Td [(desc)]TJ ET -q -1 0 0 1 171.218 414.446 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 414.247 Td [(a)]TJ + +endstream +endobj +1769 0 obj +<< +/Length 1283 +>> +stream +0 g 0 G 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 366.626 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q BT -/F59 9.9626 Tf 363.206 366.426 Td [(desc)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(7.8)-1000(psb)]TJ ET q -1 0 0 1 384.755 366.626 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 147.429 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 387.893 366.426 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F59 11.9552 Tf 151.016 706.129 Td [(abort)-250(\227)-250(Abort)-250(a)-250(computation)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -19.386 Td [(local)]TJ 0 g 0 G -/F54 9.9626 Tf 26.56 0 Td [(Whether)-240(the)-240(entries)-241(in)-240(the)-240(index)-240(vector)]TJ/F59 9.9626 Tf 173.162 0 Td [(irw)]TJ/F54 9.9626 Tf 15.692 0 Td [(,)-242(ar)18(e)-240(alr)18(eady)-241(i)1(n)-241(local)-240(number)18(-)]TJ -190.508 -11.955 Td [(ing.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.187 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value;)-250(default:)]TJ/F59 9.9626 Tf 162.678 0 Td [(.false.)]TJ/F54 9.9626 Tf 36.613 0 Td [(.)]TJ +/F67 9.9626 Tf -51.121 -18.964 Td [(call)-525(psb_abort\050ctxt\051)]TJ/F62 9.9626 Tf 14.944 -21.918 Td [(This)-250(subr)18(outine)-250(aborts)-250(computation)-250(on)-250(the)-250(parallel)-250(virtual)-250(machine.)]TJ 0 g 0 G -/F51 9.9626 Tf -224.197 -20.57 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - 0 -19.387 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(output)-250(dense)-250(matrix.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(rank)-190(one)-190(or)-190(two)-190(array)-190(or)-190(an)-190(object)-190(of)-190(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 242.569 0 Td [(psb)]TJ -ET -q -1 0 0 1 434.498 211.642 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 437.636 211.443 Td [(T)]TJ -ET -q -1 0 0 1 443.494 211.642 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 446.633 211.443 Td [(vect)]TJ -ET -q -1 0 0 1 468.182 211.642 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 471.32 211.443 Td [(type)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(,)]TJ -316.63 -11.955 Td [(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -31.342 Td [(info)]TJ + 0 -19.926 Td [(ctxt)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G - 141.968 -29.888 Td [(92)]TJ + 139.477 -467.213 Td [(119)]TJ 0 g 0 G ET endstream endobj -1509 0 obj +1773 0 obj << -/Length 539 +/Length 5526 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(Notes)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(7.9)-1000(psb)]TJ +ET +q +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 201.825 706.129 Td [(bcast)-250(\227)-250(Broadcast)-250(data)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G - [-500(Dense)-250(vectors/matrices)-250(do)-250(not)-250(have)-250(an)-250(associated)-250(state;)]TJ +/F67 9.9626 Tf -51.12 -20.269 Td [(call)-525(psb_bcast\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F62 9.9626 Tf 14.944 -24.611 Td [(This)-221(subr)18(outine)-222(implements)-221(a)-221(br)18(oadcast)-222(operation)-221(based)-221(on)-222(the)-221(underlying)]TJ -14.944 -11.955 Td [(communication)-250(library)111(.)]TJ 0 g 0 G - 0 -19.926 Td [(2.)]TJ +/F59 9.9626 Tf 0 -21.945 Td [(T)90(ype:)]TJ 0 g 0 G - [-500(Duplicate)-326(entries)-326(ar)18(e)-325(either)-326(overwritten)-326(or)-326(added,)-345(ther)18(e)-325(is)-326(no)-326(pr)18(ovision)]TJ 12.453 -11.955 Td [(for)-250(raising)-250(an)-250(err)18(or)-250(condition.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G - 141.968 -563.885 Td [(93)]TJ +/F59 9.9626 Tf -29.828 -22.619 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -22.619 Td [(ctxt)]TJ +0 g 0 G +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.619 Td [(dat)]TJ +0 g 0 G +/F62 9.9626 Tf 19.367 0 Td [(On)-250(the)-250(r)18(oot)-250(pr)18(ocess,)-250(the)-250(data)-250(to)-250(be)-250(br)18(oadcast.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-344(a)-344(rank)-344(1)-344(or)-344(2)-344(array)111(,)-367(or)-344(a)-344(character)-344(or)-344(logical)-344(variable,)-367(which)-344(may)-344(be)]TJ 0 -11.955 Td [(a)-377(scalar)-377(or)-377(rank)-377(1)-377(array)111(.)-1067(T)90(ype,)-409(kind,)-409(rank)-377(and)-376(size)-377(must)-377(agr)18(ee)-377(on)-377(all)]TJ 0 -11.955 Td [(pr)18(ocesses.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.619 Td [(root)]TJ +0 g 0 G +/F62 9.9626 Tf 23.253 0 Td [(Root)-250(pr)18(ocess)-250(holding)-250(data)-250(to)-250(be)-250(br)18(oadcast.)]TJ 1.654 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F69 10.3811 Tf 138.85 0 Td [(<)]TJ/F93 10.3811 Tf 8.319 0 Td [(=)]TJ/F60 9.9626 Tf 10.986 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F69 10.3811 Tf 19.923 0 Td [(<)]TJ/F93 10.3811 Tf 8.319 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.503 0 Td [(\000)]TJ/F62 9.9626 Tf 10.132 0 Td [(1,)-250(default)-250(0)]TJ +0 g 0 G +/F59 9.9626 Tf -246.025 -22.618 Td [(mode)]TJ +0 g 0 G +/F62 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.955 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F67 9.9626 Tf 199.497 0 Td [(OR)]TJ/F62 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F67 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F62 9.9626 Tf 109.837 0 Td [(,)]TJ/F67 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F62 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.956 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -34.573 Td [(request)]TJ +0 g 0 G +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -24.612 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 164.384 -29.887 Td [(120)]TJ 0 g 0 G ET endstream endobj -1518 0 obj +1777 0 obj << -/Length 6120 +/Length 5329 >> stream 0 g 0 G 0 g 0 G +0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.14)-1000(psb)]TJ -ET -q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 207.803 706.129 Td [(geasb)-250(\227)-250(Assembly)-250(a)-250(dense)-250(matrix)]TJ +/F59 9.9626 Tf 99.895 706.129 Td [(dat)]TJ 0 g 0 G +/F62 9.9626 Tf 19.368 0 Td [(On)-250(all)-250(pr)18(ocesses)-250(other)-250(than)-250(r)18(oot,)-250(the)-250(br)18(oadcasted)-250(data.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-315(a)-314(rank)-315(1)-315(or)-314(2)-315(array)111(,)-331(or)-314(a)-315(character)-315(or)-314(logical)-315(scalar)74(.)-819(T)90(ype,)-330(kind,)-331(rank)]TJ 0 -11.955 Td [(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_geasb\050x,)-525(desc_a,)-525(info,)-525(mold\051)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(request)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + [-500(The)]TJ/F67 9.9626 Tf 31.023 0 Td [(dat)]TJ/F62 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.956 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ 0 g 0 G + -12.453 -19.925 Td [(2.)]TJ 0 g 0 G - 0 -19.925 Td [(desc)]TJ + [-500(The)]TJ/F67 9.9626 Tf 32.225 0 Td [(mode)]TJ/F62 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F67 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F62 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F67 9.9626 Tf 16.309 0 Td [(request)]TJ/F62 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 171.218 625.596 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 124.802 417.212 cm +0 0 318.804 27.895 re f Q -BT -/F51 9.9626 Tf 174.207 625.397 Td [(a)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 136.328 0 Td [(psb)]TJ -ET -q -1 0 0 1 328.257 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 331.395 577.576 Td [(desc)]TJ -ET -q -1 0 0 1 352.944 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F59 9.9626 Tf 356.083 577.576 Td [(type)]TJ +/F102 8.9664 Tf 137.205 434.448 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ + [-525(psb_bcast\050ctxt,dat,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -226.299 -31.88 Td [(mold)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(vector)-250(storage.)]TJ -3.876 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-223(as:)-296(an)-223(object)-223(of)-222(a)-223(class)-223(derived)-223(fr)18(om)]TJ/F59 9.9626 Tf 199.087 0 Td [(psb)]TJ -ET -q -1 0 0 1 391.016 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 394.155 497.875 Td [(T)]TJ -ET -q -1 0 0 1 400.012 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 403.151 497.875 Td [(base)]TJ -ET -q -1 0 0 1 424.7 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 427.838 497.875 Td [(vect)]TJ -ET -q -1 0 0 1 449.387 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 452.525 497.875 Td [(type)]TJ/F54 9.9626 Tf 20.921 0 Td [(;)-232(this)]TJ -297.835 -11.955 Td [(is)-250(only)-250(allowed)-250(when)]TJ/F52 9.9626 Tf 97.12 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 42.899 0 Td [(psb)]TJ -ET -q -1 0 0 1 339.644 486.119 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 342.783 485.92 Td [(T)]TJ -ET -q -1 0 0 1 348.641 486.119 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 351.779 485.92 Td [(vect)]TJ -ET -q -1 0 0 1 373.328 486.119 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 376.466 485.92 Td [(type)]TJ + [-525(mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(ior)]TJ 0 g 0 G -/F51 9.9626 Tf -246.682 -21.918 Td [(On)-250(Return)]TJ + [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G 0 g 0 G - 0 -19.925 Td [(x)]TJ +/F62 9.9626 Tf -48.393 -36.165 Td [(3.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(assembled.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-259(as:)-328(a)-259(rank)-258(one)-259(or)-259(two)-259(array)-259(with)-259(the)-259(ALLOCA)74(T)74(ABLE)-258(or)-259(an)-259(ob-)]TJ 0 -11.955 Td [(ject)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 50.55 0 Td [(psb)]TJ -ET -q -1 0 0 1 242.48 384.5 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 245.618 384.301 Td [(T)]TJ -ET -q -1 0 0 1 251.476 384.5 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 254.614 384.301 Td [(vect)]TJ + [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F67 9.9626 Tf 216.877 0 Td [(dat)]TJ/F62 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F60 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F62 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 276.163 384.5 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 124.802 302.642 cm +0 0 318.804 60.772 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F59 9.9626 Tf 279.301 384.301 Td [(type)]TJ +/F102 8.9664 Tf 137.205 352.754 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(,)-250(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ + [-525(psb_bcast\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -149.518 -31.881 Td [(info)]TJ + [(psb_collective_start_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - [-500(On)-227(entry)-226(to)-227(this)-226(r)18(outine)-227(th)1(e)-227(descriptor)-226(must)-227(be)-226(in)-227(the)-226(assembled)-227(state,)-231(i.e.)]TJ/F59 9.9626 Tf 12.453 -11.956 Td [(psb_cdasb)]TJ/F54 9.9626 Tf 49.564 0 Td [(must)-250(alr)18(eady)-250(have)-250(been)-250(called.)]TJ + [(bcast_request\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -62.017 -19.925 Td [(2.)]TJ + -23.537 -10.959 Td [(.......)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(If)-431(the)]TJ/F59 9.9626 Tf 41.544 0 Td [(bldmode=psb_matbld_remote_)]TJ/F54 9.9626 Tf 140.287 0 Td [(value)-431(was)-432(speci\002ed)-431(at)-432(allocation)]TJ -169.378 -11.955 Td [(time,)-278(contributions)-272(de\002ned)-272(on)-273(the)-272(curr)18(ent)-272(pr)18(ocess)-272(but)-272(belonging)-273(to)-272(a)-272(r)18(e-)]TJ 0 -11.955 Td [(mote)-267(pr)18(ocess)-266(will)-267(be)-267(handled)-267(accor)18(dingly)111(.)-360(This)-267(is)-266(most)-267(likely)-267(to)-266(occur)-267(in)]TJ 0 -11.955 Td [(\002nite)-250(element)-250(applications,)-250(with)]TJ/F59 9.9626 Tf 144.277 0 Td [(dupl=psb_dupl_add_)]TJ/F54 9.9626 Tf 94.146 0 Td [(.)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ 0 g 0 G - -96.455 -104.573 Td [(94)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F102 8.9664 Tf -37.659 -10.959 Td [(call)]TJ +0 g 0 G + [-525(psb_bcast\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(psb_collective_end_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(bcast_request\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 103.537 -218.48 Td [(121)]TJ 0 g 0 G ET endstream endobj -1526 0 obj +1786 0 obj << -/Length 3224 +/Length 5829 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.15)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(7.10)-1000(psb)]TJ ET q -1 0 0 1 153.407 706.328 cm +1 0 0 1 204.216 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 156.993 706.129 Td [(gefree)-250(\227)-250(Frees)-250(a)-250(dense)-250(matrix)]TJ +/F59 11.9552 Tf 207.803 706.129 Td [(sum)-250(\227)-250(Global)-250(sum)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_gefree\050x,)-525(desc_a,)-525(info\051)]TJ +/F67 9.9626 Tf -57.098 -19.198 Td [(call)-525(psb_sum\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F62 9.9626 Tf 14.944 -22.401 Td [(This)-353(subr)18(outine)-353(implements)-354(a)-353(sum)-353(r)18(eduction)-353(operation)-353(based)-354(on)-353(the)-353(un-)]TJ -14.944 -11.955 Td [(derlying)-250(communication)-250(library)111(.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -20.288 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -20.409 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(x)]TJ + 0 -20.408 Td [(ctxt)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(fr)18(eed.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-259(as:)-328(a)-258(rank)-259(one)-259(or)-259(two)-259(array)-259(with)-259(the)-259(ALLOCA)74(T)74(ABLE)-258(or)-259(an)-259(ob-)]TJ 0 -11.955 Td [(ject)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 50.55 0 Td [(psb)]TJ -ET -q -1 0 0 1 191.67 565.82 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 194.809 565.621 Td [(T)]TJ -ET -q -1 0 0 1 200.666 565.82 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 203.805 565.621 Td [(vect)]TJ -ET -q -1 0 0 1 225.354 565.82 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 228.492 565.621 Td [(type)]TJ +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(,)-250(of)-250(type)-250(r)18(eal,)-250(complex)-250(or)-250(integer)74(.)]TJ +/F59 9.9626 Tf -24.907 -20.409 Td [(dat)]TJ 0 g 0 G -/F51 9.9626 Tf -149.518 -31.881 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 533.94 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 533.74 Td [(a)]TJ +/F62 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(sum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.956 Td [(or)-300(a)-300(rank)-300(1)-300(or)-301(2)-300(array)111(.)-760(T)90(ype,)-313(kind,)-312(rank)-300(and)-301(size)-300(must)-300(agr)18(ee)-300(on)-300(all)-300(pr)18(o-)]TJ 0 -11.955 Td [(cesses.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(variable)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 136.327 0 Td [(psb)]TJ -ET -q -1 0 0 1 277.448 486.119 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 280.586 485.92 Td [(desc)]TJ -ET -q -1 0 0 1 302.135 486.119 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 305.273 485.92 Td [(type)]TJ +/F59 9.9626 Tf -24.907 -20.408 Td [(root)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F62 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-250(to)-250(hold)-250(the)-250(\002nal)-250(sum,)-250(or)]TJ/F91 10.3811 Tf 143.744 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1)-250(to)-250(make)-250(it)-250(available)-250(on)-250(all)-250(pr)18(ocesses.)]TJ -150.284 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F91 10.3811 Tf 131.101 0 Td [(\000)]TJ/F62 9.9626 Tf 8.195 0 Td [(1)]TJ/F69 10.3811 Tf 7.873 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F69 10.3811 Tf 19.923 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ 0 g 0 G -/F51 9.9626 Tf -226.3 -33.873 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -254.343 -20.409 Td [(mode)]TJ 0 g 0 G +/F62 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.956 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F67 9.9626 Tf 199.497 0 Td [(OR)]TJ/F62 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F67 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F62 9.9626 Tf 109.837 0 Td [(,)]TJ/F67 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F62 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.955 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ 0 g 0 G - 0 -19.925 Td [(info)]TJ +/F59 9.9626 Tf -24.907 -32.364 Td [(request)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.956 Td [(be)-250(pr)18(esent.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.401 Td [(On)-250(Return)]TJ 0 g 0 G - 141.968 -293.863 Td [(95)]TJ +0 g 0 G + 0 -20.408 Td [(dat)]TJ +0 g 0 G +/F62 9.9626 Tf 19.367 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(sum)-250(operation.)]TJ 5.54 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.781 0 Td [(.)]TJ +0 g 0 G + 79.264 -29.887 Td [(122)]TJ 0 g 0 G ET endstream endobj -1530 0 obj +1790 0 obj << -/Length 3218 +/Length 4964 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.16)-1000(psb)]TJ +/F62 9.9626 Tf 124.802 706.129 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.956 Td [(or)-250(a)-250(rank)-250(1)-250(or)-250(2)-250(array)111(.)]TJ 0 -11.955 Td [(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(request)]TJ +0 g 0 G +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(The)]TJ/F67 9.9626 Tf 31.023 0 Td [(dat)]TJ/F62 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.955 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ +0 g 0 G + -12.453 -19.926 Td [(2.)]TJ +0 g 0 G + [-500(The)]TJ/F67 9.9626 Tf 32.225 0 Td [(mode)]TJ/F62 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F67 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F62 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F67 9.9626 Tf 16.309 0 Td [(request)]TJ/F62 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 124.802 441.123 cm +0 0 318.804 27.895 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F51 11.9552 Tf 207.803 706.129 Td [(gelp)-250(\227)-250(Applies)-250(a)-250(left)-250(permutation)-250(to)-250(a)-250(dense)-250(matrix)]TJ +/F102 8.9664 Tf 137.205 458.358 Td [(call)]TJ 0 g 0 G + [-525(psb_sum\050ctxt,dat,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_gelp\050trans,)-525(iperm,)-525(x,)-525(info\051)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ + [-525(mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(ior)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G 0 g 0 G - 0 -19.925 Td [(trans)]TJ +/F62 9.9626 Tf -48.393 -36.164 Td [(3.)]TJ 0 g 0 G -/F54 9.9626 Tf 27.666 0 Td [(A)-250(character)-250(that)-250(speci\002es)-250(whether)-250(to)-250(permute)]TJ/F52 9.9626 Tf 203.748 0 Td [(A)]TJ/F54 9.9626 Tf 9.808 0 Td [(or)]TJ/F52 9.9626 Tf 12.488 0 Td [(A)]TJ/F52 7.5716 Tf 7.511 3.616 Td [(T)]TJ/F54 9.9626 Tf 5.401 -3.616 Td [(.)]TJ -241.716 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(single)-250(character)-250(with)-250(value)-250('N')-250(for)]TJ/F52 9.9626 Tf 218.195 0 Td [(A)]TJ/F54 9.9626 Tf 9.808 0 Td [(or)-250('T')-250(for)]TJ/F52 9.9626 Tf 41.807 0 Td [(A)]TJ/F52 7.5716 Tf 7.511 3.616 Td [(T)]TJ/F54 9.9626 Tf 5.401 -3.616 Td [(.)]TJ + [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F67 9.9626 Tf 216.877 0 Td [(dat)]TJ/F62 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F60 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F62 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 124.802 326.552 cm +0 0 318.804 60.772 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F51 9.9626 Tf -307.628 -31.88 Td [(iperm)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 32.099 0 Td [(An)-250(integer)-250(array)-250(containing)-250(permutation)-250(information.)]TJ -7.193 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(one-dimensional)-250(array)111(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 137.205 376.664 Td [(call)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -31.881 Td [(x)]TJ + [-525(psb_sum\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(The)-250(dense)-250(matrix)-250(to)-250(be)-250(permuted.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(one)-250(or)-250(two)-250(dimensional)-250(array)111(.)]TJ + [(psb_collective_start_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.906 -33.873 Td [(On)-250(Return)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - 0 -19.926 Td [(info)]TJ + [(sum_request\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + -23.537 -10.958 Td [(.......)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 141.968 -226.117 Td [(96)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F102 8.9664 Tf -37.659 -10.959 Td [(call)]TJ +0 g 0 G + [-525(psb_sum\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(psb_collective_end_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(sum_request\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 103.537 -242.391 Td [(123)]TJ 0 g 0 G ET endstream endobj -1535 0 obj +1797 0 obj << -/Length 6238 +/Length 5548 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.17)-1000(psb)]TJ -ET -q -1 0 0 1 153.407 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 156.993 706.129 Td [(glob)]TJ -ET -q -1 0 0 1 182.29 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 185.877 706.129 Td [(to)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(7.11)-1000(psb)]TJ ET q -1 0 0 1 197.222 706.328 cm +1 0 0 1 204.216 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 200.809 706.129 Td [(loc)-250(\227)-250(Global)-250(to)-250(local)-250(indices)-250(convertion)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -100.914 -18.964 Td [(call)-525(psb_glob_to_loc\050x,)-525(y,)-525(desc_a,)-525(info,)-525(iact,owned\051)]TJ 0 -11.955 Td [(call)-525(psb_glob_to_loc\050x,)-525(desc_a,)-525(info,)-525(iact,owned\051)]TJ +/F59 11.9552 Tf 207.803 706.129 Td [(max)-250(\227)-250(Global)-250(maximum)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.109 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.602 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.601 Td [(x)]TJ +/F67 9.9626 Tf -57.098 -20.269 Td [(call)-525(psb_max\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F62 9.9626 Tf 14.944 -24.611 Td [(This)-354(subr)18(outine)-354(implements)-354(a)-354(maximum)-354(valuer)18(eduction)-354(operation)-354(based)]TJ -14.944 -11.955 Td [(on)-250(the)-250(underlying)-250(communication)-250(library)111(.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(An)-250(integer)-250(vector)-250(of)-250(indices)-250(to)-250(be)-250(converted.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in,)-250(inout)]TJ/F54 9.9626 Tf 38.735 0 Td [(.)]TJ -70.535 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ +/F59 9.9626 Tf 0 -21.945 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -31.557 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 535.72 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 535.52 Td [(a)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 487.899 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 487.7 Td [(desc)]TJ -ET -q -1 0 0 1 333.945 487.899 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 337.084 487.7 Td [(type)]TJ +/F59 9.9626 Tf -29.828 -22.619 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -19.602 Td [(iact)]TJ + 0 -22.619 Td [(ctxt)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F51 9.9626 Tf 253.796 0 Td [(global)]TJ/F54 9.9626 Tf -249.91 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-190(as:)-280(a)-190(character)-190(variable)]TJ/F59 9.9626 Tf 143.341 0 Td [(I)]TJ/F54 9.9626 Tf 5.23 0 Td [(gnor)18(e,)]TJ/F59 9.9626 Tf 29.808 0 Td [(W)]TJ/F54 9.9626 Tf 5.231 0 Td [(arning)-190(or)]TJ/F59 9.9626 Tf 42.111 0 Td [(A)]TJ/F54 9.9626 Tf 5.231 0 Td [(bort,)-202(default)]TJ/F59 9.9626 Tf 55.839 0 Td [(I)]TJ/F54 9.9626 Tf 5.231 0 Td [(gnor)18(e.)]TJ +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F51 9.9626 Tf -316.929 -19.601 Td [(owned)]TJ +/F59 9.9626 Tf -24.907 -22.619 Td [(dat)]TJ 0 g 0 G -/F54 9.9626 Tf 35.975 0 Td [(Spec\002es)-250(valid)-250(range)-250(of)-250(input)-250(Scope:)]TJ/F51 9.9626 Tf 159.54 0 Td [(global)]TJ/F54 9.9626 Tf -170.608 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(If)-320(tr)8(ue,)-337(then)-320(only)-320(indices)-320(strictly)-320(owned)-320(by)-320(the)-319(curr)18(ent)-320(pr)18(ocess)-320(ar)18(e)-320(con-)]TJ 0 -11.955 Td [(sider)18(ed)-250(valid,)-250(if)-250(false)-250(then)-250(halo)-250(indices)-250(ar)18(e)-250(also)-250(accepted.)-310(Default:)-310(false.)]TJ +/F62 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(maximum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-264(as:)-339(an)-264(integer)-264(or)-265(r)18(eal)-264(variable,)-268(which)-264(may)-264(be)-264(a)-265(scalar)74(,)-268(or)-264(a)-264(rank)]TJ 0 -11.955 Td [(1)-250(or)-250(2)-250(array)111(.)-560(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.109 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -24.907 -22.619 Td [(root)]TJ 0 g 0 G +/F62 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-255(to)-255(hold)-255(the)-255(\002nal)-255(maximum,)-257(or)]TJ/F91 10.3811 Tf 170.502 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1)-255(to)-255(make)-255(it)-255(available)-255(on)-255(all)-255(pr)18(o-)]TJ -177.042 -11.955 Td [(cesses.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F91 10.3811 Tf 131.101 0 Td [(\000)]TJ/F62 9.9626 Tf 8.195 0 Td [(1)]TJ/F69 10.3811 Tf 7.873 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F69 10.3811 Tf 19.923 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ 0 g 0 G - 0 -19.601 Td [(x)]TJ +/F59 9.9626 Tf -254.343 -34.574 Td [(mode)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(If)]TJ/F52 9.9626 Tf 9.727 0 Td [(y)]TJ/F54 9.9626 Tf 8.032 0 Td [(is)-294(not)-294(pr)18(esent,)-304(then)]TJ/F52 9.9626 Tf 88.385 0 Td [(x)]TJ/F54 9.9626 Tf 8.132 0 Td [(is)-294(overwritten)-294(with)-293(the)-294(translated)-294(integer)-294(indices.)]TJ -99.332 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ +/F62 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.955 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F67 9.9626 Tf 199.497 0 Td [(OR)]TJ/F62 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F67 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F62 9.9626 Tf 109.837 0 Td [(,)]TJ/F67 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F62 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.956 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.602 Td [(y)]TJ +/F59 9.9626 Tf -24.907 -34.573 Td [(request)]TJ 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(If)]TJ/F52 9.9626 Tf 9.521 0 Td [(y)]TJ/F54 9.9626 Tf 7.827 0 Td [(is)-273(pr)18(esent,)-279(then)]TJ/F52 9.9626 Tf 70.133 0 Td [(y)]TJ/F54 9.9626 Tf 7.827 0 Td [(is)-273(overwritten)-273(with)-273(the)-273(translated)-274(integer)-273(indices,)-279(and)]TJ/F52 9.9626 Tf -80.628 -11.955 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(left)-250(unchanged.)-310(Scope:)]TJ/F51 9.9626 Tf 112.557 0 Td [(global)]TJ/F54 9.9626 Tf -120.547 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.602 Td [(info)]TJ +/F59 9.9626 Tf -24.907 -24.612 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.108 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 166.875 -29.888 Td [(97)]TJ +/F62 9.9626 Tf 164.384 -29.887 Td [(124)]TJ 0 g 0 G ET endstream endobj -1540 0 obj +1801 0 obj << -/Length 672 +/Length 5227 >> stream 0 g 0 G 0 g 0 G 0 g 0 G BT -/F54 9.9626 Tf 163.158 706.129 Td [(1.)]TJ +/F59 9.9626 Tf 99.895 706.129 Td [(dat)]TJ 0 g 0 G - [-500(If)-272(an)-273(input)-272(index)-272(is)-273(out)-272(of)-273(range)1(,)-278(then)-273(the)-272(corr)18(esponding)-272(output)-273(index)-272(is)]TJ 12.453 -11.955 Td [(set)-250(to)-250(a)-250(negative)-250(number;)]TJ +/F62 9.9626 Tf 19.368 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(maximum)-250(operation.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-264(as:)-339(an)-264(integer)-264(or)-265(r)18(eal)-264(variable,)-268(which)-264(may)-264(be)-264(a)-265(scalar)74(,)-268(or)-264(a)-264(rank)]TJ 0 -11.955 Td [(1)-250(or)-250(2)-250(array)111(.)-560(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ 0 g 0 G - -12.453 -19.926 Td [(2.)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(request)]TJ 0 g 0 G - [-500(The)-416(default)]TJ/F59 9.9626 Tf 68.74 0 Td [(I)]TJ/F54 9.9626 Tf 5.23 0 Td [(gnor)18(e)-416(means)-417(that)-416(the)-417(negative)-416(output)-416(is)-417(the)-416(only)-416(action)]TJ -61.517 -11.955 Td [(taken)-250(on)-250(an)-250(out-of-range)-250(input.)]TJ +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ 0 g 0 G - 141.968 -571.855 Td [(98)]TJ +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G -ET - -endstream -endobj -1547 0 obj -<< -/Length 5458 ->> -stream + [-500(The)]TJ/F67 9.9626 Tf 31.023 0 Td [(dat)]TJ/F62 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.955 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ 0 g 0 G + -12.453 -19.926 Td [(2.)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.18)-1000(psb)]TJ -ET -q -1 0 0 1 153.407 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 156.993 706.129 Td [(loc)]TJ -ET -q -1 0 0 1 173.646 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 177.233 706.129 Td [(to)]TJ + [-500(The)]TJ/F67 9.9626 Tf 32.225 0 Td [(mode)]TJ/F62 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F67 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F62 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F67 9.9626 Tf 16.309 0 Td [(request)]TJ/F62 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 188.578 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 124.802 429.167 cm +0 0 318.804 27.895 re f Q -BT -/F51 11.9552 Tf 192.165 706.129 Td [(glob)-250(\227)-250(Local)-250(to)-250(global)-250(indices)-250(conversion)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf -92.27 -18.964 Td [(call)-525(psb_loc_to_glob\050x,)-525(y,)-525(desc_a,)-525(info,)-525(iact\051)]TJ 0 -11.955 Td [(call)-525(psb_loc_to_glob\050x,)-525(desc_a,)-525(info,)-525(iact\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 137.205 446.403 Td [(call)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ + [-525(psb_max\050ctxt,dat,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + [-525(mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(ior)]TJ 0 g 0 G - 0 -19.925 Td [(x)]TJ + [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(An)-250(integer)-250(vector)-250(of)-250(indices)-250(to)-250(be)-250(converted.)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in,)-250(inout)]TJ/F54 9.9626 Tf 38.735 0 Td [(.)]TJ -70.535 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -31.881 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 533.94 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 533.74 Td [(a)]TJ +/F62 9.9626 Tf -48.393 -36.164 Td [(3.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 486.119 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 485.92 Td [(desc)]TJ + [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F67 9.9626 Tf 216.877 0 Td [(dat)]TJ/F62 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F60 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F62 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.956 Td [(accessed)-250(between)-250(calls:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 333.945 486.119 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 124.802 314.597 cm +0 0 318.804 60.772 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F59 9.9626 Tf 337.084 485.92 Td [(type)]TJ +/F102 8.9664 Tf 137.205 364.709 Td [(call)]TJ +0 g 0 G + [-525(psb_max\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ + [(psb_collective_start_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -258.11 -19.926 Td [(iact)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F51 9.9626 Tf 253.796 0 Td [(global)]TJ/F54 9.9626 Tf -249.91 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(character)-190(variable)]TJ/F59 9.9626 Tf 143.341 0 Td [(I)]TJ/F54 9.9626 Tf 5.23 0 Td [(gnor)18(e,)]TJ/F59 9.9626 Tf 29.808 0 Td [(W)]TJ/F54 9.9626 Tf 5.231 0 Td [(arning)-190(or)]TJ/F59 9.9626 Tf 42.111 0 Td [(A)]TJ/F54 9.9626 Tf 5.231 0 Td [(bort,)-202(default)]TJ/F59 9.9626 Tf 55.839 0 Td [(I)]TJ/F54 9.9626 Tf 5.231 0 Td [(gnor)18(e.)]TJ + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -316.929 -21.918 Td [(On)-250(Return)]TJ + [(max_request\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + -23.537 -10.959 Td [(.......)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(x)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(If)]TJ/F52 9.9626 Tf 9.727 0 Td [(y)]TJ/F54 9.9626 Tf 8.032 0 Td [(is)-294(not)-294(pr)18(esent,)-304(then)]TJ/F52 9.9626 Tf 88.385 0 Td [(x)]TJ/F54 9.9626 Tf 8.132 0 Td [(is)-294(overwritten)-294(with)-293(the)-294(translated)-294(integer)-294(indices.)]TJ -99.332 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(y)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F102 8.9664 Tf -37.659 -10.958 Td [(call)]TJ +0 g 0 G + [-525(psb_max\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(If)]TJ/F52 9.9626 Tf 9.705 0 Td [(y)]TJ/F54 9.9626 Tf 8.011 0 Td [(is)-292(not)-291(pr)18(esent,)-302(then)]TJ/F52 9.9626 Tf 88.122 0 Td [(y)]TJ/F54 9.9626 Tf 8.011 0 Td [(is)-292(overwritten)-291(with)-292(the)-291(translated)-292(integer)-292(indice)1(s,)]TJ -99.463 -11.955 Td [(and)]TJ/F52 9.9626 Tf 19.651 0 Td [(x)]TJ/F54 9.9626 Tf 7.696 0 Td [(is)-250(left)-250(unchanged.)-310(Scope:)]TJ/F51 9.9626 Tf 112.557 0 Td [(global)]TJ/F54 9.9626 Tf -139.904 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ + [(psb_collective_end_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(info)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - 141.968 -114.535 Td [(99)]TJ + [(max_request\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 103.537 -230.436 Td [(125)]TJ 0 g 0 G ET endstream endobj -1552 0 obj +1808 0 obj << -/Length 3169 +/Length 5813 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.19)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(7.12)-1000(psb)]TJ ET q 1 0 0 1 204.216 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(is)]TJ -ET -q -1 0 0 1 217.809 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 221.396 706.129 Td [(owned)-250(\227)]TJ +/F59 11.9552 Tf 207.803 706.129 Td [(min)-250(\227)-250(Global)-250(minimum)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -70.691 -18.964 Td [(call)-525(psb_is_owned\050x,)-525(desc_a\051)]TJ +/F67 9.9626 Tf -57.098 -19.198 Td [(call)-525(psb_min\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F62 9.9626 Tf 14.944 -22.401 Td [(This)-328(subr)18(outine)-327(implements)-328(a)-328(minimum)-327(value)-328(r)18(eduction)-328(o)1(peration)-328(based)]TJ -14.944 -11.955 Td [(on)-250(the)-250(underlying)-250(communication)-250(library)111(.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -20.288 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -20.409 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(x)]TJ + 0 -20.408 Td [(ctxt)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(Integer)-250(index.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(integer)74(.)]TJ +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -31.88 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 545.895 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 545.696 Td [(a)]TJ +/F59 9.9626 Tf -24.907 -20.409 Td [(dat)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 497.875 Td [(desc)]TJ -ET -q -1 0 0 1 384.755 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 387.893 497.875 Td [(type)]TJ +/F62 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(minimum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-264(as:)-339(an)-264(integer)-264(or)-265(r)18(eal)-264(variable,)-268(which)-264(may)-264(be)-264(a)-265(scalar)74(,)-268(or)-264(a)-264(rank)]TJ 0 -11.956 Td [(1)-250(or)-250(2)-250(array)111(.)-560(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F59 9.9626 Tf -24.907 -20.408 Td [(root)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-221(to)-221(hold)-221(the)-222(\002nal)-221(value,)-227(or)]TJ/F91 10.3811 Tf 147.052 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1)-221(to)-221(make)-222(it)-221(available)-221(on)-221(all)-221(pr)18(ocesses.)]TJ -153.592 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F91 10.3811 Tf 131.101 0 Td [(\000)]TJ/F62 9.9626 Tf 8.195 0 Td [(1)]TJ/F69 10.3811 Tf 7.873 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F69 10.3811 Tf 19.923 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ 0 g 0 G +/F59 9.9626 Tf -254.343 -32.364 Td [(mode)]TJ 0 g 0 G - 0 -19.925 Td [(Function)-250(value)]TJ +/F62 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.956 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F67 9.9626 Tf 199.497 0 Td [(OR)]TJ/F62 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F67 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F62 9.9626 Tf 109.837 0 Td [(,)]TJ/F67 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F62 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.955 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(A)-261(logical)-260(mask)-261(which)-261(is)-261(tr)8(ue)-261(if)]TJ/F52 9.9626 Tf 137.304 0 Td [(x)]TJ/F54 9.9626 Tf 7.803 0 Td [(is)-261(owned)-261(by)-260(the)-261(curr)18(ent)-261(pr)18(o-)]TJ -192.978 -11.955 Td [(cess)-250(Scope:)]TJ/F51 9.9626 Tf 51.567 0 Td [(local)]TJ/F54 9.9626 Tf -51.567 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ/F51 11.9552 Tf -71.651 -33.873 Td [(Notes)]TJ +/F59 9.9626 Tf -24.907 -32.364 Td [(request)]TJ 0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.956 Td [(be)-250(pr)18(esent.)]TJ 0 g 0 G - [-500(This)-240(r)18(outine)-239(r)18(eturns)-240(a)]TJ/F59 9.9626 Tf 109.67 0 Td [(.true.)]TJ/F54 9.9626 Tf 33.769 0 Td [(value)-240(for)-239(an)-240(index)-239(that)-240(is)-239(strictly)-240(owned)-239(by)]TJ -130.986 -11.955 Td [(the)-250(curr)18(ent)-250(pr)18(ocess,)-250(excluding)-250(the)-250(halo)-250(indices)]TJ +/F59 9.9626 Tf -24.907 -22.401 Td [(On)-250(Return)]TJ 0 g 0 G - 139.477 -263.975 Td [(100)]TJ +0 g 0 G + 0 -20.408 Td [(dat)]TJ +0 g 0 G +/F62 9.9626 Tf 19.367 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(minimum)-250(operation.)]TJ 5.54 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.781 0 Td [(.)]TJ +0 g 0 G + 79.264 -29.887 Td [(126)]TJ 0 g 0 G ET endstream endobj -1558 0 obj +1812 0 obj << -/Length 4795 +/Length 4946 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.20)-1000(psb)]TJ -ET -q -1 0 0 1 153.407 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 156.993 706.129 Td [(owned)]TJ -ET -q -1 0 0 1 194.903 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 198.489 706.129 Td [(index)-250(\227)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -98.594 -18.964 Td [(call)-525(psb_owned_index\050y,)-525(x,)-525(desc_a,)-525(info\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F62 9.9626 Tf 124.802 706.129 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-264(as:)-339(an)-264(integer)-264(or)-265(r)18(eal)-264(variable,)-268(which)-264(may)-264(be)-264(a)-265(scalar)74(,)-268(or)-264(a)-264(rank)]TJ 0 -11.956 Td [(1)-250(or)-250(2)-250(array)111(.)]TJ 0 -11.955 Td [(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(request)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ 0 g 0 G +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ 0 g 0 G - 0 -19.925 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(Integer)-250(indices.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in,)-250(inout)]TJ/F54 9.9626 Tf 38.735 0 Td [(.)]TJ -70.535 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ + [-500(The)]TJ/F67 9.9626 Tf 31.023 0 Td [(dat)]TJ/F62 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.955 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -31.88 Td [(desc)]TJ -ET -q -1 0 0 1 120.408 545.895 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 123.397 545.696 Td [(a)]TJ + -12.453 -19.926 Td [(2.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 497.875 Td [(desc)]TJ + [-500(The)]TJ/F67 9.9626 Tf 32.225 0 Td [(mode)]TJ/F62 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F67 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F62 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F67 9.9626 Tf 16.309 0 Td [(request)]TJ/F62 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 333.945 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 124.802 441.123 cm +0 0 318.804 27.895 re f Q -BT -/F59 9.9626 Tf 337.084 497.875 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -258.11 -19.925 Td [(iact)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F51 9.9626 Tf 253.796 0 Td [(global)]TJ/F54 9.9626 Tf -249.91 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(character)-190(variable)]TJ/F59 9.9626 Tf 143.341 0 Td [(I)]TJ/F54 9.9626 Tf 5.23 0 Td [(gnor)18(e,)]TJ/F59 9.9626 Tf 29.808 0 Td [(W)]TJ/F54 9.9626 Tf 5.231 0 Td [(arning)-190(or)]TJ/F59 9.9626 Tf 42.111 0 Td [(A)]TJ/F54 9.9626 Tf 5.231 0 Td [(bort,)-202(default)]TJ/F59 9.9626 Tf 55.839 0 Td [(I)]TJ/F54 9.9626 Tf 5.231 0 Td [(gnor)18(e.)]TJ -0 g 0 G -/F51 9.9626 Tf -316.929 -21.918 Td [(On)-250(Return)]TJ -0 g 0 G +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - 0 -19.925 Td [(y)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(A)-200(logical)-200(mask)-200(which)-201(is)-200(tr)8(ue)-200(for)-200(all)-200(corr)18(esponding)-200(entries)-200(of)]TJ/F52 9.9626 Tf 260.812 0 Td [(x)]TJ/F54 9.9626 Tf 7.2 0 Td [(that)-200(ar)18(e)-200(owned)]TJ -253.626 -11.955 Td [(by)-250(the)-250(curr)18(ent)-250(pr)18(ocess)-250(Scope:)]TJ/F51 9.9626 Tf 131.027 0 Td [(local)]TJ/F54 9.9626 Tf -131.027 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(rank)-250(one)-250(logical)-250(array)111(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 137.205 458.358 Td [(call)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(info)]TJ + [-525(psb_min\050ctxt,dat,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.917 Td [(Notes)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 12.454 -19.926 Td [(1.)]TJ + [-525(mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - [-500(This)-429(r)18(outine)-428(r)18(eturns)-429(a)]TJ/F59 9.9626 Tf 117.209 0 Td [(.true.)]TJ/F54 9.9626 Tf 35.654 0 Td [(value)-429(for)-428(those)-429(indices)-429(that)-429(ar)18(e)-428(strictly)]TJ -140.41 -11.955 Td [(owned)-250(by)-250(the)-250(curr)18(ent)-250(pr)18(ocess,)-250(excluding)-250(the)-250(halo)-250(indices)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(ior)]TJ 0 g 0 G - 139.477 -140.438 Td [(101)]TJ + [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -ET - -endstream -endobj -1564 0 obj -<< -/Length 3147 ->> -stream 0 g 0 G +/F62 9.9626 Tf -48.393 -36.164 Td [(3.)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.21)-1000(psb)]TJ -ET -q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 207.803 706.129 Td [(is)]TJ + [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F67 9.9626 Tf 216.877 0 Td [(dat)]TJ/F62 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F60 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F62 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 217.809 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 124.802 326.552 cm +0 0 318.804 60.772 re f Q -BT -/F51 11.9552 Tf 221.396 706.129 Td [(local)-250(\227)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf -70.691 -18.964 Td [(call)-525(psb_is_local\050x,)-525(desc_a\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 137.205 376.664 Td [(call)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ + [-525(psb_min\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [(psb_collective_start_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - 0 -19.925 Td [(x)]TJ + [(min_request\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(Integer)-250(index.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(integer)74(.)]TJ + -23.537 -10.958 Td [(.......)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -31.88 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 545.895 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 545.696 Td [(a)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 497.875 Td [(desc)]TJ -ET -q -1 0 0 1 384.755 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 387.893 497.875 Td [(type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F102 8.9664 Tf -37.659 -10.959 Td [(call)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ + [-525(psb_min\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G + [(psb_collective_end_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(Function)-250(value)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(A)-244(logical)-244(mask)-243(which)-244(is)-244(tr)8(ue)-244(if)]TJ/F52 9.9626 Tf 136.118 0 Td [(x)]TJ/F54 9.9626 Tf 7.635 0 Td [(is)-244(local)-244(to)-243(the)-244(curr)18(ent)-244(pr)18(ocess)]TJ -191.623 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ/F51 11.9552 Tf -71.651 -33.873 Td [(Notes)]TJ + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ + [(min_request\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - [-500(This)-199(r)18(outine)-200(r)18(eturns)-199(a)]TJ/F59 9.9626 Tf 108.069 0 Td [(.true.)]TJ/F54 9.9626 Tf 33.369 0 Td [(value)-199(for)-200(an)-199(index)-199(that)-200(is)-199(local)-200(to)-199(the)-199(curr)18(ent)]TJ -128.984 -11.955 Td [(pr)18(ocess,)-250(including)-250(the)-250(halo)-250(indices)]TJ 0 g 0 G - 139.476 -263.975 Td [(102)]TJ +/F62 9.9626 Tf 103.537 -242.391 Td [(127)]TJ 0 g 0 G ET endstream endobj -1464 0 obj +1708 0 obj << /Type /ObjStm /N 100 -/First 976 -/Length 10636 ->> -stream -1461 0 1457 157 1458 304 1459 449 1463 595 344 654 1460 712 1466 806 1468 924 1469 982 -1470 1040 1471 1098 1472 1156 1473 1214 1474 1272 1475 1330 1465 1388 1479 1482 1476 1630 1477 1773 -1481 1920 348 1979 1478 2037 1485 2131 1482 2279 1483 2424 1487 2571 352 2629 1488 2686 1484 2744 -1492 2838 1489 2986 1490 3129 1494 3273 356 3332 1491 3390 1496 3497 1498 3615 1499 3673 1495 3730 -1504 3824 1501 3972 1502 4118 1506 4262 360 4321 1503 4379 1508 4499 1510 4617 1511 4675 1512 4733 -1507 4791 1517 4872 1513 5029 1514 5170 1515 5313 1519 5457 364 5516 1520 5574 1521 5633 1516 5692 -1525 5799 1522 5947 1523 6090 1527 6237 368 6295 1524 6352 1529 6446 1531 6564 372 6623 1528 6681 -1534 6788 1532 6927 1536 7074 376 7132 1533 7189 1539 7296 1541 7414 1542 7473 1543 7532 1538 7591 -1546 7672 1544 7811 1548 7958 380 8016 1545 8073 1551 8180 1549 8319 1553 8466 384 8525 1554 8583 -1550 8642 1557 8749 1555 8888 1559 9035 388 9093 1560 9150 1556 9208 1563 9315 1561 9454 1565 9601 -% 1461 0 obj +/First 970 +/Length 9209 +>> +stream +1704 0 1710 107 1712 225 417 283 1709 340 1715 447 1717 565 1718 624 1719 683 1720 742 +1721 801 1722 860 1723 919 1724 978 1725 1037 1726 1096 1714 1155 1728 1275 1730 1393 421 1451 +1727 1508 1732 1589 1734 1707 425 1766 1735 1824 1736 1883 1731 1942 1738 2075 1740 2193 429 2251 +1741 2308 1742 2365 1737 2421 1744 2554 1746 2672 433 2731 1747 2789 1748 2848 1749 2907 1743 2966 +1751 3099 1753 3217 437 3275 1750 3332 1756 3426 1758 3544 441 3603 1755 3661 1760 3781 1762 3899 +445 3957 1759 4014 1764 4108 1766 4226 449 4285 1763 4343 1768 4437 1770 4555 453 4613 1767 4670 +1772 4764 1774 4882 457 4941 1771 4999 1776 5146 1778 5264 1779 5322 1780 5380 1781 5438 1775 5496 +1785 5633 1787 5751 461 5810 1784 5868 1789 6015 1791 6133 1792 6191 1793 6249 1794 6307 1788 6365 +1796 6502 1798 6620 465 6679 1795 6737 1800 6884 1802 7002 1803 7060 1804 7118 1805 7176 1799 7233 +1807 7370 1809 7488 469 7547 1806 7605 1811 7752 1813 7870 1814 7928 1815 7986 1816 8044 1810 8102 +% 1704 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 1710 0 obj << /Type /Page -/Contents 1462 0 R -/Resources 1460 0 R +/Contents 1711 0 R +/Resources 1709 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1456 0 R -/Annots [ 1457 0 R 1458 0 R 1459 0 R ] +/Parent 1713 0 R >> -% 1457 0 obj +% 1712 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 572.168 409.811 584.228] -/A << /S /GoTo /D (descdata) >> +/D [1710 0 R /XYZ 98.895 753.953 null] >> -% 1458 0 obj +% 417 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 276.913 420.271 288.973] -/A << /S /GoTo /D (spdata) >> +/D [1710 0 R /XYZ 99.895 716.092 null] >> -% 1459 0 obj +% 1709 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [345.92 208.672 412.978 220.731] -/A << /S /GoTo /D (descdata) >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1463 0 obj +% 1715 0 obj << -/D [1461 0 R /XYZ 149.705 753.953 null] +/Type /Page +/Contents 1716 0 R +/Resources 1714 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1713 0 R >> -% 344 0 obj +% 1717 0 obj << -/D [1461 0 R /XYZ 150.705 716.092 null] +/D [1715 0 R /XYZ 149.705 753.953 null] >> -% 1460 0 obj +% 1718 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/D [1715 0 R /XYZ 150.705 701.929 null] >> -% 1466 0 obj +% 1719 0 obj << -/Type /Page -/Contents 1467 0 R -/Resources 1465 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1456 0 R +/D [1715 0 R /XYZ 150.705 668.729 null] >> -% 1468 0 obj +% 1720 0 obj << -/D [1466 0 R /XYZ 98.895 753.953 null] +/D [1715 0 R /XYZ 150.705 624.894 null] >> -% 1469 0 obj +% 1721 0 obj << -/D [1466 0 R /XYZ 99.895 701.929 null] +/D [1715 0 R /XYZ 150.705 555.872 null] >> -% 1470 0 obj +% 1722 0 obj << -/D [1466 0 R /XYZ 99.895 667.454 null] +/D [1715 0 R /XYZ 150.705 500.082 null] >> -% 1471 0 obj +% 1723 0 obj << -/D [1466 0 R /XYZ 99.895 647.529 null] +/D [1715 0 R /XYZ 150.705 468.201 null] >> -% 1472 0 obj +% 1724 0 obj << -/D [1466 0 R /XYZ 99.895 603.693 null] +/D [1715 0 R /XYZ 150.705 425.023 null] >> -% 1473 0 obj +% 1725 0 obj << -/D [1466 0 R /XYZ 99.895 547.902 null] +/D [1715 0 R /XYZ 150.705 382.522 null] >> -% 1474 0 obj +% 1726 0 obj << -/D [1466 0 R /XYZ 99.895 527.977 null] +/D [1715 0 R /XYZ 150.705 354.627 null] >> -% 1475 0 obj +% 1714 0 obj << -/D [1466 0 R /XYZ 99.895 496.097 null] +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R /F91 914 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1465 0 obj +% 1728 0 obj +<< +/Type /Page +/Contents 1729 0 R +/Resources 1727 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1713 0 R +>> +% 1730 0 obj +<< +/D [1728 0 R /XYZ 98.895 753.953 null] +>> +% 421 0 obj +<< +/D [1728 0 R /XYZ 99.895 716.092 null] +>> +% 1727 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1479 0 obj +% 1732 0 obj << /Type /Page -/Contents 1480 0 R -/Resources 1478 0 R +/Contents 1733 0 R +/Resources 1731 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1456 0 R -/Annots [ 1476 0 R 1477 0 R ] +/Parent 1713 0 R >> -% 1476 0 obj +% 1734 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 573.77 420.271 585.83] -/A << /S /GoTo /D (spdata) >> +/D [1732 0 R /XYZ 149.705 753.953 null] >> -% 1477 0 obj +% 425 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 506.024 409.811 518.084] -/A << /S /GoTo /D (descdata) >> +/D [1732 0 R /XYZ 150.705 716.092 null] >> -% 1481 0 obj +% 1735 0 obj << -/D [1479 0 R /XYZ 149.705 753.953 null] +/D [1732 0 R /XYZ 150.705 222.691 null] >> -% 348 0 obj +% 1736 0 obj << -/D [1479 0 R /XYZ 150.705 716.092 null] +/D [1732 0 R /XYZ 150.705 200.171 null] >> -% 1478 0 obj +% 1731 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F93 915 0 R /F60 666 0 R /F91 914 0 R >> /ProcSet [ /PDF /Text ] >> -% 1485 0 obj +% 1738 0 obj << /Type /Page -/Contents 1486 0 R -/Resources 1484 0 R +/Contents 1739 0 R +/Resources 1737 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1456 0 R -/Annots [ 1482 0 R 1483 0 R ] ->> -% 1482 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 559.823 369.462 571.882] -/A << /S /GoTo /D (spdata) >> +/Parent 1713 0 R >> -% 1483 0 obj +% 1740 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 492.077 359.001 504.136] -/A << /S /GoTo /D (descdata) >> +/D [1738 0 R /XYZ 98.895 753.953 null] >> -% 1487 0 obj +% 429 0 obj << -/D [1485 0 R /XYZ 98.895 753.953 null] +/D [1738 0 R /XYZ 99.895 716.092 null] >> -% 352 0 obj +% 1741 0 obj << -/D [1485 0 R /XYZ 99.895 716.092 null] +/D [1738 0 R /XYZ 99.895 348.22 null] >> -% 1488 0 obj +% 1742 0 obj << -/D [1485 0 R /XYZ 99.895 312.355 null] +/D [1738 0 R /XYZ 99.895 313.8 null] >> -% 1484 0 obj +% 1737 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F91 914 0 R /F60 666 0 R /F93 915 0 R >> /ProcSet [ /PDF /Text ] >> -% 1492 0 obj +% 1744 0 obj << /Type /Page -/Contents 1493 0 R -/Resources 1491 0 R +/Contents 1745 0 R +/Resources 1743 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1456 0 R -/Annots [ 1489 0 R 1490 0 R ] +/Parent 1713 0 R >> -% 1489 0 obj +% 1746 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [310.942 577.893 378 589.953] -/A << /S /GoTo /D (descdata) >> +/D [1744 0 R /XYZ 149.705 753.953 null] >> -% 1490 0 obj +% 433 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [261.249 128.475 337.303 140.535] -/A << /S /GoTo /D (vdata) >> +/D [1744 0 R /XYZ 150.705 716.092 null] >> -% 1494 0 obj +% 1747 0 obj << -/D [1492 0 R /XYZ 149.705 753.953 null] +/D [1744 0 R /XYZ 150.705 441.869 null] >> -% 356 0 obj +% 1748 0 obj << -/D [1492 0 R /XYZ 150.705 716.092 null] +/D [1744 0 R /XYZ 150.705 395.439 null] >> -% 1491 0 obj +% 1749 0 obj +<< +/D [1744 0 R /XYZ 150.705 363.559 null] +>> +% 1743 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R /F91 914 0 R >> /ProcSet [ /PDF /Text ] >> -% 1496 0 obj +% 1751 0 obj << /Type /Page -/Contents 1497 0 R -/Resources 1495 0 R +/Contents 1752 0 R +/Resources 1750 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1500 0 R +/Parent 1754 0 R >> -% 1498 0 obj +% 1753 0 obj << -/D [1496 0 R /XYZ 98.895 753.953 null] +/D [1751 0 R /XYZ 98.895 753.953 null] >> -% 1499 0 obj +% 437 0 obj << -/D [1496 0 R /XYZ 99.895 632.19 null] +/D [1751 0 R /XYZ 99.895 716.092 null] >> -% 1495 0 obj +% 1750 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1504 0 obj +% 1756 0 obj << /Type /Page -/Contents 1505 0 R -/Resources 1503 0 R +/Contents 1757 0 R +/Resources 1755 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1500 0 R -/Annots [ 1501 0 R 1502 0 R ] +/Parent 1754 0 R >> -% 1501 0 obj +% 1758 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 362.621 409.811 374.68] -/A << /S /GoTo /D (descdata) >> +/D [1756 0 R /XYZ 149.705 753.953 null] >> -% 1502 0 obj +% 441 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [417.183 207.637 493.237 219.697] -/A << /S /GoTo /D (vdata) >> +/D [1756 0 R /XYZ 150.705 716.092 null] >> -% 1506 0 obj +% 1755 0 obj << -/D [1504 0 R /XYZ 149.705 753.953 null] +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R >> +/ProcSet [ /PDF /Text ] >> -% 360 0 obj +% 1760 0 obj +<< +/Type /Page +/Contents 1761 0 R +/Resources 1759 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1754 0 R +>> +% 1762 0 obj << -/D [1504 0 R /XYZ 150.705 716.092 null] +/D [1760 0 R /XYZ 98.895 753.953 null] >> -% 1503 0 obj +% 445 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R >> +/D [1760 0 R /XYZ 99.895 716.092 null] +>> +% 1759 0 obj +<< +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1508 0 obj +% 1764 0 obj << /Type /Page -/Contents 1509 0 R -/Resources 1507 0 R +/Contents 1765 0 R +/Resources 1763 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1500 0 R ->> -% 1510 0 obj -<< -/D [1508 0 R /XYZ 98.895 753.953 null] +/Parent 1754 0 R >> -% 1511 0 obj +% 1766 0 obj << -/D [1508 0 R /XYZ 99.895 701.929 null] +/D [1764 0 R /XYZ 149.705 753.953 null] >> -% 1512 0 obj +% 449 0 obj << -/D [1508 0 R /XYZ 99.895 680.684 null] +/D [1764 0 R /XYZ 150.705 716.092 null] >> -% 1507 0 obj +% 1763 0 obj << -/Font << /F51 584 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1517 0 obj +% 1768 0 obj << /Type /Page -/Contents 1518 0 R -/Resources 1516 0 R +/Contents 1769 0 R +/Resources 1767 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1500 0 R -/Annots [ 1513 0 R 1514 0 R 1515 0 R ] ->> -% 1513 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [310.942 573.77 378 585.83] -/A << /S /GoTo /D (descdata) >> +/Parent 1754 0 R >> -% 1514 0 obj +% 1770 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [322.33 482.114 398.384 494.174] -/A << /S /GoTo /D (vdata) >> +/D [1768 0 R /XYZ 98.895 753.953 null] >> -% 1515 0 obj +% 453 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [225.165 380.495 301.219 392.555] -/A << /S /GoTo /D (vdata) >> +/D [1768 0 R /XYZ 99.895 716.092 null] >> -% 1519 0 obj +% 1767 0 obj << -/D [1517 0 R /XYZ 149.705 753.953 null] +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] >> -% 364 0 obj +% 1772 0 obj << -/D [1517 0 R /XYZ 150.705 716.092 null] +/Type /Page +/Contents 1773 0 R +/Resources 1771 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1754 0 R >> -% 1520 0 obj +% 1774 0 obj << -/D [1517 0 R /XYZ 150.705 278.482 null] +/D [1772 0 R /XYZ 149.705 753.953 null] >> -% 1521 0 obj +% 457 0 obj << -/D [1517 0 R /XYZ 150.705 244.007 null] +/D [1772 0 R /XYZ 150.705 716.092 null] >> -% 1516 0 obj +% 1771 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F69 1460 0 R /F93 915 0 R /F60 666 0 R /F91 914 0 R >> /ProcSet [ /PDF /Text ] >> -% 1525 0 obj +% 1776 0 obj << /Type /Page -/Contents 1526 0 R -/Resources 1524 0 R +/Contents 1777 0 R +/Resources 1775 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1500 0 R -/Annots [ 1522 0 R 1523 0 R ] +/Parent 1783 0 R >> -% 1522 0 obj +% 1778 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [174.355 561.815 250.41 573.875] -/A << /S /GoTo /D (vdata) >> +/D [1776 0 R /XYZ 98.895 753.953 null] >> -% 1523 0 obj +% 1779 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [260.133 482.114 327.191 494.174] -/A << /S /GoTo /D (descdata) >> +/D [1776 0 R /XYZ 99.895 528.579 null] >> -% 1527 0 obj +% 1780 0 obj << -/D [1525 0 R /XYZ 98.895 753.953 null] +/D [1776 0 R /XYZ 99.895 494.104 null] >> -% 368 0 obj +% 1781 0 obj << -/D [1525 0 R /XYZ 99.895 716.092 null] +/D [1776 0 R /XYZ 99.895 403.265 null] >> -% 1524 0 obj +% 1775 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F102 1016 0 R /F60 666 0 R /F120 1782 0 R >> /ProcSet [ /PDF /Text ] >> -% 1529 0 obj +% 1785 0 obj << /Type /Page -/Contents 1530 0 R -/Resources 1528 0 R +/Contents 1786 0 R +/Resources 1784 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1500 0 R +/Parent 1783 0 R >> -% 1531 0 obj +% 1787 0 obj << -/D [1529 0 R /XYZ 149.705 753.953 null] +/D [1785 0 R /XYZ 149.705 753.953 null] >> -% 372 0 obj +% 461 0 obj << -/D [1529 0 R /XYZ 150.705 716.092 null] +/D [1785 0 R /XYZ 150.705 716.092 null] >> -% 1528 0 obj +% 1784 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F91 914 0 R /F69 1460 0 R /F93 915 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1534 0 obj +% 1789 0 obj << /Type /Page -/Contents 1535 0 R -/Resources 1533 0 R +/Contents 1790 0 R +/Resources 1788 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1537 0 R -/Annots [ 1532 0 R ] +/Parent 1783 0 R >> -% 1532 0 obj +% 1791 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 483.894 359.001 495.954] -/A << /S /GoTo /D (descdata) >> +/D [1789 0 R /XYZ 98.895 753.953 null] >> -% 1536 0 obj +% 1792 0 obj << -/D [1534 0 R /XYZ 98.895 753.953 null] +/D [1789 0 R /XYZ 99.895 552.489 null] >> -% 376 0 obj +% 1793 0 obj << -/D [1534 0 R /XYZ 99.895 716.092 null] +/D [1789 0 R /XYZ 99.895 518.014 null] >> -% 1533 0 obj +% 1794 0 obj +<< +/D [1789 0 R /XYZ 99.895 427.175 null] +>> +% 1788 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F62 667 0 R /F59 665 0 R /F67 913 0 R /F102 1016 0 R /F60 666 0 R /F120 1782 0 R >> /ProcSet [ /PDF /Text ] >> -% 1539 0 obj +% 1796 0 obj << /Type /Page -/Contents 1540 0 R -/Resources 1538 0 R +/Contents 1797 0 R +/Resources 1795 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1537 0 R ->> -% 1541 0 obj -<< -/D [1539 0 R /XYZ 149.705 753.953 null] +/Parent 1783 0 R >> -% 1542 0 obj +% 1798 0 obj << -/D [1539 0 R /XYZ 150.705 716.092 null] +/D [1796 0 R /XYZ 149.705 753.953 null] >> -% 1543 0 obj +% 465 0 obj << -/D [1539 0 R /XYZ 150.705 687.379 null] +/D [1796 0 R /XYZ 150.705 716.092 null] >> -% 1538 0 obj +% 1795 0 obj << -/Font << /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F91 914 0 R /F69 1460 0 R /F93 915 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1546 0 obj +% 1800 0 obj << /Type /Page -/Contents 1547 0 R -/Resources 1545 0 R +/Contents 1801 0 R +/Resources 1799 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1537 0 R -/Annots [ 1544 0 R ] +/Parent 1783 0 R >> -% 1544 0 obj +% 1802 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 482.114 359.001 494.174] -/A << /S /GoTo /D (descdata) >> +/D [1800 0 R /XYZ 98.895 753.953 null] >> -% 1548 0 obj +% 1803 0 obj << -/D [1546 0 R /XYZ 98.895 753.953 null] +/D [1800 0 R /XYZ 99.895 540.534 null] >> -% 380 0 obj +% 1804 0 obj << -/D [1546 0 R /XYZ 99.895 716.092 null] +/D [1800 0 R /XYZ 99.895 506.059 null] >> -% 1545 0 obj +% 1805 0 obj +<< +/D [1800 0 R /XYZ 99.895 415.22 null] +>> +% 1799 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F102 1016 0 R /F60 666 0 R /F120 1782 0 R >> /ProcSet [ /PDF /Text ] >> -% 1551 0 obj +% 1807 0 obj << /Type /Page -/Contents 1552 0 R -/Resources 1550 0 R +/Contents 1808 0 R +/Resources 1806 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1537 0 R -/Annots [ 1549 0 R ] ->> -% 1549 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 494.069 409.811 506.129] -/A << /S /GoTo /D (descdata) >> ->> -% 1553 0 obj -<< -/D [1551 0 R /XYZ 149.705 753.953 null] +/Parent 1783 0 R >> -% 384 0 obj +% 1809 0 obj << -/D [1551 0 R /XYZ 150.705 716.092 null] +/D [1807 0 R /XYZ 149.705 753.953 null] >> -% 1554 0 obj +% 469 0 obj << -/D [1551 0 R /XYZ 150.705 382.093 null] +/D [1807 0 R /XYZ 150.705 716.092 null] >> -% 1550 0 obj +% 1806 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F91 914 0 R /F69 1460 0 R /F93 915 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1557 0 obj +% 1811 0 obj << /Type /Page -/Contents 1558 0 R -/Resources 1556 0 R +/Contents 1812 0 R +/Resources 1810 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1537 0 R -/Annots [ 1555 0 R ] +/Parent 1817 0 R >> -% 1555 0 obj +% 1813 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 494.069 359.001 506.129] -/A << /S /GoTo /D (descdata) >> +/D [1811 0 R /XYZ 98.895 753.953 null] >> -% 1559 0 obj +% 1814 0 obj << -/D [1557 0 R /XYZ 98.895 753.953 null] +/D [1811 0 R /XYZ 99.895 552.489 null] >> -% 388 0 obj +% 1815 0 obj << -/D [1557 0 R /XYZ 99.895 716.092 null] +/D [1811 0 R /XYZ 99.895 518.014 null] >> -% 1560 0 obj +% 1816 0 obj << -/D [1557 0 R /XYZ 99.895 258.556 null] +/D [1811 0 R /XYZ 99.895 427.175 null] >> -% 1556 0 obj +% 1810 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F62 667 0 R /F59 665 0 R /F67 913 0 R /F102 1016 0 R /F60 666 0 R /F120 1782 0 R >> /ProcSet [ /PDF /Text ] >> -% 1563 0 obj + +endstream +endobj +1820 0 obj << -/Type /Page -/Contents 1564 0 R -/Resources 1562 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1537 0 R -/Annots [ 1561 0 R ] +/Length 5616 >> -% 1561 0 obj +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 150.705 706.129 Td [(7.13)-1000(psb)]TJ +ET +q +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 207.803 706.129 Td [(amx)-250(\227)-250(Global)-250(maximum)-250(absolute)-250(value)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -57.098 -20.269 Td [(call)-525(psb_amx\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F62 9.9626 Tf 14.944 -24.611 Td [(This)-342(subr)18(outine)-342(implements)-342(a)-342(maximum)-341(absolute)-342(value)-342(r)18(eduction)-342(opera-)]TJ -14.944 -11.955 Td [(tion)-250(based)-250(on)-250(the)-250(underlying)-250(communication)-250(library)111(.)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.945 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -22.619 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -22.619 Td [(ctxt)]TJ +0 g 0 G +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.619 Td [(dat)]TJ +0 g 0 G +/F62 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(maximum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-300(a)-300(rank)-300(1)-300(or)-301(2)-300(array)111(.)-760(T)90(ype,)-313(kind,)-312(rank)-300(and)-301(size)-300(must)-300(agr)18(ee)-300(on)-300(all)-300(pr)18(o-)]TJ 0 -11.955 Td [(cesses.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.619 Td [(root)]TJ +0 g 0 G +/F62 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-221(to)-221(hold)-221(the)-222(\002nal)-221(value,)-227(or)]TJ/F91 10.3811 Tf 147.052 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1)-221(to)-221(make)-222(it)-221(available)-221(on)-221(all)-221(pr)18(ocesses.)]TJ -153.592 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F91 10.3811 Tf 131.101 0 Td [(\000)]TJ/F62 9.9626 Tf 8.195 0 Td [(1)]TJ/F69 10.3811 Tf 7.873 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F69 10.3811 Tf 19.923 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ +0 g 0 G +/F59 9.9626 Tf -254.343 -34.574 Td [(mode)]TJ +0 g 0 G +/F62 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.955 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F67 9.9626 Tf 199.497 0 Td [(OR)]TJ/F62 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F67 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F62 9.9626 Tf 109.837 0 Td [(,)]TJ/F67 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F62 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.956 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -34.573 Td [(request)]TJ +0 g 0 G +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -24.612 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 164.384 -29.887 Td [(128)]TJ +0 g 0 G +ET + +endstream +endobj +1825 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 494.069 409.811 506.129] -/A << /S /GoTo /D (descdata) >> +/Length 5225 >> -% 1565 0 obj +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 99.895 706.129 Td [(dat)]TJ +0 g 0 G +/F62 9.9626 Tf 19.368 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(maximum)-250(operation.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-300(a)-300(rank)-300(1)-300(or)-301(2)-300(array)111(.)-760(T)90(ype,)-313(kind,)-312(rank)-300(and)-301(size)-300(must)-300(agr)18(ee)-300(on)-300(all)-300(pr)18(o-)]TJ 0 -11.955 Td [(cesses.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(request)]TJ +0 g 0 G +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(The)]TJ/F67 9.9626 Tf 31.023 0 Td [(dat)]TJ/F62 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.956 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ +0 g 0 G + -12.453 -19.925 Td [(2.)]TJ +0 g 0 G + [-500(The)]TJ/F67 9.9626 Tf 32.225 0 Td [(mode)]TJ/F62 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F67 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F62 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F67 9.9626 Tf 16.309 0 Td [(request)]TJ/F62 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 124.802 417.212 cm +0 0 318.804 27.895 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 137.205 434.448 Td [(call)]TJ +0 g 0 G + [-525(psb_amx\050ctxt,dat,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(ior)]TJ +0 g 0 G + [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf -48.393 -36.165 Td [(3.)]TJ +0 g 0 G + [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F67 9.9626 Tf 216.877 0 Td [(dat)]TJ/F62 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F60 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F62 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 124.802 302.642 cm +0 0 318.804 60.772 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 127.791 352.754 Td [(call)]TJ +0 g 0 G + [-525(psb_amx\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(psb_collective_start_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.536 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(amx_request\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + -14.122 -10.959 Td [(.......)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F102 8.9664 Tf -37.659 -10.959 Td [(call)]TJ +0 g 0 G + [-525(psb_amx\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(psb_collective_end_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(amx_request\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 103.537 -218.48 Td [(129)]TJ +0 g 0 G +ET + +endstream +endobj +1832 0 obj +<< +/Length 5619 +>> +stream +0 g 0 G +0 g 0 G +BT +/F59 11.9552 Tf 150.705 706.129 Td [(7.14)-1000(psb)]TJ +ET +q +1 0 0 1 204.216 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 207.803 706.129 Td [(amn)-250(\227)-250(Global)-250(minimum)-250(absolute)-250(value)]TJ +0 g 0 G +0 g 0 G +/F67 9.9626 Tf -57.098 -20.269 Td [(call)-525(psb_amn\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F62 9.9626 Tf 14.944 -24.611 Td [(This)-360(subr)18(outine)-360(impl)1(ements)-360(a)-360(minimum)-360(absolute)-360(value)-359(r)18(eduction)-360(opera-)]TJ -14.944 -11.955 Td [(tion)-250(based)-250(on)-250(the)-250(underlying)-250(communication)-250(library)111(.)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -21.945 Td [(T)90(ype:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0 g 0 G +/F59 9.9626 Tf -29.828 -22.619 Td [(On)-250(Entry)]TJ +0 g 0 G +0 g 0 G + 0 -22.619 Td [(ctxt)]TJ +0 g 0 G +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.619 Td [(dat)]TJ +0 g 0 G +/F62 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(minimum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-300(a)-300(rank)-300(1)-300(or)-301(2)-300(array)111(.)-760(T)90(ype,)-313(kind,)-312(rank)-300(and)-301(size)-300(must)-300(agr)18(ee)-300(on)-300(all)-300(pr)18(o-)]TJ 0 -11.955 Td [(cesses.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.619 Td [(root)]TJ +0 g 0 G +/F62 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-221(to)-221(hold)-221(the)-222(\002nal)-221(value,)-227(or)]TJ/F91 10.3811 Tf 147.052 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1)-221(to)-221(make)-222(it)-221(available)-221(on)-221(all)-221(pr)18(ocesses.)]TJ -153.592 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F91 10.3811 Tf 131.101 0 Td [(\000)]TJ/F62 9.9626 Tf 8.195 0 Td [(1)]TJ/F69 10.3811 Tf 7.873 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F69 10.3811 Tf 19.923 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ +0 g 0 G +/F59 9.9626 Tf -254.343 -34.574 Td [(mode)]TJ +0 g 0 G +/F62 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.955 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F67 9.9626 Tf 199.497 0 Td [(OR)]TJ/F62 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F67 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F62 9.9626 Tf 109.837 0 Td [(,)]TJ/F67 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F62 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.956 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -34.573 Td [(request)]TJ +0 g 0 G +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -24.612 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 164.384 -29.887 Td [(130)]TJ +0 g 0 G +ET + +endstream +endobj +1836 0 obj << -/D [1563 0 R /XYZ 149.705 753.953 null] +/Length 5262 >> +stream +0 g 0 G +0 g 0 G +0 g 0 G +BT +/F59 9.9626 Tf 99.895 706.129 Td [(dat)]TJ +0 g 0 G +/F62 9.9626 Tf 19.368 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(minimum)-250(operation.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-250(a)-250(rank)-250(1)-250(or)-250(2)-250(array)111(.)]TJ 0 -11.955 Td [(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(request)]TJ +0 g 0 G +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(The)]TJ/F67 9.9626 Tf 31.023 0 Td [(dat)]TJ/F62 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.956 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ +0 g 0 G + -12.453 -19.925 Td [(2.)]TJ +0 g 0 G + [-500(The)]TJ/F67 9.9626 Tf 32.225 0 Td [(mode)]TJ/F62 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F67 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F62 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F67 9.9626 Tf 16.309 0 Td [(request)]TJ/F62 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 124.802 417.212 cm +0 0 318.804 27.895 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 137.205 434.448 Td [(call)]TJ +0 g 0 G + [-525(psb_amn\050ctxt,dat,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(ior)]TJ +0 g 0 G + [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf -48.393 -36.165 Td [(3.)]TJ +0 g 0 G + [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F67 9.9626 Tf 216.877 0 Td [(dat)]TJ/F62 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F60 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F62 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 124.802 302.642 cm +0 0 318.804 60.772 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 137.205 352.754 Td [(call)]TJ +0 g 0 G + [-525(psb_amn\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(psb_collective_start_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(amn_request\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + -23.537 -10.959 Td [(.......)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F102 8.9664 Tf -37.659 -10.959 Td [(call)]TJ +0 g 0 G + [-525(psb_amn\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(psb_collective_end_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(amn_request\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 103.537 -218.48 Td [(131)]TJ +0 g 0 G +ET endstream endobj -1571 0 obj +1843 0 obj << -/Length 4785 +/Length 5776 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.22)-1000(psb)]TJ -ET -q -1 0 0 1 153.407 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 156.993 706.129 Td [(local)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(7.15)-1000(psb)]TJ ET q -1 0 0 1 183.605 706.328 cm +1 0 0 1 204.216 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 187.192 706.129 Td [(index)-250(\227)]TJ +/F59 11.9552 Tf 207.803 706.129 Td [(nrm2)-250(\227)-250(Global)-250(2-norm)-250(reduction)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -87.297 -18.964 Td [(call)-525(psb_local_index\050y,)-525(x,)-525(desc_a,)-525(info\051)]TJ +/F67 9.9626 Tf -57.098 -19.198 Td [(call)-525(psb_nrm2\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F62 9.9626 Tf 14.944 -22.401 Td [(This)-297(subr)18(outine)-296(implements)-297(a)-297(2-norm)-296(value)-297(r)18(eduction)-297(operation)-296(based)-297(on)]TJ -14.944 -11.955 Td [(the)-250(underlying)-250(communication)-250(library)111(.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -20.288 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -20.409 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(x)]TJ + 0 -20.408 Td [(ctxt)]TJ +0 g 0 G +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -20.409 Td [(dat)]TJ +0 g 0 G +/F62 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(minimum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-370(as:)-551(a)-371(r)18(eal)-370(variable,)-401(which)-370(may)-371(be)-370(a)-371(scalar)74(,)-400(or)-371(a)-370(rank)-371(1)-370(array)111(.)]TJ 0 -11.956 Td [(Kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -20.408 Td [(root)]TJ +0 g 0 G +/F62 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-221(to)-221(hold)-221(the)-222(\002nal)-221(value,)-227(or)]TJ/F91 10.3811 Tf 147.052 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1)-221(to)-221(make)-222(it)-221(available)-221(on)-221(all)-221(pr)18(ocesses.)]TJ -153.592 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F91 10.3811 Tf 131.101 0 Td [(\000)]TJ/F62 9.9626 Tf 8.195 0 Td [(1)]TJ/F69 10.3811 Tf 7.873 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F69 10.3811 Tf 19.923 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ +0 g 0 G +/F59 9.9626 Tf -254.343 -32.364 Td [(mode)]TJ +0 g 0 G +/F62 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.956 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F67 9.9626 Tf 199.497 0 Td [(OR)]TJ/F62 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F67 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F62 9.9626 Tf 109.837 0 Td [(,)]TJ/F67 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F62 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.955 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -32.364 Td [(request)]TJ +0 g 0 G +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.956 Td [(be)-250(pr)18(esent.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -22.401 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -20.408 Td [(dat)]TJ +0 g 0 G +/F62 9.9626 Tf 19.367 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(2-norm)-250(r)18(eduction.)]TJ 5.54 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.781 0 Td [(.)]TJ +0 g 0 G + 79.264 -29.887 Td [(132)]TJ +0 g 0 G +ET + +endstream +endobj +1847 0 obj +<< +/Length 6252 +>> +stream +0 g 0 G +0 g 0 G +BT +/F62 9.9626 Tf 124.802 706.129 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(r)18(eal)-250(variable,)-250(which)-250(may)-250(be)-250(a)-250(scalar)74(,)-250(or)-250(a)-250(rank)-250(1)-250(array)111(.)]TJ 0 -11.956 Td [(Kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(request)]TJ +0 g 0 G +/F62 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.956 Td [(If)]TJ/F67 9.9626 Tf 8.943 0 Td [(mode)]TJ/F62 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(This)-345(r)18(eduction)-345(is)-346(appr)18(opriate)-345(to)-345(compute)-345(the)-345(r)18(esults)-346(of)-345(multiple)-345(\050local\051)]TJ 12.453 -11.955 Td [(NRM2)-250(operations)-250(at)-250(the)-250(same)-250(time.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(Integer)-250(indices.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in,)-250(inout)]TJ/F54 9.9626 Tf 38.735 0 Td [(.)]TJ -70.535 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(a)-250(rank)-250(one)-250(integer)-250(array)111(.)]TJ + -12.453 -19.925 Td [(2.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -31.88 Td [(desc)]TJ + [-500(Denoting)-249(by)]TJ/F60 9.9626 Tf 69.789 0 Td [(d)-40(a)-25(t)]TJ/F60 7.5716 Tf 13.536 -1.96 Td [(i)]TJ/F62 9.9626 Tf 5.23 1.96 Td [(the)-249(value)-249(of)-248(the)-249(variable)]TJ/F60 9.9626 Tf 108.808 0 Td [(d)-40(a)-25(t)]TJ/F62 9.9626 Tf 15.973 0 Td [(on)-249(pr)18(ocess)]TJ/F60 9.9626 Tf 49.078 0 Td [(i)]TJ/F62 9.9626 Tf 2.964 0 Td [(,)-249(the)-249(output)]TJ/F60 9.9626 Tf 53.71 0 Td [(r)-17(e)-25(s)]TJ/F62 9.9626 Tf -306.635 -11.955 Td [(is)-250(equivalent)-250(to)-250(the)-250(computation)-250(of)]TJ/F60 9.9626 Tf 124.796 -25.468 Td [(r)-17(e)-25(s)]TJ/F93 10.3811 Tf 15.061 0 Td [(=)]TJ/F17 9.9626 Tf 11.086 10.922 Td [(r)]TJ ET q -1 0 0 1 120.408 545.895 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 285.832 490.532 cm +[]0 d 0 J 0.389 w 0 0 m 30.512 0 l S Q BT -/F51 9.9626 Tf 123.397 545.696 Td [(a)]TJ +/F104 13.9477 Tf 285.957 477.344 Td [(\345)]TJ/F60 7.5716 Tf 4.245 -8.764 Td [(i)]TJ/F60 9.9626 Tf 8.364 10.836 Td [(d)-40(a)-25(t)]TJ/F62 7.5716 Tf 13.495 3.473 Td [(2)]TJ/F60 7.5716 Tf 0.041 -7.027 Td [(i)]TJ/F62 9.9626 Tf 4.243 3.554 Td [(,)]TJ -191.543 -30.806 Td [(with)-250(car)18(e)-250(taken)-250(to)-250(avoid)-250(unnecessary)-250(over\003ow)92(.)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ + -12.453 -19.926 Td [(3.)]TJ +0 g 0 G + [-500(The)]TJ/F67 9.9626 Tf 31.023 0 Td [(dat)]TJ/F62 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.955 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ +0 g 0 G + -12.453 -19.925 Td [(4.)]TJ +0 g 0 G + [-500(The)]TJ/F67 9.9626 Tf 32.225 0 Td [(mode)]TJ/F62 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F67 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F62 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F67 9.9626 Tf 16.309 0 Td [(request)]TJ/F62 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 309.258 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 124.802 333.043 cm +0 0 318.804 27.895 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F59 9.9626 Tf 312.397 497.875 Td [(desc)]TJ +/F102 8.9664 Tf 137.205 350.279 Td [(call)]TJ +0 g 0 G + [-525(psb_nrm2\050ctxt,dat,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(ior)]TJ +0 g 0 G + [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf -48.393 -36.165 Td [(5.)]TJ +0 g 0 G + [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F67 9.9626 Tf 216.877 0 Td [(dat)]TJ/F62 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F60 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F62 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 333.945 498.074 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 124.802 218.473 cm +0 0 318.804 60.772 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F59 9.9626 Tf 337.084 497.875 Td [(type)]TJ +/F102 8.9664 Tf 127.791 268.585 Td [(call)]TJ +0 g 0 G + [-525(psb_nrm2\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ + [(psb_collective_start_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -258.11 -19.925 Td [(iact)]TJ + 23.536 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(speci\002es)-250(action)-250(to)-250(be)-250(taken)-250(in)-250(case)-250(of)-250(range)-250(err)18(ors.)-310(Scope:)]TJ/F51 9.9626 Tf 253.796 0 Td [(global)]TJ/F54 9.9626 Tf -249.91 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-190(as:)-280(a)-190(character)-190(variable)]TJ/F59 9.9626 Tf 143.341 0 Td [(I)]TJ/F54 9.9626 Tf 5.23 0 Td [(gnor)18(e,)]TJ/F59 9.9626 Tf 29.808 0 Td [(W)]TJ/F54 9.9626 Tf 5.231 0 Td [(arning)-190(or)]TJ/F59 9.9626 Tf 42.111 0 Td [(A)]TJ/F54 9.9626 Tf 5.231 0 Td [(bort,)-202(default)]TJ/F59 9.9626 Tf 55.839 0 Td [(I)]TJ/F54 9.9626 Tf 5.231 0 Td [(gnor)18(e.)]TJ + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -316.929 -21.918 Td [(On)-250(Return)]TJ + [(nrm2_request\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + -14.122 -10.959 Td [(.......)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(y)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F102 8.9664 Tf -37.659 -10.959 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 10.521 0 Td [(A)-270(logical)-270(mask)-270(which)-271(is)-270(tr)8(ue)-270(for)-270(all)-270(corr)18(esponding)-270(entries)-270(of)]TJ/F52 9.9626 Tf 268.484 0 Td [(x)]TJ/F54 9.9626 Tf 7.897 0 Td [(that)-270(ar)18(e)-270(local)]TJ -261.995 -11.955 Td [(to)-250(the)-250(curr)18(ent)-250(pr)18(ocess)-250(Scope:)]TJ/F51 9.9626 Tf 128.666 0 Td [(local)]TJ/F54 9.9626 Tf -128.666 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(scalar)-250(or)-250(rank)-250(one)-250(logical)-250(array)111(.)]TJ + [-525(psb_nrm2\050ctxt,dat,mode)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(info)]TJ + [(psb_collective_end_,&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.917 Td [(Notes)]TJ + 23.537 -10.959 Td [(&)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 12.454 -19.926 Td [(1.)]TJ + [-525(request)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - [-500(This)-264(r)18(outine)-265(r)18(eturns)-264(a)]TJ/F59 9.9626 Tf 110.663 0 Td [(.true.)]TJ/F54 9.9626 Tf 34.017 0 Td [(value)-264(for)-265(those)-264(indices)-265(that)-264(ar)18(e)-265(local)-264(to)-265(the)]TJ -132.227 -11.955 Td [(curr)18(ent)-250(pr)18(ocess,)-250(including)-250(the)-250(halo)-250(indices.)]TJ + [(nrm2_request\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - 139.477 -140.438 Td [(103)]TJ +0 g 0 G +/F62 9.9626 Tf 103.537 -134.311 Td [(133)]TJ 0 g 0 G ET endstream endobj -1578 0 obj +1858 0 obj << -/Length 3647 +/Length 5352 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.23)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(7.16)-1000(psb)]TJ ET q 1 0 0 1 204.216 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(get)]TJ -ET -q -1 0 0 1 225.126 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 228.712 706.129 Td [(boundary)-250(\227)-250(Extract)-250(list)-250(of)-250(boundary)-250(elements)]TJ -0 g 0 G +/F59 11.9552 Tf 207.803 706.129 Td [(snd)-250(\227)-250(Send)-250(data)]TJ 0 g 0 G -/F59 9.9626 Tf -78.007 -18.964 Td [(call)-525(psb_get_boundary\050bndel,)-525(desc,)-525(info\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F67 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_snd\050ctxt,)-525(dat,)-525(dst,)-525(m\051)]TJ/F62 9.9626 Tf 14.944 -21.918 Td [(This)-250(subr)18(outine)-250(sends)-250(a)-250(packet)-250(of)-250(data)-250(to)-250(a)-250(destination.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F59 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous:)-310(see)-250(usage)-250(notes.)]TJ 0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - 0 -19.925 Td [(desc)]TJ 0 g 0 G -/F54 9.9626 Tf 24.896 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 577.576 Td [(desc)]TJ -ET -q -1 0 0 1 384.755 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 387.893 577.576 Td [(type)]TJ + 0 -19.926 Td [(ctxt)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -24.907 -19.926 Td [(dat)]TJ 0 g 0 G +/F62 9.9626 Tf 19.367 0 Td [(The)-250(data)-250(to)-250(be)-250(sent.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-220(a)-220(rank)-219(1)-220(or)-220(2)-220(array)111(,)-226(or)-220(a)-219(character)-220(or)-220(logical)-220(scalar)74(.)-520(T)90(ype,)-225(kind)-220(and)-220(rank)]TJ 0 -11.956 Td [(must)-215(agr)18(ee)-216(on)-215(sender)-215(and)-216(r)18(eceiver)-215(pr)18(ocess;)-227(if)]TJ/F60 9.9626 Tf 197.687 0 Td [(m)]TJ/F62 9.9626 Tf 10.021 0 Td [(is)-215(not)-216(speci\002ed,)-222(size)-215(must)]TJ -207.708 -11.955 Td [(agr)18(ee)-250(as)-250(well.)]TJ 0 g 0 G - 0 -19.925 Td [(bndel)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(dst)]TJ 0 g 0 G -/F54 9.9626 Tf 31.541 0 Td [(The)-307(list)-307(of)-307(boundary)-307(elements)-307(on)-306(the)-307(calling)-307(pr)18(ocess,)-321(in)-307(local)-307(number)18(-)]TJ -6.635 -11.955 Td [(ing.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(Speci\002ed)-234(as:)-302(a)-234(rank)-234(one)-234(a)-1(r)1(ray)-235(with)-234(the)-234(ALLOCA)74(T)74(ABLE)-234(attribute,)-237(of)-234(type)]TJ 0 -11.955 Td [(integer)74(.)]TJ +/F62 9.9626 Tf 18.809 0 Td [(Destination)-250(pr)18(ocess.)]TJ 6.098 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F69 10.3811 Tf 138.85 0 Td [(<)]TJ/F93 10.3811 Tf 8.319 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(d)-25(s)-25(t)]TJ/F69 10.3811 Tf 15.689 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -31.881 Td [(info)]TJ +/F59 9.9626 Tf -241.89 -31.88 Td [(m)]TJ 0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ +/F62 9.9626 Tf 13.838 0 Td [(Number)-250(of)-250(r)18(ows.)]TJ 11.069 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F62 9.9626 Tf 40.946 0 Td [(.)]TJ -68.034 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F69 10.3811 Tf 138.85 0 Td [(<)]TJ/F93 10.3811 Tf 8.319 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(m)]TJ/F69 10.3811 Tf 10.767 0 Td [(<)]TJ/F93 10.3811 Tf 8.319 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(s)-18(i)-32(z)-25(e)]TJ/F93 10.3811 Tf 15.94 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F62 9.9626 Tf 13.494 0 Td [(,)-167(1)]TJ/F93 10.3811 Tf 9.257 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(.)]TJ -235.542 -11.955 Td [(When)]TJ/F60 9.9626 Tf 29.859 0 Td [(d)-40(a)-25(t)]TJ/F62 9.9626 Tf 16.898 0 Td [(is)-342(a)-341(rank)-342(2)-341(array)111(,)-365(speci\002es)-342(the)-341(number)-342(of)-341(r)18(ows)-342(to)-342(be)-341(sent)-342(in-)]TJ -46.757 -11.955 Td [(dependently)-341(of)-340(the)-341(leading)-341(dimension)]TJ/F60 9.9626 Tf 175.121 0 Td [(s)-18(i)-32(z)-25(e)]TJ/F93 10.3811 Tf 15.94 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F62 9.9626 Tf 13.494 0 Td [(,)-167(1)]TJ/F93 10.3811 Tf 9.257 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(;)-386(must)-341(have)-340(the)-341(same)]TJ -222.236 -11.955 Td [(value)-250(on)-250(sending)-250(and)-250(r)18(eceiving)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ +/F59 9.9626 Tf -24.907 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G - [-500(If)-219(ther)18(e)-220(ar)18(e)-219(no)-220(boundary)-219(elements)-219(\050i.e.,)-226(if)-219(the)-220(local)-219(part)-219(of)-220(the)-219(connectivity)]TJ 12.453 -11.956 Td [(graph)-391(is)-392(self-contained\051)-391(the)-392(output)-391(vector)-391(is)-392(set)-391(to)-391(the)-392(\223not)-391(allocated\224)]TJ 0 -11.955 Td [(state.)]TJ +/F59 11.9552 Tf 0 -21.918 Td [(Notes)]TJ 0 g 0 G - -12.453 -19.925 Td [(2.)]TJ +/F62 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ 0 g 0 G - [-500(Otherwise)-206(the)-205(size)-206(of)]TJ/F59 9.9626 Tf 105.891 0 Td [(bndel)]TJ/F54 9.9626 Tf 28.201 0 Td [(will)-206(be)-205(exactly)-206(equal)-206(to)-206(the)-205(number)-206(of)-206(bound-)]TJ -121.639 -11.955 Td [(ary)-250(elements.)]TJ + [-500(This)-292(subr)18(outine)-292(impl)1(ies)-292(a)-292(synchr)18(onization,)-302(but)-292(only)-292(between)-291(the)-292(calling)]TJ 12.454 -11.955 Td [(pr)18(ocess)-250(and)-250(the)-250(destination)-250(pr)18(ocess)]TJ/F60 9.9626 Tf 158.309 0 Td [(d)-25(s)-25(t)]TJ/F62 9.9626 Tf 12.797 0 Td [(.)]TJ 0 g 0 G - 139.477 -196.229 Td [(104)]TJ + -31.629 -104.573 Td [(134)]TJ 0 g 0 G ET endstream endobj -1585 0 obj +1863 0 obj << -/Length 3458 +/Length 5356 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.24)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(7.17)-1000(psb)]TJ ET q 1 0 0 1 153.407 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 156.993 706.129 Td [(get)]TJ -ET -q -1 0 0 1 174.316 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 177.903 706.129 Td [(overlap)-250(\227)-250(Extract)-250(list)-250(of)-250(overlap)-250(elements)]TJ +/F59 11.9552 Tf 156.993 706.129 Td [(rcv)-250(\227)-250(Receive)-250(data)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -78.008 -18.964 Td [(call)-525(psb_get_overlap\050ovrel,)-525(desc,)-525(info\051)]TJ +/F67 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_rcv\050ctxt,)-525(dat,)-525(src,)-525(m\051)]TJ/F62 9.9626 Tf 14.944 -21.918 Td [(This)-250(subr)18(outine)-250(r)18(eceives)-250(a)-250(packet)-250(of)-250(data)-250(to)-250(a)-250(destination.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous:)-310(see)-250(usage)-250(notes.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(desc)]TJ + 0 -19.926 Td [(ctxt)]TJ +0 g 0 G +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.926 Td [(src)]TJ +0 g 0 G +/F62 9.9626 Tf 17.704 0 Td [(Sour)18(ce)-250(pr)18(ocess.)]TJ 7.203 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F69 10.3811 Tf 138.85 0 Td [(<)]TJ/F93 10.3811 Tf 8.319 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(s)-15(r)-17(c)]TJ/F69 10.3811 Tf 15.141 0 Td [(<)]TJ/F93 10.3811 Tf 8.318 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 13.504 0 Td [(\000)]TJ/F62 9.9626 Tf 10.131 0 Td [(1.)]TJ +0 g 0 G +/F59 9.9626 Tf -241.342 -31.881 Td [(m)]TJ +0 g 0 G +/F62 9.9626 Tf 13.838 0 Td [(Number)-250(of)-250(r)18(ows.)]TJ 11.069 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F62 9.9626 Tf 40.946 0 Td [(.)]TJ -68.034 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F69 10.3811 Tf 138.85 0 Td [(<)]TJ/F93 10.3811 Tf 8.319 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(m)]TJ/F69 10.3811 Tf 10.767 0 Td [(<)]TJ/F93 10.3811 Tf 8.319 0 Td [(=)]TJ/F60 9.9626 Tf 11.086 0 Td [(s)-18(i)-32(z)-25(e)]TJ/F93 10.3811 Tf 15.94 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F62 9.9626 Tf 13.494 0 Td [(,)-167(1)]TJ/F93 10.3811 Tf 9.257 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(.)]TJ -235.542 -11.956 Td [(When)]TJ/F60 9.9626 Tf 29.859 0 Td [(d)-40(a)-25(t)]TJ/F62 9.9626 Tf 16.898 0 Td [(is)-342(a)-341(rank)-342(2)-341(array)111(,)-365(speci\002es)-342(the)-341(number)-342(of)-341(r)18(ows)-342(to)-342(be)-341(sent)-342(in-)]TJ -46.757 -11.955 Td [(dependently)-341(of)-340(the)-341(leading)-341(dimension)]TJ/F60 9.9626 Tf 175.121 0 Td [(s)-18(i)-32(z)-25(e)]TJ/F93 10.3811 Tf 15.94 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F62 9.9626 Tf 13.494 0 Td [(,)-167(1)]TJ/F93 10.3811 Tf 9.257 0 Td [(\051)]TJ/F62 9.9626 Tf 4.15 0 Td [(;)-386(must)-341(have)-340(the)-341(same)]TJ -222.236 -11.955 Td [(value)-250(on)-250(sending)-250(and)-250(r)18(eceiving)-250(pr)18(ocesses.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -21.918 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(dat)]TJ +0 g 0 G +/F62 9.9626 Tf 19.368 0 Td [(The)-250(data)-250(to)-250(be)-250(r)18(eceived.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-220(a)-220(rank)-219(1)-220(or)-220(2)-220(array)111(,)-226(or)-220(a)-219(character)-220(or)-220(logical)-220(scalar)74(.)-520(T)90(ype,)-225(kind)-220(and)-220(rank)]TJ 0 -11.955 Td [(must)-215(agr)18(ee)-216(on)-215(sender)-215(and)-216(r)18(eceiver)-215(pr)18(ocess;)-227(if)]TJ/F60 9.9626 Tf 197.687 0 Td [(m)]TJ/F62 9.9626 Tf 10.021 0 Td [(is)-215(not)-216(speci\002ed,)-222(size)-215(must)]TJ -207.708 -11.955 Td [(agr)18(ee)-250(as)-250(well.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +0 g 0 G +/F62 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0 g 0 G + [-500(This)-292(subr)18(outine)-291(implies)-292(a)-292(synchr)18(onization,)-302(but)-292(only)-292(between)-291(the)-292(calling)]TJ 12.453 -11.955 Td [(pr)18(ocess)-250(and)-250(the)-250(sour)18(ce)-250(pr)18(ocess)]TJ/F60 9.9626 Tf 137.538 0 Td [(s)-15(r)-17(c)]TJ/F62 9.9626 Tf 12.249 0 Td [(.)]TJ +0 g 0 G + -10.31 -104.573 Td [(135)]TJ 0 g 0 G -/F54 9.9626 Tf 24.897 0 Td [(the)-250(communication)-250(descriptor)74(.)]TJ 0.01 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ ET -q -1 0 0 1 309.258 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q + +endstream +endobj +1870 0 obj +<< +/Length 6241 +>> +stream +0 g 0 G +0 g 0 G BT -/F59 9.9626 Tf 312.397 577.576 Td [(desc)]TJ +/F59 14.3462 Tf 150.705 705.784 Td [(8)-1000(Error)-250(handling)]TJ/F62 9.9626 Tf 0 -22.702 Td [(The)-382(PSBLAS)-382(library)-381(err)18(or)-382(handling)-382(policy)-382(has)-382(been)-382(comple)1(tely)-382(r)18(ewritten)-382(in)]TJ 0 -11.955 Td [(version)-359(2.0.)-638(The)-359(idea)-359(behind)-360(the)-359(design)-359(of)-359(this)-359(new)-360(err)18(or)-359(handling)-359(strategy)]TJ 0 -11.955 Td [(is)-303(to)-303(keep)-303(err)18(or)-303(messages)-303(on)-303(a)-303(stack)-303(allowing)-303(the)-303(user)-303(to)-303(trace)-303(back)-303(up)-303(to)-303(the)]TJ 0 -11.955 Td [(point)-317(wher)18(e)-318(the)-317(\002rst)-317(err)18(or)-318(message)-317(has)-318(been)-317(generated.)-512(Every)-317(r)18(outine)-318(in)-317(the)]TJ 0 -11.956 Td [(PSBLAS-2.0)-336(library)-336(has,)-358(as)-336(last)-337(non-optional)-336(ar)18(gument,)-358(an)-336(integer)]TJ/F67 9.9626 Tf 298.678 0 Td [(info)]TJ/F62 9.9626 Tf 24.271 0 Td [(vari-)]TJ -322.949 -11.955 Td [(able;)-364(whenever)74(,)-344(inside)-326(the)-326(r)18(outine,)-345(an)-326(err)18(or)-325(is)-326(detected,)-345(this)-326(variable)-326(is)-325(set)-326(to)]TJ 0 -11.955 Td [(a)-384(value)-384(corr)18(esponding)-384(to)-384(a)-384(speci\002c)-384(err)18(or)-384(code.)-711(Then)-384(this)-384(err)18(or)-384(code)-384(is)-384(also)]TJ 0 -11.955 Td [(pushed)-274(on)-273(the)-274(err)18(or)-274(stack)-274(and)-273(then)-274(either)-274(contr)18(ol)-274(is)-273(r)18(eturned)-274(to)-274(the)-273(caller)-274(r)18(ou-)]TJ 0 -11.955 Td [(tine)-342(or)-342(the)-342(execution)-343(is)-342(aborted,)-365(depending)-342(on)-342(the)-342(users)-342(choice.)-587(At)-342(the)-342(time)]TJ 0 -11.955 Td [(when)-243(the)-242(execution)-243(is)-242(aborted,)-244(an)-243(err)18(or)-242(message)-243(is)-243(p)1(rinted)-243(on)-243(standar)18(d)-242(output)]TJ 0 -11.956 Td [(with)-257(a)-256(level)-257(of)-256(verbosity)-257(than)-256(can)-257(be)-256(chosen)-257(by)-257(the)-256(user)74(.)-330(If)-256(the)-257(execution)-257(is)-256(not)]TJ 0 -11.955 Td [(aborted,)-259(then,)-259(the)-257(caller)-257(r)18(outine)-258(checks)-257(the)-257(value)-257(r)18(eturned)-257(in)-258(the)]TJ/F67 9.9626 Tf 284.621 0 Td [(info)]TJ/F62 9.9626 Tf 23.484 0 Td [(variable)]TJ -308.105 -11.955 Td [(and,)-290(if)-282(not)-282(zer)18(o,)-291(an)-282(err)18(or)-282(condition)-282(is)-282(raised.)-407(This)-282(pr)18(ocess)-282(continues)-282(on)-282(all)-282(the)]TJ 0 -11.955 Td [(levels)-203(of)-203(nested)-203(calls)-203(until)-203(the)-203(level)-203(wher)18(e)-202(the)-203(user)-203(decides)-203(to)-203(abort)-203(the)-203(pr)18(ogram)]TJ 0 -11.955 Td [(execution.)]TJ 14.944 -11.955 Td [(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-286(5)]TJ +0 g 0 G + [-285(shows)-286(the)-286(layou)1(t)-286(of)-286(a)-285(generic)]TJ/F67 9.9626 Tf 172.064 0 Td [(psb_foo)]TJ/F62 9.9626 Tf 39.458 0 Td [(r)18(outine)-286(with)-285(r)18(espect)-286(to)-286(the)]TJ -226.466 -11.956 Td [(PSBLAS-2.0)-258(err)18(or)-259(handling)-258(policy)111(.)-335(It)-258(is)-258(possible)-259(to)-258(see)-258(how)92(,)-261(whenever)-258(an)-258(err)18(or)]TJ 0 -11.955 Td [(condition)-298(is)-298(detected,)-311(the)]TJ/F67 9.9626 Tf 114.879 0 Td [(info)]TJ/F62 9.9626 Tf 23.893 0 Td [(variable)-298(is)-299(set)-298(to)-298(the)-298(corr)18(esponding)-299(err)18(or)-298(code)]TJ -138.772 -11.955 Td [(which)-309(is,)-324(then,)-324(pushed)-310(on)-309(top)-309(of)-310(the)-309(stack)-309(by)-309(means)-310(of)-309(the)]TJ/F67 9.9626 Tf 265.277 0 Td [(psb_errpush)]TJ/F62 9.9626 Tf 57.534 0 Td [(.)-488(An)]TJ -322.811 -11.955 Td [(err)18(or)-325(condition)-326(may)-325(be)-326(dir)18(ectl)1(y)-326(detected)-325(inside)-326(a)-325(r)18(outine)-325(or)-326(indir)18(ectly)-325(check-)]TJ 0 -11.955 Td [(ing)-331(the)-331(err)18(or)-331(code)-331(r)18(eturned)-331(r)18(eturned)-331(by)-331(a)-331(called)-331(r)18(outine.)-553(Whenever)-331(an)-331(err)18(or)]TJ 0 -11.956 Td [(is)-253(encounter)18(ed,)-255(after)-253(it)-254(has)-253(been)-254(pushed)-253(on)-254(st)1(ack,)-255(the)-253(pr)18(ogram)-254(execution)-253(skips)]TJ 0 -11.955 Td [(to)-264(a)-265(point)-264(wher)18(e)-264(the)-265(err)18(or)-264(condition)-264(is)-264(handled;)-272(the)-264(err)18(or)-265(condition)-264(is)-264(handled)]TJ 0 -11.955 Td [(either)-336(by)-336(r)18(eturning)-336(contr)18(ol)-336(to)-336(the)-336(caller)-335(r)17(o)1(utine)-336(or)-336(by)-336(calling)-336(the)]TJ/F67 9.9626 Tf 291.408 0 Td [(psb\134_error)]TJ/F62 9.9626 Tf -291.408 -11.955 Td [(r)18(outine)-273(which)-274(prints)-273(the)-274(content)-273(of)-273(the)-274(err)18(or)-273(stack)-274(and)-273(aborts)-273(the)-274(pr)18(ogram)-273(ex-)]TJ 0 -11.955 Td [(ecution,)-373(accor)18(ding)-348(to)-348(the)-348(choice)-348(made)-348(by)-348(the)-348(user)-348(with)]TJ/F67 9.9626 Tf 252.305 0 Td [(psb_set_erraction)]TJ/F62 9.9626 Tf 88.915 0 Td [(.)]TJ -341.22 -11.955 Td [(The)-297(default)-296(is)-297(to)-296(print)-297(the)-297(err)18(or)-296(and)-297(terminate)-296(the)-297(pr)18(ogram,)-308(but)-297(the)-297(user)-296(may)]TJ 0 -11.956 Td [(choose)-250(to)-250(handle)-250(the)-250(err)18(or)-250(explicitly)111(.)]TJ 14.944 -11.955 Td [(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-347(6)]TJ +0 g 0 G + [-348(r)18(eports)-347(a)-347(sample)-347(err)18(or)-348(message)-347(generated)-347(by)-348(the)-347(PSBLAS-2.0)-347(li-)]TJ -14.944 -11.955 Td [(brary)111(.)-539(This)-327(err)18(or)-326(has)-327(been)-326(generated)-327(by)-326(the)-326(fact)-327(that)-326(the)-327(user)-326(has)-327(chosen)-326(the)]TJ 0 -11.955 Td [(invalid)-379(\223FOO\224)-380(stor)1(a)-1(g)1(e)-380(format)-379(to)-379(r)18(epr)18(esent)-380(the)-379(sparse)-379(matrix.)-698(Fr)18(om)-380(this)-379(er)18(-)]TJ 0 -11.955 Td [(r)18(or)-394(message)-393(it)-394(is)-393(possible)-394(to)-394(se)1(e)-394(that)-394(the)-393(err)18(or)-394(has)-393(been)-394(detected)-394(inside)-393(the)]TJ/F67 9.9626 Tf 0 -11.955 Td [(psb_cest)]TJ/F62 9.9626 Tf 45.361 0 Td [(subr)18(outine)-353(called)-353(by)]TJ/F67 9.9626 Tf 95.326 0 Td [(psb_spasb)]TJ/F62 9.9626 Tf 50.591 0 Td [(...)-619(by)-354(pr)18(ocess)-353(0)-353(\050i.e.)-619(the)-353(r)18(oot)-354(pr)18(o-)]TJ -191.278 -11.956 Td [(cess\051.)]TJ +0 g 0 G + 164.384 -198.123 Td [(136)]TJ +0 g 0 G ET + +endstream +endobj +1876 0 obj +<< +/Length 10302 +>> +stream +0 g 0 G +0 g 0 G +0 g 0 G +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG q -1 0 0 1 333.945 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 99.895 417.212 cm +0 0 343.711 292.902 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F59 9.9626 Tf 337.084 577.576 Td [(type)]TJ +/F67 9.9626 Tf 102.884 698.757 Td [(subroutine)]TJ +0 g 0 G + [-525(psb_foo\050some)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(args,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(info\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 9.9626 Tf 15.691 -11.956 Td [(!...)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 0 -11.955 Td [(if)]TJ +0 g 0 G + [(\050error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(detected\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +0 g 0 G + 15.691 -11.955 Td [(info)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(errcode1)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ +0 g 0 G + [-525(psb_errpush\050)]TJ +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + [(\015psb_foo\015)]TJ +0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(errcode1\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(goto)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(9999)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -15.691 -11.955 Td [(end)-525(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 9.9626 Tf 0 -11.956 Td [(!...)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 0 -11.955 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ + [-525(psb_bar\050some)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -258.11 -21.918 Td [(On)-250(Return)]TJ + [-525(args,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(info\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(ovrel)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(if)]TJ 0 g 0 G -/F54 9.9626 Tf 28.234 0 Td [(The)-250(list)-250(of)-250(overlap)-250(elements)-250(on)-250(the)-250(calling)-250(pr)18(ocess,)-250(in)-250(local)-250(numbering.)]TJ -3.327 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-234(as:)-302(a)-234(rank)-234(one)-234(array)-235(with)-234(the)-234(ALLOCA)74(T)74(ABLE)-234(attribute,)-237(of)-234(type)]TJ 0 -11.955 Td [(integer)74(.)]TJ + [(\050info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -31.88 Td [(info)]TJ + [-525(.ne.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ + [-525(zero\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ 0 g 0 G - [-500(If)-287(ther)18(e)-287(ar)18(e)-287(no)-287(overlap)-287(elements)-287(the)-287(output)-287(vector)-287(is)-287(set)-287(to)-287(the)-287(\223not)-287(allo-)]TJ 12.453 -11.955 Td [(cated\224)-250(state.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG 0 g 0 G - -12.453 -19.926 Td [(2.)]TJ + 15.691 -11.955 Td [(info)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - [-500(Otherwise)-194(the)-194(size)-195(of)]TJ/F59 9.9626 Tf 105.434 0 Td [(ovrel)]TJ/F54 9.9626 Tf 28.087 0 Td [(will)-194(be)-194(exactly)-195(equal)-194(to)-194(the)-194(number)-195(of)-194(overlap)]TJ -121.068 -11.955 Td [(elements.)]TJ + [(errcode2)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 139.477 -220.139 Td [(105)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(call)]TJ 0 g 0 G -ET - -endstream -endobj -1592 0 obj -<< -/Length 5480 ->> -stream + [-525(psb_errpush\050)]TJ +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + [(\015psb_foo\015)]TJ 0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.25)-1000(psb)]TJ -ET -q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 207.803 706.129 Td [(sp)]TJ -ET -q -1 0 0 1 221.133 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 224.719 706.129 Td [(getrow)-250(\227)-250(Extract)-250(row\050s\051)-250(from)-250(a)-250(sparse)-250(matrix)]TJ + [-525(errcode2\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.956 Td [(goto)]TJ 0 g 0 G -/F59 9.9626 Tf -74.014 -19.204 Td [(call)-525(psb_sp_getrow\050row,)-525(a,)-525(nz,)-525(ia,)-525(ja,)-525(val,)-525(info,)-525(&)]TJ 73.225 -11.955 Td [(&)-525(append,)-525(nzin,)-525(lrw\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -73.225 -22.29 Td [(T)90(ype:)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(9999)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -20.42 Td [(On)-250(Entry)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -15.691 -11.955 Td [(end)-525(if)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -20.421 Td [(row)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 9.9626 Tf 0 -11.955 Td [(!...)]TJ 0 g 0 G -/F54 9.9626 Tf 22.695 0 Td [(The)-250(\050\002rst\051)-250(r)18(ow)-250(to)-250(be)-250(extracted.)]TJ 2.212 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf -28.343 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(required)]TJ/F54 9.9626 Tf -23.999 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)]TJ/F61 10.3811 Tf 104.322 0 Td [(>)]TJ/F54 9.9626 Tf 10.962 0 Td [(0.)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG +/F67 9.9626 Tf -15.691 -11.955 Td [(9999)]TJ 0 g 0 G -/F51 9.9626 Tf -140.191 -20.42 Td [(a)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(matrix)-250(fr)18(om)-250(which)-250(to)-250(get)-250(r)18(ows.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf -28.343 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(required)]TJ/F54 9.9626 Tf -23.999 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.137 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 495.976 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 495.777 Td [(Tspmat)]TJ -ET -q -1 0 0 1 395.216 495.976 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 398.354 495.777 Td [(type)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(continue)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 15.691 -11.955 Td [(if)]TJ 0 g 0 G -/F51 9.9626 Tf -268.57 -20.421 Td [(append)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 39.292 0 Td [(Whether)-250(to)-250(append)-250(or)-250(overwrite)-250(existing)-250(output.)]TJ -14.386 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf -24 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(value)-250(default:)-310(false)-250(\050overwrite\051.)]TJ + [-525(\050err_act)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.906 -20.421 Td [(nzin)]TJ + [-525(.eq.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 25.454 0 Td [(Input)-250(size)-250(to)-250(be)-250(appended)-250(to.)]TJ -0.548 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf -24 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-304(as:)-417(an)-303(integer)]TJ/F61 10.3811 Tf 106.988 0 Td [(>)]TJ/F54 9.9626 Tf 11.949 0 Td [(0.)-471(When)-303(append)-303(is)-304(tr)8(ue,)-317(speci\002es)-303(how)-304(many)]TJ -118.937 -11.955 Td [(entries)-250(in)-250(the)-250(output)-250(vectors)-250(ar)18(e)-250(alr)18(eady)-250(\002lled.)]TJ + [-525(act_abort\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -20.421 Td [(lrw)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 15.691 -11.955 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(The)-250(last)-250(r)18(ow)-250(to)-250(be)-250(extracted.)]TJ 4.433 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf -28.344 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf -24 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)]TJ/F61 10.3811 Tf 104.323 0 Td [(>)]TJ/F54 9.9626 Tf 10.962 0 Td [(0,)-250(default:)]TJ/F52 9.9626 Tf 46.878 0 Td [(r)-17(o)-35(w)]TJ/F54 9.9626 Tf 16.134 0 Td [(.)]TJ + [-525(psb_error\050icontxt\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -203.203 -22.29 Td [(On)-250(Return)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.956 Td [(return)]TJ 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -15.691 -11.955 Td [(else)]TJ 0 g 0 G - 0 -20.42 Td [(nz)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -11.955 Td [(return)]TJ 0 g 0 G -/F54 9.9626 Tf 16.05 0 Td [(the)-250(number)-250(of)-250(elements)-250(r)18(eturned)-250(by)-250(this)-250(call.)]TJ 8.856 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(Returned)-250(as:)-310(an)-250(integer)-250(scalar)74(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -15.691 -11.955 Td [(end)-525(if)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -20.421 Td [(ia)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -23.91 Td [(end)-525(subroutine)]TJ 0 g 0 G -/F54 9.9626 Tf 13.28 0 Td [(the)-250(r)18(ow)-250(indices.)]TJ 11.626 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.344 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.923 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.293 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(with)-250(the)]TJ/F59 9.9626 Tf 169.114 0 Td [(ALLOCATABLE)]TJ/F54 9.9626 Tf 60.025 0 Td [(attribute.)]TJ + [-525(psb_foo)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - -89.662 -29.887 Td [(106)]TJ 0 g 0 G +/F62 9.9626 Tf -2.989 -41.729 Td [(Listing)-289(5:)-387(The)-289(layout)-289(of)-289(a)-289(generic)]TJ/F67 9.9626 Tf 149.96 0 Td [(psb)]TJ ET - -endstream -endobj -1596 0 obj -<< -/Length 3529 ->> -stream +q +1 0 0 1 266.174 382.258 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 269.312 382.059 Td [(foo)]TJ/F62 9.9626 Tf 18.568 0 Td [(r)18(outine)-289(with)-289(r)18(espect)-288(to)-289(PSBLAS-2.0)]TJ -187.985 -11.955 Td [(err)18(or)-250(handling)-250(policy)111(.)]TJ 0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG +/F67 9.9626 Tf 0 -19.609 Td [(==========================================================)]TJ 0 g 0 G + 0 -11.955 Td [(Process:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F51 9.9626 Tf 99.895 706.129 Td [(ja)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(0)]TJ 0 g 0 G -/F54 9.9626 Tf 13.281 0 Td [(the)-250(column)-250(indices)-250(of)-250(the)-250(elements)-250(to)-250(be)-250(inserted.)]TJ 11.626 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(with)-250(the)]TJ/F59 9.9626 Tf 169.114 0 Td [(ALLOCATABLE)]TJ/F54 9.9626 Tf 60.024 0 Td [(attribute.)]TJ + [(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -254.045 -19.925 Td [(val)]TJ + [-1050(PSBLAS)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 18.82 0 Td [(the)-250(elements)-250(to)-250(be)-250(inserted.)]TJ 6.087 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -49.922 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(r)18(eal)-250(array)-250(with)-250(the)]TJ/F59 9.9626 Tf 148.761 0 Td [(ALLOCATABLE)]TJ/F54 9.9626 Tf 60.024 0 Td [(attribute.)]TJ + [-525(Error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -233.692 -19.925 Td [(info)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(\050)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(4010)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(\051)]TJ 0 g 0 G - [-500(The)-307(output)]TJ/F52 9.9626 Tf 65.308 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 13.394 0 Td [(is)-307(always)-307(the)-307(size)-307(of)-307(the)-307(output)-307(generated)-307(by)-306(the)-307(curr)18(ent)]TJ -66.249 -11.955 Td [(call;)-283(thus,)-277(if)]TJ/F59 9.9626 Tf 53.971 0 Td [(append=.true.)]TJ/F54 9.9626 Tf 67.994 0 Td [(,)-278(the)-271(total)-272(output)-272(size)-272(will)-272(be)]TJ/F52 9.9626 Tf 129.372 0 Td [(n)-25(z)-18(i)-32(n)]TJ/F85 10.3811 Tf 21.286 0 Td [(+)]TJ/F52 9.9626 Tf 10.336 0 Td [(n)-25(z)]TJ/F54 9.9626 Tf 10.337 0 Td [(,)-277(with)]TJ -293.296 -11.955 Td [(the)-292(newly)-293(extracted)-292(coef)18(\002cients)-293(stor)18(ed)-292(in)-293(entries)]TJ/F59 9.9626 Tf 217.177 0 Td [(nzin+1:nzin+nz)]TJ/F54 9.9626 Tf 76.139 0 Td [(of)-292(the)]TJ -293.316 -11.955 Td [(array)-250(ar)18(guments;)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -12.453 -19.926 Td [(2.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(in)]TJ 0 g 0 G - [-500(When)]TJ/F59 9.9626 Tf 41.275 0 Td [(append=.true.)]TJ/F54 9.9626 Tf 70.485 0 Td [(the)-250(output)-250(arrays)-250(ar)18(e)-250(r)18(eallocated)-250(as)-250(necessary;)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -111.76 -19.925 Td [(3.)]TJ + [-525(subroutine:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(The)-218(r)18(ow)-218(and)-219(column)-218(indices)-218(ar)18(e)-218(r)18(eturned)-218(in)-219(the)-218(local)-218(numbering)-218(scheme;)]TJ 12.453 -11.955 Td [(if)-190(the)-190(global)-190(numbering)-190(is)-190(desir)18(ed,)-202(the)-190(user)-190(may)-190(employ)-190(the)]TJ/F59 9.9626 Tf 258.836 0 Td [(psb_loc_to_glob)]TJ/F54 9.9626 Tf -258.836 -11.955 Td [(r)18(outine)-250(on)-250(the)-250(output.)]TJ + [-525(df_sample)]TJ 0 -11.955 Td [(Error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 139.477 -290.909 Td [(107)]TJ + [-525(from)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1606 0 obj -<< -/Length 3995 ->> -stream + [-525(call)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(to)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(6.26)-1000(psb)]TJ -ET -q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 207.803 706.129 Td [(sizeof)-250(\227)-250(Memory)-250(occupation)]TJ/F54 9.9626 Tf -57.098 -18.964 Td [(This)-250(function)-250(computes)-250(the)-250(memory)-250(occupation)-250(of)-250(a)-250(PSBLAS)-250(object.)]TJ + [-525(subroutine)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(mat)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf 0 -21.918 Td [(isz)-525(=)-525(psb_sizeof\050a\051)]TJ 0 -11.955 Td [(isz)-525(=)-525(psb_sizeof\050desc_a\051)]TJ 0 -11.955 Td [(isz)-525(=)-525(psb_sizeof\050prec\051)]TJ +0.73 0.38 0.84 rg 0.73 0.38 0.84 RG + [-525(dist)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + 0 -11.955 Td [(==========================================================)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + 0 -11.955 Td [(Process:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(0)]TJ 0 g 0 G + [(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(a)]TJ + [-1050(PSBLAS)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(A)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 72.97 0 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(.)]TJ -65.344 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.137 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 531.947 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 531.748 Td [(Tspmat)]TJ -ET -q -1 0 0 1 395.216 531.947 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 398.354 531.748 Td [(type)]TJ + [-525(Error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(\050)]TJ 0 g 0 G -/F51 9.9626 Tf -268.57 -19.925 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 512.022 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 511.823 Td [(a)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(4010)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(Communication)-250(descriptor)74(.)]TJ -8.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 464.201 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 464.002 Td [(desc)]TJ -ET -q -1 0 0 1 384.755 464.201 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 387.893 464.002 Td [(type)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(\051)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -258.11 -19.925 Td [(prec)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(in)]TJ 0 g 0 G -/F54 9.9626 Tf 24.348 0 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -30.874 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 196.511 0 Td [(psb)]TJ -ET -q -1 0 0 1 388.441 408.41 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 391.579 408.211 Td [(prec)]TJ -ET -q -1 0 0 1 413.128 408.41 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 416.266 408.211 Td [(type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ + [-525(subroutine:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -286.483 -19.925 Td [(On)-250(Return)]TJ + [-525(mat_distv)]TJ 0 -11.955 Td [(Error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(from)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(Function)-250(value)]TJ + [-525(call)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 72.776 0 Td [(The)-322(memory)-322(occupation)-322(of)-323(the)-322(object)-322(speci\002ed)-322(in)-322(the)-322(calling)]TJ -47.87 -11.956 Td [(sequence,)-250(in)-250(bytes.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(Returned)-250(as:)-310(an)]TJ/F59 9.9626 Tf 71.82 0 Td [(integer\050psb_long_int_k_\051)]TJ/F54 9.9626 Tf 128.019 0 Td [(number)74(.)]TJ + [-525(to)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -60.362 -242.057 Td [(108)]TJ + [-525(subroutine)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1610 0 obj -<< -/Length 5626 ->> -stream +0.73 0.38 0.84 rg 0.73 0.38 0.84 RG + [-525(psb_spasb)]TJ 0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + 0 -11.956 Td [(==========================================================)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(6.27)-1000(Sorting)-250(utilities)-250(\227)]TJ 0 -20.164 Td [(psb)]TJ -ET -q -1 0 0 1 120.53 686.164 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 124.116 685.965 Td [(msort)-250(\227)-250(Sorting)-250(by)-250(the)-250(Merge-sort)-250(algorithm)]TJ -24.221 -12.574 Td [(psb)]TJ -ET -q -1 0 0 1 120.53 673.59 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 124.116 673.391 Td [(qsort)-250(\227)-250(Sorting)-250(by)-250(the)-250(Quicksort)-250(algorithm)]TJ -24.221 -12.575 Td [(psb)]TJ -ET -q -1 0 0 1 120.53 661.016 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 124.116 660.816 Td [(hsort)-250(\227)-250(Sorting)-250(by)-250(the)-250(Heapsort)-250(algorithm)]TJ + 0 -11.955 Td [(Process:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(0)]TJ 0 g 0 G + [(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf -24.221 -22.402 Td [(call)-525(psb_msort\050x,ix,dir,flag\051)]TJ 0 -11.955 Td [(call)-525(psb_qsort\050x,ix,dir,flag\051)]TJ 0 -11.955 Td [(call)-525(psb_hsort\050x,ix,dir,flag\051)]TJ/F54 9.9626 Tf 14.944 -21.783 Td [(These)-236(serial)-235(r)18(outines)-236(sort)-236(a)-235(sequence)]TJ/F52 9.9626 Tf 162.066 0 Td [(X)]TJ/F54 9.9626 Tf 9.884 0 Td [(into)-236(ascending)-235(or)-236(descending)-236(or)18(der)74(.)]TJ -186.894 -11.955 Td [(The)-243(ar)18(gument)-243(meaning)-243(is)-242(identical)-243(for)-243(the)-243(thr)18(ee)-243(calls;)-245(the)-243(only)-243(dif)18(fer)18(ence)-242(is)-243(the)]TJ 0 -11.955 Td [(algorithm)-250(used)-250(to)-250(accomplish)-250(the)-250(task)-250(\050see)-250(Usage)-250(Notes)-250(below\051.)]TJ + [-1050(PSBLAS)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -21.783 Td [(T)90(ype:)]TJ + [-525(Error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(\050)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -22.402 Td [(On)-250(Entry)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(4010)]TJ 0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(\051)]TJ 0 g 0 G - 0 -22.402 Td [(x)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(sequence)-250(to)-250(be)-250(sorted.)]TJ 14.944 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(,)-250(r)18(eal)-250(or)-250(complex)-250(array)-250(of)-250(rank)-250(1.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(in)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.402 Td [(ix)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 13.281 0 Td [(A)-250(vector)-250(of)-250(indices.)]TJ 11.626 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)-250(of)-250(\050at)-250(least\051)-250(the)-250(same)-250(size)-250(as)]TJ/F52 9.9626 Tf 254.189 0 Td [(X)]TJ/F54 9.9626 Tf 7.537 0 Td [(.)]TJ + [-525(subroutine:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -286.633 -22.402 Td [(dir)]TJ + [-525(psb_spasb)]TJ 0 -11.955 Td [(Error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 18.262 0 Td [(The)-250(desir)18(ed)-250(or)18(dering.)]TJ 6.645 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value:)]TJ + [-525(from)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -22.402 Td [(Integer)-250(and)-250(real)-250(data:)]TJ + [-525(call)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf 101.28 0 Td [(psb_sort_up_)]TJ/F54 9.9626 Tf 62.764 0 Td [(,)]TJ/F59 9.9626 Tf 5.525 0 Td [(psb_sort_down_)]TJ/F54 9.9626 Tf 73.225 0 Td [(,)]TJ/F59 9.9626 Tf 5.525 0 Td [(psb_asort_up_)]TJ/F54 9.9626 Tf 67.995 0 Td [(,)]TJ/F59 9.9626 Tf -294.396 -11.955 Td [(psb_asort_down_)]TJ/F54 9.9626 Tf 78.455 0 Td [(;)-250(default)]TJ/F59 9.9626 Tf 38.784 0 Td [(psb_sort_up_)]TJ/F54 9.9626 Tf 62.764 0 Td [(.)]TJ + [-525(to)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -201.921 -17.178 Td [(Complex)-250(data:)]TJ + [-525(subroutine)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf 70.286 0 Td [(psb_lsort_up_)]TJ/F54 9.9626 Tf 67.995 0 Td [(,)]TJ/F59 9.9626 Tf 4.503 0 Td [(psb_lsort_down_)]TJ/F54 9.9626 Tf 78.455 0 Td [(,)]TJ/F59 9.9626 Tf 4.503 0 Td [(psb_asort_up_)]TJ/F54 9.9626 Tf 67.994 0 Td [(,)]TJ/F59 9.9626 Tf 4.504 0 Td [(psb_asort_down_)]TJ/F54 9.9626 Tf 78.455 0 Td [(;)]TJ -354.777 -11.956 Td [(default)]TJ/F59 9.9626 Tf 33.803 0 Td [(psb_lsort_up_)]TJ/F54 9.9626 Tf 67.994 0 Td [(.)]TJ +0.73 0.38 0.84 rg 0.73 0.38 0.84 RG + [-525(psb_cest)]TJ 0 g 0 G -/F51 9.9626 Tf -148.622 -22.402 Td [(\003ag)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + 0 -11.955 Td [(==========================================================)]TJ 0 g 0 G -/F54 9.9626 Tf 21.589 0 Td [(Whether)-250(to)-250(keep)-250(the)-250(original)-250(values)-250(in)]TJ/F52 9.9626 Tf 171.52 0 Td [(I)-81(X)]TJ/F54 9.9626 Tf 11.661 0 Td [(.)]TJ -179.863 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-190(as:)-280(an)-190(integer)-190(value)]TJ/F59 9.9626 Tf 128.287 0 Td [(psb_sort_ovw_idx_)]TJ/F54 9.9626 Tf 90.809 0 Td [(or)]TJ/F59 9.9626 Tf 11.268 0 Td [(psb_sort_keep_idx_)]TJ/F54 9.9626 Tf 94.146 0 Td [(;)]TJ -324.51 -11.955 Td [(default)]TJ/F59 9.9626 Tf 33.803 0 Td [(psb_sort_ovw_idx_)]TJ/F54 9.9626 Tf 88.916 0 Td [(.)]TJ + 0 -11.955 Td [(Process:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -147.626 -24.395 Td [(On)-250(Return)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(0)]TJ 0 g 0 G + [(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -22.402 Td [(x)]TJ + [-1050(PSBLAS)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(sequence)-250(of)-250(values,)-250(in)-250(the)-250(chosen)-250(or)18(dering.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(,)-250(r)18(eal)-250(or)-250(complex)-250(array)-250(of)-250(rank)-250(1.)]TJ + [-525(Error)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -22.402 Td [(ix)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(\050)]TJ 0 g 0 G -/F54 9.9626 Tf 13.281 0 Td [(A)-250(vector)-250(of)-250(indices.)]TJ 11.626 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(An)-238(integer)-237(array)-238(of)-237(rank)-238(1,)-240(whose)-238(entries)-237(ar)18(e)-238(moved)-237(to)-238(the)-238(same)-237(position)]TJ 0 -11.955 Td [(as)-250(the)-250(corr)18(esponding)-250(entries)-250(in)]TJ/F52 9.9626 Tf 138.215 0 Td [(x)]TJ/F54 9.9626 Tf 5.205 0 Td [(.)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(136)]TJ 0 g 0 G - -3.943 -44.517 Td [(109)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(\051)]TJ 0 g 0 G -ET - -endstream -endobj -1615 0 obj -<< -/Length 7300 ->> -stream +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(in)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(Notes)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ + [-525(subroutine:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(For)-370(integer)-370(or)-370(r)18(eal)-370(data)-370(the)-370(sorting)-370(can)-370(be)-370(performed)-370(in)-370(the)-370(up/down)]TJ 12.453 -11.956 Td [(dir)18(ection,)-250(on)-250(the)-250(natural)-250(or)-250(absolute)-250(values;)]TJ + [-525(psb_cest)]TJ 0 -11.955 Td [(Format)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -12.453 -19.925 Td [(2.)]TJ + [-525(FOO)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(For)-329(complex)-330(data)-329(the)-330(sorting)-329(can)-330(be)-329(done)-329(in)-330(a)-329(lexicographic)-330(or)18(der)-329(\050i.e.:)]TJ 12.453 -11.955 Td [(sort)-263(on)-263(the)-263(r)18(eal)-263(part)-263(with)-263(ties)-263(br)18(oken)-263(accor)18(ding)-263(to)-263(the)-263(imaginary)-263(part\051)-263(or)]TJ 0 -11.955 Td [(on)-250(the)-250(absolute)-250(values;)]TJ + [-525(is)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -12.453 -19.925 Td [(3.)]TJ +0.73 0.38 0.84 rg 0.73 0.38 0.84 RG + [-525(unknown)]TJ 0 g 0 G - [-500(The)-325(r)18(outines)-325(r)18(eturn)-325(the)-325(items)-324(in)-325(the)-325(chosen)-325(or)18(dering;)-362(the)-325(output)-325(dif)18(fer)18(-)]TJ 12.453 -11.956 Td [(ence)-244(is)-245(the)-244(handling)-244(of)-244(ties)-244(\050i.e.)-309(items)-244(with)-244(an)-244(equal)-245(v)1(alue\051)-245(in)-244(the)-244(original)]TJ 0 -11.955 Td [(input.)-316(W)55(ith)-252(the)-252(mer)18(ge-sort)-252(algorithm)-252(ties)-252(ar)18(e)-252(pr)18(eserved)-252(in)-252(the)-252(same)-252(r)18(ela-)]TJ 0 -11.955 Td [(tive)-278(or)18(der)-278(as)-278(they)-278(had)-278(in)-278(the)-278(original)-278(sequence,)-285(while)-278(this)-278(is)-278(not)-278(guaran-)]TJ 0 -11.955 Td [(teed)-250(for)-250(quicksort)-250(or)-250(heapsort;)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + 0 -11.956 Td [(==========================================================)]TJ 0 g 0 G - -12.453 -19.925 Td [(4.)]TJ + 0 -11.955 Td [(Aborting...)]TJ 0 g 0 G - [-500(If)]TJ/F52 9.9626 Tf 22.66 0 Td [(f)-160(l)-70(a)-47(g)]TJ/F85 10.3811 Tf 20.72 0 Td [(=)]TJ/F52 9.9626 Tf 11.634 0 Td [(p)-25(s)-25(b)]TJ +/F62 9.9626 Tf 0 -29.397 Td [(Listing)-364(6:)-537(A)-364(sample)-364(PSBLAS-3.0)-363(err)18(or)-364(message.)-651(Pr)18(ocess)-364(0)-364(detected)-363(an)-364(err)18(or)]TJ 0 -11.955 Td [(condition)-250(inside)-250(the)-250(psb)]TJ ET q -1 0 0 1 232.862 542.941 cm +1 0 0 1 206.215 153.925 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F52 9.9626 Tf 235.975 542.742 Td [(s)-25(o)-25(r)-35(t)]TJ +/F62 9.9626 Tf 209.204 153.726 Td [(cest)-250(subr)18(outine)]TJ +0 g 0 G + 55.075 -63.288 Td [(137)]TJ +0 g 0 G ET -q -1 0 0 1 253.036 542.941 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q + +endstream +endobj +1880 0 obj +<< +/Length 3570 +>> +stream +0 g 0 G +0 g 0 G BT -/F52 9.9626 Tf 256.149 542.742 Td [(o)-35(v)-25(w)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(8.1)-1000(psb)]TJ ET q -1 0 0 1 274.067 542.941 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 198.238 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F52 9.9626 Tf 277.11 542.742 Td [(i)-32(d)-42(x)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(errpush)-250(\227)-250(Pushes)-250(an)-250(error)-250(code)-250(onto)-250(the)-250(error)-250(stack)]TJ/F62 9.9626 Tf -49.379 -24.942 Td [(c)-175(a)-175(l)-174(l)-874(p)-98(s)-99(b)]TJ ET q -1 0 0 1 291.402 542.941 cm +1 0 0 1 200.841 681.387 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 296.503 542.742 Td [(then)-212(the)-212(entries)-212(in)]TJ/F52 9.9626 Tf 80.283 0 Td [(i)-49(x)]TJ/F85 10.3811 Tf 8.588 0 Td [(\050)]TJ/F54 9.9626 Tf 4.149 0 Td [(1)-290(:)]TJ/F52 9.9626 Tf 13.381 0 Td [(n)]TJ/F85 10.3811 Tf 5.788 0 Td [(\051)]TJ/F54 9.9626 Tf 6.262 0 Td [(wher)18(e)]TJ/F52 9.9626 Tf 29.644 0 Td [(n)]TJ/F54 9.9626 Tf 7.776 0 Td [(is)-212(the)-212(size)]TJ -276.762 -11.956 Td [(of)]TJ/F52 9.9626 Tf 12.255 0 Td [(x)]TJ/F54 9.9626 Tf 8.411 0 Td [(ar)18(e)-322(initialized)-321(to)]TJ/F52 9.9626 Tf 76.228 0 Td [(i)-49(x)]TJ/F85 10.3811 Tf 8.588 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F83 10.3811 Tf 8.364 0 Td [(\040)]TJ/F52 9.9626 Tf 14.651 0 Td [(i)]TJ/F54 9.9626 Tf 2.963 0 Td [(;)-358(thus,)-339(upon)-322(r)18(eturn)-321(fr)18(om)-322(the)-322(subr)18(outine,)]TJ -138.753 -11.955 Td [(for)-270(each)-271(index)]TJ/F52 9.9626 Tf 65.501 0 Td [(i)]TJ/F54 9.9626 Tf 5.657 0 Td [(we)-270(have)-271(in)]TJ/F52 9.9626 Tf 51.095 0 Td [(i)-49(x)]TJ/F85 10.3811 Tf 8.587 0 Td [(\050)]TJ/F52 9.9626 Tf 4.205 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 6.843 0 Td [(the)-270(position)-271(that)-270(the)-270(item)]TJ/F52 9.9626 Tf 114.324 0 Td [(x)]TJ/F85 10.3811 Tf 5.33 0 Td [(\050)]TJ/F52 9.9626 Tf 4.204 0 Td [(i)]TJ/F85 10.3811 Tf 3.088 0 Td [(\051)]TJ/F54 9.9626 Tf 6.844 0 Td [(occupied)]TJ -278.766 -11.955 Td [(in)-250(the)-250(original)-250(data)-250(sequence;)]TJ -0 g 0 G - -12.453 -19.925 Td [(5.)]TJ -0 g 0 G - [-500(If)]TJ/F52 9.9626 Tf 24.08 0 Td [(f)-160(l)-70(a)-47(g)]TJ/F85 10.3811 Tf 22.648 0 Td [(=)]TJ/F52 9.9626 Tf 13.563 0 Td [(p)-25(s)-25(b)]TJ +/F62 9.9626 Tf 204.812 681.187 Td [(e)-99(r)-98(r)-99(p)-98(u)-99(s)-99(h)-232(\050)-266(e)-132(r)-132(r)]TJ ET q -1 0 0 1 238.138 487.15 cm +1 0 0 1 270.843 681.387 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F52 9.9626 Tf 241.252 486.951 Td [(s)-25(o)-25(r)-35(t)]TJ +/F62 9.9626 Tf 275.151 681.187 Td [(c)-440(,)-825(r)]TJ ET q -1 0 0 1 258.312 487.15 cm +1 0 0 1 299.7 681.387 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F52 9.9626 Tf 261.426 486.951 Td [(k)-30(e)-25(e)-80(p)]TJ +/F62 9.9626 Tf 303.188 681.187 Td [(n)-50(a)-50(m)-50(e)-276(,)-929(i)]TJ ET q -1 0 0 1 280.648 487.15 cm +1 0 0 1 348.561 681.387 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F52 9.9626 Tf 283.692 486.951 Td [(i)-32(d)-42(x)]TJ +/F62 9.9626 Tf 353.087 681.187 Td [(e)-154(r)-155(r)-483(,)-920(a)]TJ ET q -1 0 0 1 297.983 487.15 cm +1 0 0 1 392.305 681.387 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 304.504 486.951 Td [(the)-355(r)18(ou)1(tine)-355(will)-354(assume)-355(that)-355(th)1(e)-355(entries)-355(i)1(n)]TJ/F52 9.9626 Tf -128.838 -11.955 Td [(i)-49(x)]TJ/F85 10.3811 Tf 8.588 0 Td [(\050)]TJ/F54 9.9626 Tf 4.274 0 Td [(:)]TJ/F85 10.3811 Tf 2.74 0 Td [(\051)]TJ/F54 9.9626 Tf 6.64 0 Td [(have)-250(alr)18(eady)-250(been)-250(initialized)-250(by)-250(the)-250(user;)]TJ -0 g 0 G - -34.75 -19.926 Td [(6.)]TJ -0 g 0 G - [-500(The)-270(thr)18(ee)-269(sorting)-270(algorithms)-269(have)-270(a)-269(similar)]TJ/F52 9.9626 Tf 205.79 0 Td [(O)]TJ/F85 10.3811 Tf 8 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(n)]TJ/F54 9.9626 Tf 7.324 0 Td [(log)]TJ/F52 9.9626 Tf 15.663 0 Td [(n)]TJ/F85 10.3811 Tf 5.788 0 Td [(\051)]TJ/F54 9.9626 Tf 6.835 0 Td [(expected)-270(r)8(unning)]TJ -241.221 -11.955 Td [(time;)-278(in)-268(the)-268(average)-269(case)-268(quicksort)-268(will)-269(be)-268(the)-268(fastest)-269(and)-268(mer)18(ge-sort)-268(the)]TJ 0 -11.955 Td [(slowest.)-310(However)-250(note)-250(that:)]TJ +/F62 9.9626 Tf 396.74 681.187 Td [(e)-145(r)-145(r)-279(\051)]TJ 0 g 0 G - 5.321 -19.925 Td [(\050a\051)]TJ -0 g 0 G - [-500(The)-336(worst)-336(case)-336(r)8(unning)-336(time)-336(for)-337(quicksort)-336(is)]TJ/F52 9.9626 Tf 220.017 0 Td [(O)]TJ/F85 10.3811 Tf 8 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(n)]TJ/F54 7.5716 Tf 5.664 3.616 Td [(2)]TJ/F85 10.3811 Tf 4.408 -3.616 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(;)-379(the)-336(algorithm)]TJ -229.916 -11.955 Td [(implemented)-293(her)18(e)-293(follows)-293(the)-292(well-known)-293(median-of-thr)18(ee)-293(heuris-)]TJ 0 -11.956 Td [(tics,)-250(but)-250(the)-250(worst)-250(case)-250(may)-250(still)-250(apply;)]TJ 0 g 0 G - -17.125 -15.94 Td [(\050b\051)]TJ 0 g 0 G - [-500(The)-190(worst)-190(case)-190(r)8(unning)-190(time)-190(for)-190(mer)18(ge-sort)-190(and)-190(heap-sort)-190(is)]TJ/F52 9.9626 Tf 277.76 0 Td [(O)]TJ/F85 10.3811 Tf 8 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(n)]TJ/F54 9.9626 Tf 7.324 0 Td [(log)]TJ/F52 9.9626 Tf 15.663 0 Td [(n)]TJ/F85 10.3811 Tf 5.788 0 Td [(\051)]TJ/F54 9.9626 Tf -301.684 -11.955 Td [(as)-250(the)-250(average)-250(case;)]TJ +/F59 9.9626 Tf -246.035 -27.895 Td [(T)90(ype:)]TJ 0 g 0 G - -16.04 -15.94 Td [(\050c\051)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - [-500(The)-244(mer)18(ge-sort)-244(algorithm)-243(is)-244(implemented)-244(to)-244(take)-244(advantage)-243(of)-244(sub-)]TJ 16.04 -11.955 Td [(sequences)-314(that)-313(may)-314(be)-314(alr)18(eady)-314(in)-313(the)-314(desir)18(ed)-314(or)18(dering)-314(prior)-313(to)-314(the)]TJ 0 -11.956 Td [(subr)18(outine)-390(call;)-459(this)-390(situation)-389(is)-390(r)18(elatively)-390(common)-389(when)-390(dealing)]TJ 0 -11.955 Td [(with)-335(gr)18(oups)-335(of)-335(indices)-335(of)-335(sparse)-336(matrix)-335(entries,)-356(thus)-335(mer)18(ge-sort)-335(is)]TJ 0 -11.955 Td [(the)-319(pr)18(eferr)18(ed)-318(choice)-319(when)-319(a)-318(sorting)-319(is)-319(needed)-318(by)-319(other)-319(r)18(outi)1(nes)-319(in)]TJ 0 -11.955 Td [(the)-250(library)111(.)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - 117.559 -193.275 Td [(110)]TJ 0 g 0 G + 0 -19.925 Td [(err)]TJ ET - -endstream -endobj -1628 0 obj -<< -/Length 171 ->> -stream -0 g 0 G -0 g 0 G +q +1 0 0 1 164.035 613.641 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q BT -/F51 14.3462 Tf 99.895 705.784 Td [(7)-1000(Parallel)-250(environment)-250(routines)]TJ +/F59 9.9626 Tf 167.023 613.442 Td [(c)]TJ 0 g 0 G -/F54 9.9626 Tf 164.384 -615.346 Td [(111)]TJ +/F62 9.9626 Tf 9.405 0 Td [(the)-250(err)18(or)-250(code)]TJ -0.817 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)]TJ 0 g 0 G -ET - -endstream -endobj -1632 0 obj -<< -/Length 5510 ->> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.1)-1000(psb)]TJ +/F59 9.9626 Tf -24.906 -19.925 Td [(r)]TJ ET q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 155.178 545.895 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(init)-250(\227)-250(Initializes)-250(PSBLAS)-250(parallel)-250(environment)]TJ -0 g 0 G +/F59 9.9626 Tf 158.167 545.696 Td [(name)]TJ 0 g 0 G -/F59 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_init\050ctxt,)-525(np,)-525(basectxt,)-525(ids\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-214(subr)18(outine)-215(initializes)-214(the)-215(PSBLAS)-214(parallel)-215(envir)18(onment,)-221(de\002ning)-215(a)-214(vir)18(-)]TJ -14.944 -11.955 Td [(tual)-250(parallel)-250(machine.)]TJ +/F62 9.9626 Tf 29.888 0 Td [(the)-250(soutine)-250(wher)18(e)-250(the)-250(err)18(or)-250(has)-250(been)-250(caught.)]TJ -12.444 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(string.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.926 Td [(np)]TJ -0 g 0 G -/F54 9.9626 Tf 17.156 0 Td [(Number)-250(of)-250(pr)18(ocesses)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 7.751 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-560(Default:)-310(use)-250(all)-250(available)-250(pr)18(ocesses.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.926 Td [(basectxt)]TJ -0 g 0 G -/F54 9.9626 Tf 41.494 0 Td [(the)-321(initial)-321(communication)-321(context.)-524(The)-321(new)-321(context)-321(will)-321(be)-321(de\002ned)]TJ -16.587 -11.955 Td [(fr)18(om)-250(the)-250(pr)18(ocesses)-250(participating)-250(in)-250(the)-250(initial)-250(one.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-560(Default:)-310(use)-250(MPI)]TJ +/F59 9.9626 Tf -24.906 -31.881 Td [(i)]TJ ET q -1 0 0 1 387.574 466.194 cm +1 0 0 1 154.62 466.194 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 390.563 465.994 Td [(COMM)]TJ +/F59 9.9626 Tf 157.609 465.994 Td [(err)]TJ +0 g 0 G +/F62 9.9626 Tf 17.713 0 Td [(addional)-250(info)-250(for)-250(err)18(or)-250(code)]TJ 0.289 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -31.881 Td [(a)]TJ ET q -1 0 0 1 424.904 466.194 cm +1 0 0 1 156.284 398.448 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F54 9.9626 Tf 427.893 465.994 Td [(WORLD.)]TJ -0 g 0 G -/F51 9.9626 Tf -277.188 -19.925 Td [(ids)]TJ -0 g 0 G -/F54 9.9626 Tf 18.809 0 Td [(Identities)-306(of)-307(the)-306(pr)18(ocesses)-307(to)-306(use)-306(for)-307(the)-306(new)-306(context;)-335(the)-306(ar)18(gument)-307(is)-306(ig-)]TJ 6.097 -11.955 Td [(nor)18(ed)-388(when)]TJ/F59 9.9626 Tf 58.258 0 Td [(np)]TJ/F54 9.9626 Tf 14.324 0 Td [(is)-388(not)-388(speci\002ed.)-723(This)-388(allows)-388(the)-387(pr)18(ocesses)-388(in)-388(the)-388(new)]TJ -72.582 -11.955 Td [(envir)18(onment)-250(to)-250(be)-250(in)-250(an)-250(or)18(der)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(original)-250(one.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)111(.)-560(Default:)-310(use)-250(the)-250(indices)]TJ/F85 10.3811 Tf 240.57 0 Td [(\050)]TJ/F54 9.9626 Tf 4.15 0 Td [(0)-179(.)-192(.)-191(.)]TJ/F52 9.9626 Tf 19.966 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1)]TJ/F85 10.3811 Tf 5.106 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(.)]TJ +/F59 9.9626 Tf 159.273 398.249 Td [(err)]TJ 0 g 0 G -/F51 9.9626 Tf -322.483 -21.918 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 17.713 0 Td [(addional)-250(info)-250(for)-250(err)18(or)-250(code)]TJ -1.375 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(string.)]TJ 0 g 0 G -0 g 0 G - 0 -19.925 Td [(ctxt)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-285(communication)-284(context)-285(identifying)-285(the)-284(virtual)-285(parallel)-285(machine,)-293(type)]TJ/F59 9.9626 Tf 3.885 -11.955 Td [(psb_ctxt_type)]TJ/F54 9.9626 Tf 67.995 0 Td [(.)-327(Note)-256(that)-256(this)-256(is)-256(always)-256(a)-255(duplicate)-256(of)]TJ/F59 9.9626 Tf 174.426 0 Td [(basectxt)]TJ/F54 9.9626 Tf 41.842 0 Td [(,)-257(so)-256(that)]TJ -284.263 -11.955 Td [(library)-296(communications)-297(ar)18(e)-296(completely)-297(separated)-296(fr)18(om)-297(other)-296(communi-)]TJ 0 -11.955 Td [(cation)-250(operations.)]TJ 0 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -66.381 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ/F51 11.9552 Tf -24.906 -21.918 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(A)-250(call)-250(to)-250(this)-250(r)18(outine)-250(must)-250(pr)18(ecede)-250(any)-250(other)-250(PSBLAS)-250(call.)]TJ -0 g 0 G - 0 -19.926 Td [(2.)]TJ -0 g 0 G - [-500(It)-194(is)-195(an)-194(err)18(or)-194(to)-194(specify)-195(a)-194(value)-194(for)]TJ/F52 9.9626 Tf 158.156 0 Td [(n)-80(p)]TJ/F54 9.9626 Tf 13.378 0 Td [(gr)18(eater)-194(than)-195(the)-194(number)-194(of)-194(pr)18(ocesses)]TJ -159.081 -11.955 Td [(available)-250(in)-250(the)-250(underlying)-250(base)-250(parallel)-250(envir)18(onment.)]TJ -0 g 0 G - 139.477 -84.647 Td [(112)]TJ + 139.477 -271.945 Td [(138)]TJ 0 g 0 G ET endstream endobj -1638 0 obj +1886 0 obj << -/Length 4457 +/Length 1332 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(7.2)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(8.2)-1000(psb)]TJ ET q 1 0 0 1 147.429 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(info)-211(\227)-211(Return)-211(information)-210(about)-211(PSBLAS)-211(parallel)-211(en-)]TJ -24.221 -13.948 Td [(vironment)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -26.9 -18.964 Td [(call)-525(psb_info\050ctxt,)-525(iam,)-525(np\051)]TJ/F54 9.9626 Tf 14.944 -21.917 Td [(This)-397(subr)18(outine)-396(r)18(eturns)-397(information)-397(about)-396(the)-397(PSBLAS)-397(parallel)-396(envir)18(on-)]TJ -14.944 -11.956 Td [(ment,)-250(de\002ning)-250(a)-250(virtual)-250(parallel)-250(machine.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(ctxt)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -21.918 Td [(On)-250(Return)]TJ -0 g 0 G +/F59 11.9552 Tf 151.016 706.129 Td [(error)-306(\227)-306(Prints)-306(the)-306(error)-307(s)1(tack)-307(content)-306(and)-306(aborts)-306(exe-)]TJ -24.221 -13.948 Td [(cution)]TJ/F62 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-900(p)-126(s)-125(b)]TJ +ET +q +1 0 0 1 151.092 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 155.328 667.24 Td [(e)-125(r)-125(r)-126(o)-125(r)-259(\050)-279(i)-146(c)-146(o)-147(n)-146(t)-146(x)-146(t)-280(\051)]TJ 0 g 0 G - 0 -19.925 Td [(iam)]TJ 0 g 0 G -/F54 9.9626 Tf 22.137 0 Td [(Identi\002er)-250(of)-250(curr)18(ent)-250(pr)18(ocess)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 2.77 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ/F83 10.3811 Tf 134.19 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)]TJ/F83 10.3811 Tf 7.873 0 Td [(\024)]TJ/F52 9.9626 Tf 11.017 0 Td [(i)-47(a)-25(m)]TJ/F83 10.3811 Tf 18.677 0 Td [(\024)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1)]TJ 0 g 0 G -/F51 9.9626 Tf -239.579 -19.925 Td [(np)]TJ +/F59 9.9626 Tf -55.433 -27.896 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 17.156 0 Td [(Number)-250(of)-250(pr)18(ocesses)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 7.751 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - [-500(For)-396(pr)18(ocesses)-396(in)-395(the)-396(virtual)-396(parallel)-396(machine)-396(the)-396(ident)1(i\002er)-396(will)-396(satisfy)]TJ 12.453 -11.955 Td [(0)]TJ/F83 10.3811 Tf 7.873 0 Td [(\024)]TJ/F52 9.9626 Tf 11.016 0 Td [(i)-47(a)-25(m)]TJ/F83 10.3811 Tf 18.678 0 Td [(\024)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1;)]TJ 0 g 0 G - -84.741 -19.925 Td [(2.)]TJ + 0 -19.925 Td [(icontxt)]TJ 0 g 0 G - [-500(If)-349(the)-349(user)-350(has)-349(r)18(equested)-349(on)]TJ/F59 9.9626 Tf 142.217 0 Td [(psb_init)]TJ/F54 9.9626 Tf 45.321 0 Td [(a)-349(number)-349(of)-350(pr)18(ocesses)-349(less)-349(than)]TJ -175.085 -11.956 Td [(the)-321(total)-322(available)-321(in)-321(the)-322(parallel)-321(execution)-322(envir)18(onment,)-339(the)-321(r)18(emaining)]TJ 0 -11.955 Td [(pr)18(ocesses)-229(will)-228(have)-229(on)-229(r)18(eturn)]TJ/F52 9.9626 Tf 130.21 0 Td [(i)-47(a)-25(m)]TJ/F85 10.3811 Tf 18.678 0 Td [(=)]TJ/F83 10.3811 Tf 11.086 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1;)-236(the)-229(on)1(ly)-229(call)-229(involving)]TJ/F59 9.9626 Tf 110.162 0 Td [(ctxt)]TJ/F54 9.9626 Tf 23.2 0 Td [(that)]TJ -301.53 -11.955 Td [(any)-250(such)-250(pr)18(ocess)-250(may)-250(execute)-250(is)-250(to)]TJ/F59 9.9626 Tf 155.296 0 Td [(psb_exit)]TJ/F54 9.9626 Tf 41.843 0 Td [(.)]TJ +/F62 9.9626 Tf 35.965 0 Td [(the)-250(communication)-250(context.)]TJ -11.058 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)]TJ 0 g 0 G - -57.662 -174.311 Td [(113)]TJ + 139.477 -461.235 Td [(139)]TJ 0 g 0 G ET endstream endobj -1644 0 obj +1893 0 obj << -/Length 4180 +/Length 1526 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.3)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(8.3)-1000(psb)]TJ ET q 1 0 0 1 198.238 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(exit)-250(\227)-250(Exit)-250(from)-250(PSBLAS)-250(parallel)-250(environment)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_exit\050ctxt\051)]TJ 0 -11.955 Td [(call)-525(psb_exit\050ctxt,close\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-250(subr)18(outine)-250(exits)-250(fr)18(om)-250(the)-250(PSBLAS)-250(parallel)-250(virtual)-250(machine.)]TJ -0 g 0 G -/F51 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.926 Td [(ctxt)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +/F59 11.9552 Tf 201.825 706.129 Td [(set)]TJ +ET +q +1 0 0 1 217.809 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 221.396 706.129 Td [(errverbosity)-190(\227)-190(Sets)-190(the)-190(verbosity)-190(of)-190(error)-190(messages)]TJ/F62 9.9626 Tf -68.95 -24.942 Td [(c)-175(a)-175(l)-174(l)-921(p)-147(s)-146(b)]TJ +ET +q +1 0 0 1 202.736 681.387 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 207.181 681.187 Td [(s)-146(e)-146(t)]TJ +ET +q +1 0 0 1 224.391 681.387 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 228.836 681.187 Td [(e)-146(r)-146(r)-146(v)-147(e)-146(r)-146(b)-146(o)-146(s)-146(i)-146(t)-147(y)-279(\050)-151(v)-151(\051)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.926 Td [(close)]TJ 0 g 0 G -/F54 9.9626 Tf 27.666 0 Td [(Whether)-369(to)-368(close)-369(all)-369(data)-368(str)8(uctur)18(es)-369(r)18(elated)-369(to)-368(the)-369(virtual)-369(parall)1(el)-369(ma-)]TJ -2.759 -11.955 Td [(chine,)-250(besides)-250(those)-250(associated)-250(with)-250(ctxt.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(logical)-250(variable,)-250(default)-250(value:)-310(tr)8(ue.)]TJ/F51 11.9552 Tf -24.907 -19.925 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ +/F59 9.9626 Tf -78.131 -27.895 Td [(T)90(ype:)]TJ 0 g 0 G - [-500(This)-376(r)18(outine)-376(may)-377(be)-376(called)-376(even)-376(if)-377(a)-376(pr)18(evious)-376(call)-376(to)]TJ/F59 9.9626 Tf 255.069 0 Td [(psb_info)]TJ/F54 9.9626 Tf 45.591 0 Td [(has)-376(r)18(e-)]TJ -288.206 -11.955 Td [(turned)-251(with)]TJ/F52 9.9626 Tf 55.156 0 Td [(i)-47(a)-25(m)]TJ/F85 10.3811 Tf 18.696 0 Td [(=)]TJ/F83 10.3811 Tf 11.104 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1;)-251(indeed,)-252(it)-251(it)-251(i)1(s)-251(the)-251(only)-251(r)18(outine)-251(that)-251(may)-251(be)-251(called)]TJ -93.15 -11.955 Td [(with)-250(ar)18(gument)]TJ/F59 9.9626 Tf 68.133 0 Td [(ctxt)]TJ/F54 9.9626 Tf 23.412 0 Td [(in)-250(this)-250(situation.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - -103.999 -19.926 Td [(2.)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - [-500(A)-269(call)-269(to)-268(this)-269(r)18(outine)-269(with)]TJ/F59 9.9626 Tf 128.502 0 Td [(close=.true.)]TJ/F54 9.9626 Tf 65.442 0 Td [(implies)-269(a)-268(call)-269(to)]TJ/F59 9.9626 Tf 72.059 0 Td [(MPI_Finalize)]TJ/F54 9.9626 Tf 62.764 0 Td [(,)]TJ -316.313 -11.955 Td [(after)-250(which)-250(no)-250(parallel)-250(r)18(outine)-250(may)-250(be)-250(called.)]TJ 0 g 0 G - -12.454 -19.925 Td [(3.)]TJ + 0 -19.925 Td [(v)]TJ 0 g 0 G - [-500(If)-289(the)-288(user)-288(whishes)-289(to)-288(use)-289(multiple)-288(communication)-289(contexts)-288(in)-289(the)-288(same)]TJ 12.454 -11.955 Td [(pr)18(ogram,)-401(or)-371(to)-371(enter)-371(and)-371(exit)-371(multiple)-371(times)-370(into)-371(the)-371(parallel)-371(envir)18(on-)]TJ 0 -11.956 Td [(ment,)-425(this)-389(r)18(outine)-390(may)-390(be)-390(called)-389(to)-390(selectively)-390(close)-390(the)-389(contexts)-390(with)]TJ/F59 9.9626 Tf 0 -11.955 Td [(close=.false.)]TJ/F54 9.9626 Tf 67.994 0 Td [(,)-202(while)-190(on)-190(the)-190(last)-190(call)-190(it)-190(should)-190(be)-190(called)-190(with)]TJ/F59 9.9626 Tf 196.919 0 Td [(close=.true.)]TJ/F54 9.9626 Tf -264.913 -11.955 Td [(to)-250(shutdown)-250(in)-250(a)-250(clean)-250(way)-250(the)-250(entir)18(e)-250(parallel)-250(envir)18(onment.)]TJ +/F62 9.9626 Tf 10.52 0 Td [(the)-250(verbosity)-250(level)]TJ 14.386 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)]TJ 0 g 0 G - 139.477 -212.169 Td [(114)]TJ + 139.477 -475.183 Td [(140)]TJ 0 g 0 G ET endstream endobj -1651 0 obj +1899 0 obj << -/Length 2476 +/Length 2016 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(7.4)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(8.4)-1000(psb)]TJ ET q 1 0 0 1 147.429 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(get)]TJ +/F59 11.9552 Tf 151.016 706.129 Td [(set)]TJ ET q -1 0 0 1 168.338 706.328 cm +1 0 0 1 166.999 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 171.925 706.129 Td [(mpi)]TJ +/F59 11.9552 Tf 170.586 706.129 Td [(erraction)-223(\227)-223(Set)-222(the)-223(type)-223(of)-223(action)-223(to)-222(be)-223(taken)-223(upon)]TJ -43.792 -13.948 Td [(error)-250(condition)]TJ/F62 9.9626 Tf -25.157 -24.941 Td [(c)-175(a)-175(l)-174(l)-926(p)-151(s)-151(b)]TJ ET q -1 0 0 1 194.556 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 152.113 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 156.605 667.24 Td [(s)-151(e)-151(t)]TJ +ET +q +1 0 0 1 173.955 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 178.447 667.24 Td [(e)-151(r)-151(r)-150(a)-151(c)-151(t)-151(i)-151(o)-151(n)-284(\050)-296(e)-163(r)-162(r)]TJ +ET +q +1 0 0 1 257.102 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 11.9552 Tf 198.143 706.129 Td [(comm)-250(\227)-250(Get)-250(the)-250(MPI)-250(communicator)]TJ +/F62 9.9626 Tf 261.712 667.24 Td [(a)-163(c)-162(t)-296(\051)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -98.248 -18.964 Td [(icomm)-525(=)-525(psb_get_mpi_comm\050ctxt\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-417(func)1(tion)-417(r)18(eturns)-416(the)-417(MPI)-416(communicator)-417(associated)-416(with)-417(a)-416(PSBLAS)]TJ -14.944 -11.955 Td [(context)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf -161.817 -27.896 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.926 Td [(ctxt)]TJ + 0 -19.925 Td [(err)]TJ +ET +q +1 0 0 1 113.225 599.693 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 116.214 599.494 Td [(act)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +/F62 9.9626 Tf 17.703 0 Td [(the)-250(type)-250(of)-250(action.)]TJ -9.115 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)-310(Possible)-250(values:)]TJ/F67 9.9626 Tf 179.116 0 Td [(psb_act_ret)]TJ/F62 9.9626 Tf 57.534 0 Td [(,)]TJ/F67 9.9626 Tf 4.981 0 Td [(psb_act_abort)]TJ/F62 9.9626 Tf 67.995 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.918 Td [(On)-250(Return)]TJ + -170.149 -461.235 Td [(141)]TJ 0 g 0 G +ET + +endstream +endobj +1905 0 obj +<< +/Length 507 +>> +stream 0 g 0 G - 0 -19.926 Td [(Function)-250(value)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-372(MPI)-371(communicator)-372(associated)-371(with)-372(the)-372(PSBLAS)-371(virtual)]TJ -47.87 -11.955 Td [(parallel)-250(machine.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ/F51 11.9552 Tf -71.651 -33.873 Td [(Notes)]TJ/F54 9.9626 Tf 34.165 0 Td [(The)-230(subr)18(outine)-230(version)]TJ/F59 9.9626 Tf 103.913 0 Td [(psb_get_mpicomm)]TJ/F54 9.9626 Tf 80.748 0 Td [(is)-230(still)-230(available)-230(but)-230(is)-230(depr)18(e-)]TJ -218.826 -11.955 Td [(cated.)]TJ +BT +/F59 14.3462 Tf 150.705 705.784 Td [(9)-1000(Utilities)]TJ/F62 9.9626 Tf 0 -22.702 Td [(W)92(e)-323(have)-322(some)-323(utilities)-322(available)-323(for)-322(input)-323(and)-323(output)-322(of)-323(sparse)-322(matrices;)-359(the)]TJ 0 -11.955 Td [(interfaces)-250(to)-250(these)-250(r)18(outines)-250(ar)18(e)-250(available)-250(in)-250(the)-250(module)]TJ/F67 9.9626 Tf 242.009 0 Td [(psb_util_mod)]TJ/F62 9.9626 Tf 62.764 0 Td [(.)]TJ 0 g 0 G - 164.384 -319.766 Td [(115)]TJ + -140.39 -580.689 Td [(142)]TJ 0 g 0 G ET endstream endobj -1656 0 obj +1910 0 obj << -/Length 3337 +/Length 4553 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.5)-1000(psb)]TJ -ET -q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 201.825 706.129 Td [(get)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(9.1)-1206(hb)]TJ ET q -1 0 0 1 219.148 706.328 cm +1 0 0 1 144.589 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 222.735 706.129 Td [(mpi)]TJ +/F59 11.9552 Tf 148.175 706.129 Td [(read)-206(\227)-207(Read)-206(a)-207(sparse)-206(matrix)-207(fr)1(om)-207(a)-206(\002le)-207(in)-206(the)-207(Harwell\226)]TJ -21.381 -13.948 Td [(Boeing)-250(format)]TJ/F62 9.9626 Tf -25.157 -24.941 Td [(c)-175(a)-175(l)-174(l)-865(h)-90(b)]TJ ET q -1 0 0 1 245.365 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 144.379 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 11.9552 Tf 248.952 706.129 Td [(rank)-250(\227)-250(Get)-250(the)-250(MPI)-250(rank)]TJ +/F62 9.9626 Tf 148.265 667.24 Td [(r)-90(e)-90(a)-90(d)-223(\050)-167(a)-242(,)-927(i)-151(r)-152(e)-151(t)-478(,)-905(i)-129(u)-130(n)-129(i)-130(t)-434(,)-871(f)-97(i)-96(l)-96(e)-96(n)-96(a)-97(m)-96(e)-367(,)-791(b)-206(,)-919(m)-143(t)-144(i)-143(t)-143(l)-144(e)-277(\051)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -98.247 -18.964 Td [(rank)-525(=)-525(psb_get_mpi_rank\050ctxt,)-525(id\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-250(function)-250(r)18(eturns)-250(the)-250(MPI)-250(rank)-250(of)-250(the)-250(PSBLAS)-250(pr)18(ocess)]TJ/F52 9.9626 Tf 257.337 0 Td [(i)-32(d)]TJ 0 g 0 G -/F51 9.9626 Tf -272.281 -19.925 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf -48.37 -27.896 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.926 Td [(ctxt)]TJ + 0 -19.925 Td [(\002lename)]TJ +0 g 0 G +/F62 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(r)18(ead.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F67 9.9626 Tf 298.534 0 Td [(-)]TJ/F62 9.9626 Tf 5.23 0 Td [(,)-387(in)]TJ -303.764 -11.955 Td [(which)-254(case)-253(the)-254(default)-254(input)-253(unit)-254(5)-254(\050i.e.)-321(standar)18(d)-253(input)-254(in)-254(Unix)-253(jar)18(gon\051)-254(is)]TJ 0 -11.955 Td [(used.)-310(Default:)]TJ/F67 9.9626 Tf 65.185 0 Td [(-)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -95.322 -19.925 Td [(iunit)]TJ +0 g 0 G +/F62 9.9626 Tf 27.109 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.202 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F67 9.9626 Tf 287.757 0 Td [(-)]TJ/F62 9.9626 Tf 5.231 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -317.895 -21.918 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)-250(r)18(ead)-250(fr)18(om)-250(\002le.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 309.258 442.283 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 312.397 442.084 Td [(Tspmat)]TJ +ET +q +1 0 0 1 344.406 442.283 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 347.544 442.084 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.926 Td [(id)]TJ +/F59 9.9626 Tf -268.571 -19.925 Td [(b)]TJ 0 g 0 G -/F54 9.9626 Tf 14.386 0 Td [(Identi\002er)-250(of)-250(a)-250(pr)18(ocess)-250(in)-250(the)-250(PSBLAS)-250(virtual)-250(parallel)-250(machine.)]TJ 10.521 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(0)]TJ/F83 10.3811 Tf 141.938 0 Td [(\024)]TJ/F52 9.9626 Tf 11.017 0 Td [(i)-32(d)]TJ/F83 10.3811 Tf 11.086 0 Td [(\024)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.503 0 Td [(\000)]TJ/F54 9.9626 Tf 10.132 0 Td [(1)]TJ +/F62 9.9626 Tf 11.069 0 Td [(Rigth)-250(hand)-250(side\050s\051.)]TJ 13.838 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(An)-235(array)-234(of)-235(type)-234(r)18(eal)-235(or)-234(complex,)-238(rank)-234(2)-235(and)-234(having)-235(the)-234(ALLOCA)74(T)74(ABLE)]TJ 0 -11.956 Td [(attribute;)-361(will)-324(be)-324(al)1(located)-324(and)-324(\002lled)-324(in)-324(if)-324(the)-324(input)-324(\002le)-323(contains)-324(a)-324(right)]TJ 0 -11.955 Td [(hand)-250(side,)-250(otherwise)-250(will)-250(be)-250(left)-250(in)-250(the)-250(UNALLOCA)74(TED)-250(state.)]TJ 0 g 0 G -/F51 9.9626 Tf -223.669 -21.918 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(mtitle)]TJ 0 g 0 G +/F62 9.9626 Tf 32.09 0 Td [(Matrix)-250(title.)]TJ -7.183 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(A)-337(charachter)-338(variable)-337(of)-338(l)1(ength)-338(72)-337(holding)-338(a)-337(copy)-338(of)-337(the)-337(matrix)-338(title)-337(as)]TJ 0 -11.956 Td [(speci\002ed)-250(by)-250(the)-250(Harwell-Boeing)-250(format)-250(and)-250(contained)-250(in)-250(the)-250(input)-250(\002le.)]TJ 0 g 0 G - 0 -19.925 Td [(Funciton)-250(value)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(iret)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(The)-250(MPI)-250(rank)-250(associated)-250(with)-250(the)-250(PSBLAS)-250(pr)18(ocess)]TJ/F52 9.9626 Tf 224.291 0 Td [(i)-32(d)]TJ/F54 9.9626 Tf 8.195 0 Td [(.)]TJ -280.356 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ/F51 11.9552 Tf -71.651 -33.873 Td [(Notes)]TJ/F54 9.9626 Tf 35.734 0 Td [(The)-388(subr)18(outine)-387(version)]TJ/F59 9.9626 Tf 108.62 0 Td [(psb_get_rank)]TJ/F54 9.9626 Tf 66.626 0 Td [(is)-388(still)-387(available)-388(but)-387(is)-388(depr)18(e-)]TJ -210.98 -11.955 Td [(cated.)]TJ +/F62 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 164.384 -275.93 Td [(116)]TJ + 139.477 -184.274 Td [(143)]TJ 0 g 0 G ET endstream endobj -1660 0 obj +1917 0 obj << -/Length 1155 +/Length 4948 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(7.6)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(9.2)-1000(hb)]TJ ET q -1 0 0 1 147.429 706.328 cm +1 0 0 1 192.93 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(wtime)-250(\227)-250(W)74(all)-250(clock)-250(timing)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -51.121 -18.964 Td [(time)-525(=)-525(psb_wtime\050\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-298(function)-298(r)18(eturns)-299(a)-298(wall)-298(clock)-298(timer)74(.)-455(The)-298(r)18(esolution)-298(of)-299(the)-298(timer)-298(is)-298(de-)]TJ -14.944 -11.955 Td [(pendent)-250(on)-250(the)-250(underlying)-250(parallel)-250(envir)18(onment)-250(implementation.)]TJ +/F59 11.9552 Tf 196.517 706.129 Td [(write)-298(\227)-297(W)74(rite)-298(a)-298(sparse)-298(matrix)-297(to)-298(a)-298(\002le)-298(in)-297(the)-298(Harwell\226)]TJ -18.913 -13.948 Td [(Boeing)-250(format)]TJ/F62 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-884(h)-109(b)]TJ +ET +q +1 0 0 1 195.753 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 199.827 667.24 Td [(w)-109(r)-109(i)-109(t)-109(e)-242(\050)-167(a)-241(,)-927(i)-152(r)-151(e)-152(t)-478(,)-904(i)-130(u)-129(n)-130(i)-129(t)-435(,)-871(f)-96(i)-96(l)-96(e)-97(n)-96(a)-96(m)-96(e)-368(,)-816(k)-42(e)-42(y)-259(,)-855(r)-79(h)-80(s)-335(,)-918(m)-144(t)-143(i)-144(t)-143(l)-143(e)-277(\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Exit)]TJ +/F59 9.9626 Tf -49.122 -27.896 Td [(T)90(ype:)]TJ 0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - 0 -19.926 Td [(Function)-250(value)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 72.777 0 Td [(the)-250(elapsed)-250(time)-250(in)-250(seconds.)]TJ -47.87 -11.955 Td [(Returned)-250(as:)-310(a)]TJ/F59 9.9626 Tf 66.022 0 Td [(real\050psb_dpk_\051)]TJ/F54 9.9626 Tf 75.715 0 Td [(variable.)]TJ 0 g 0 G - -2.26 -491.123 Td [(117)]TJ + 0 -19.925 Td [(a)]TJ 0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)-250(to)-250(be)-250(written.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(required)]TJ/F62 9.9626 Tf 39.293 0 Td [(.)]TJ -63.292 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.137 0 Td [(psb)]TJ ET - -endstream -endobj -1664 0 obj -<< -/Length 1388 ->> -stream -0 g 0 G -0 g 0 G +q +1 0 0 1 360.068 575.783 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.7)-1000(psb)]TJ +/F67 9.9626 Tf 363.206 575.584 Td [(Tspmat)]TJ ET q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 395.216 575.783 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(barrier)-240(\227)-240(Sinchronization)-239(point)-240(parallel)-240(environment)]TJ +/F67 9.9626 Tf 398.354 575.584 Td [(type)]TJ 0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F59 9.9626 Tf -51.12 -18.964 Td [(call)-525(psb_barrier\050ctxt\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-358(subr)18(outine)-357(acts)-358(as)-358(an)-358(explicit)-357(synchr)18(onization)-358(point)-358(for)-357(the)-358(PSBLAS)]TJ -14.944 -11.955 Td [(parallel)-250(virtual)-250(machine.)]TJ +/F59 9.9626 Tf -268.57 -19.926 Td [(b)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -19.925 Td [(T)90(ype:)]TJ +/F62 9.9626 Tf 11.068 0 Td [(Rigth)-250(hand)-250(side.)]TJ 13.838 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(Optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(An)-235(array)-234(of)-235(type)-234(r)18(eal)-235(or)-234(complex,)-238(rank)-234(1)-235(and)-234(having)-235(the)-234(ALLOCA)74(T)74(ABLE)]TJ 0 -11.955 Td [(attribute;)-361(will)-324(be)-324(allocated)-323(and)-324(\002lled)-324(in)-324(if)-324(the)-324(input)-324(\002le)-323(contains)-324(a)-324(right)]TJ 0 -11.955 Td [(hand)-250(side.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F59 9.9626 Tf -24.906 -19.926 Td [(\002lename)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(written)-250(to.)]TJ -19.368 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -62.187 -11.955 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F67 9.9626 Tf 298.534 0 Td [(-)]TJ/F62 9.9626 Tf 5.231 0 Td [(,)-387(in)]TJ -303.765 -11.955 Td [(which)-234(case)-234(the)-233(default)-234(output)-234(unit)-234(6)-234(\050i)1(.e.)-305(standar)18(d)-234(output)-234(in)-233(Unix)-234(jar)18(gon\051)]TJ 0 -11.955 Td [(is)-250(used.)-310(Default:)]TJ/F67 9.9626 Tf 74.799 0 Td [(-)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ 0 g 0 G +/F59 9.9626 Tf -104.935 -19.926 Td [(iunit)]TJ 0 g 0 G - 0 -19.926 Td [(ctxt)]TJ +/F62 9.9626 Tf 27.108 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.202 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -62.187 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F67 9.9626 Tf 287.758 0 Td [(-)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +/F59 9.9626 Tf -317.894 -19.925 Td [(key)]TJ 0 g 0 G - 139.477 -455.258 Td [(118)]TJ +/F62 9.9626 Tf 21.589 0 Td [(Matrix)-250(key)111(.)]TJ 3.317 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(Optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(A)-291(charachter)-291(variable)-291(of)-291(length)-291(8)-291(holding)-291(the)-290(matrix)-291(key)-291(as)-291(speci\002ed)-291(by)]TJ 0 -11.955 Td [(the)-250(Harwell-Boeing)-250(format)-250(and)-250(to)-250(be)-250(written)-250(to)-250(\002le.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.925 Td [(mtitle)]TJ +0 g 0 G +/F62 9.9626 Tf 32.089 0 Td [(Matrix)-250(title.)]TJ -7.183 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(Optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(A)-239(charachter)-239(variable)-239(of)-239(length)-240(72)-239(holding)-239(the)-239(matrix)-239(title)-239(as)-239(speci\002ed)-239(by)]TJ 0 -11.956 Td [(the)-250(Harwell-Boeing)-250(format)-250(and)-250(to)-250(be)-250(written)-250(to)-250(\002le.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -21.917 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(iret)]TJ +0 g 0 G +/F62 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.433 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 139.477 -128.483 Td [(144)]TJ 0 g 0 G ET endstream endobj -1668 0 obj +1924 0 obj << -/Length 1283 +/Length 3542 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(7.8)-1000(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(9.3)-1000(mm)]TJ ET q -1 0 0 1 147.429 706.328 cm +1 0 0 1 148.768 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(abort)-250(\227)-250(Abort)-250(a)-250(computation)]TJ +/F59 11.9552 Tf 152.354 706.129 Td [(mat)]TJ +ET +q +1 0 0 1 173.658 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 177.245 706.129 Td [(read)-202(\227)-203(Read)-202(a)-203(sparse)-202(matrix)-203(from)-202(a)-203(\002le)-202(in)-203(the)-202(Ma-)]TJ -50.45 -13.948 Td [(trixMarket)-250(format)]TJ/F62 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-810(m)-35(m)]TJ +ET +q +1 0 0 1 149.022 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 152.359 667.24 Td [(m)-35(a)-35(t)]TJ +ET +q +1 0 0 1 171.029 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 174.367 667.24 Td [(r)-35(e)-35(a)-35(d)-169(\050)-166(a)-242(,)-927(i)-151(r)-152(e)-151(t)-478(,)-905(i)-129(u)-130(n)-129(i)-130(t)-434(,)-882(f)-107(i)-107(l)-107(e)-107(n)-107(a)-106(m)-107(e)-241(\051)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -51.121 -18.964 Td [(call)-525(psb_abort\050ctxt\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-250(subr)18(outine)-250(aborts)-250(computation)-250(on)-250(the)-250(parallel)-250(virtual)-250(machine.)]TJ 0 g 0 G -/F51 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf -74.472 -27.896 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.926 Td [(ctxt)]TJ + 0 -19.925 Td [(\002lename)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +/F62 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(r)18(ead.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F67 9.9626 Tf 298.534 0 Td [(-)]TJ/F62 9.9626 Tf 5.23 0 Td [(,)-387(in)]TJ -303.764 -11.955 Td [(which)-254(case)-253(the)-254(default)-254(input)-253(unit)-254(5)-254(\050i.e.)-321(standar)18(d)-253(input)-254(in)-254(Unix)-253(jar)18(gon\051)-254(is)]TJ 0 -11.955 Td [(used.)-310(Default:)]TJ/F67 9.9626 Tf 65.185 0 Td [(-)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ 0 g 0 G - 139.477 -467.213 Td [(119)]TJ +/F59 9.9626 Tf -95.322 -19.925 Td [(iunit)]TJ +0 g 0 G +/F62 9.9626 Tf 27.109 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.202 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F67 9.9626 Tf 287.757 0 Td [(-)]TJ/F62 9.9626 Tf 5.231 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -317.895 -21.918 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)-250(r)18(ead)-250(fr)18(om)-250(\002le.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 309.258 442.283 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 312.397 442.084 Td [(Tspmat)]TJ +ET +q +1 0 0 1 344.406 442.283 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 347.544 442.084 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -268.571 -19.925 Td [(iret)]TJ +0 g 0 G +/F62 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 139.477 -307.811 Td [(145)]TJ 0 g 0 G ET endstream endobj -1567 0 obj +1822 0 obj << /Type /ObjStm /N 100 -/First 970 -/Length 9451 ->> -stream -392 0 1566 58 1562 117 1570 224 1568 363 1572 510 396 568 1573 625 1569 683 1577 790 -1575 929 1579 1074 400 1133 1580 1191 1581 1250 1576 1309 1584 1403 1582 1542 1586 1687 405 1745 -1587 1802 1588 1860 1583 1918 1591 2012 1589 2151 1593 2296 409 2355 1590 2413 1595 2534 1597 2652 -1598 2710 1599 2768 1600 2826 1594 2884 1605 3004 1601 3161 1602 3306 1603 3453 1607 3600 413 3659 -1604 3717 1609 3824 1611 3942 417 4000 1608 4057 1614 4164 1616 4282 1617 4341 1618 4400 1619 4459 -1620 4518 1621 4577 1622 4636 1623 4695 1624 4754 1625 4813 1613 4872 1627 4992 1629 5110 421 5168 -1626 5225 1631 5306 1633 5424 425 5483 1634 5541 1635 5600 1630 5659 1637 5792 1639 5910 429 5968 -1640 6025 1641 6082 1636 6138 1643 6271 1645 6389 433 6448 1646 6506 1647 6565 1648 6624 1642 6683 -1650 6816 1652 6934 437 6992 1649 7049 1655 7143 1657 7261 441 7320 1654 7378 1659 7498 1661 7616 -445 7674 1658 7731 1663 7825 1665 7943 449 8002 1662 8060 1667 8154 1669 8272 453 8330 1666 8387 -% 392 0 obj +/First 972 +/Length 9404 +>> +stream +1819 0 1821 118 473 177 1818 235 1824 382 1826 500 1827 558 1828 616 1829 674 1823 732 +1831 869 1833 987 477 1046 1830 1104 1835 1251 1837 1369 1838 1427 1839 1485 1840 1543 1834 1601 +1842 1738 1844 1856 481 1915 1841 1973 1846 2120 1848 2238 1849 2296 1850 2354 1852 2411 1853 2469 +1854 2527 1845 2585 1857 2764 1859 2882 485 2941 1860 2999 1856 3058 1862 3205 1864 3323 489 3381 +1865 3438 1861 3496 1869 3643 1866 3791 1867 3939 1871 4087 493 4146 1868 4204 1875 4298 1877 4416 +1872 4474 1873 4532 1874 4590 1879 4686 1881 4804 497 4863 1882 4921 1883 4980 1878 5039 1885 5120 +1887 5238 501 5296 1888 5353 1889 5410 1884 5468 1892 5549 1894 5667 505 5726 1895 5784 1896 5843 +1891 5902 1898 5983 1900 6101 509 6159 1901 6216 1902 6273 1897 6331 1904 6425 1906 6543 513 6602 +1903 6660 1909 6754 1907 6893 1911 7038 517 7096 1912 7153 1913 7211 1908 7269 1916 7363 1914 7502 +1918 7647 521 7706 1919 7764 1920 7823 1915 7882 1923 7976 1921 8115 1925 8260 525 8318 1926 8375 +% 1819 0 obj << -/D [1563 0 R /XYZ 150.705 716.092 null] +/Type /Page +/Contents 1820 0 R +/Resources 1818 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1817 0 R >> -% 1566 0 obj +% 1821 0 obj << -/D [1563 0 R /XYZ 150.705 382.093 null] +/D [1819 0 R /XYZ 149.705 753.953 null] >> -% 1562 0 obj +% 473 0 obj +<< +/D [1819 0 R /XYZ 150.705 716.092 null] +>> +% 1818 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F91 914 0 R /F69 1460 0 R /F93 915 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1570 0 obj +% 1824 0 obj << /Type /Page -/Contents 1571 0 R -/Resources 1569 0 R +/Contents 1825 0 R +/Resources 1823 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1574 0 R -/Annots [ 1568 0 R ] +/Parent 1817 0 R >> -% 1568 0 obj +% 1826 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 494.069 359.001 506.129] -/A << /S /GoTo /D (descdata) >> +/D [1824 0 R /XYZ 98.895 753.953 null] >> -% 1572 0 obj +% 1827 0 obj << -/D [1570 0 R /XYZ 98.895 753.953 null] +/D [1824 0 R /XYZ 99.895 528.579 null] >> -% 396 0 obj +% 1828 0 obj << -/D [1570 0 R /XYZ 99.895 716.092 null] +/D [1824 0 R /XYZ 99.895 494.104 null] >> -% 1573 0 obj +% 1829 0 obj << -/D [1570 0 R /XYZ 99.895 258.556 null] +/D [1824 0 R /XYZ 99.895 403.265 null] >> -% 1569 0 obj +% 1823 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F102 1016 0 R /F60 666 0 R /F120 1782 0 R >> /ProcSet [ /PDF /Text ] >> -% 1577 0 obj +% 1831 0 obj << /Type /Page -/Contents 1578 0 R -/Resources 1576 0 R +/Contents 1832 0 R +/Resources 1830 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1574 0 R -/Annots [ 1575 0 R ] +/Parent 1817 0 R >> -% 1575 0 obj +% 1833 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 573.77 409.811 585.83] -/A << /S /GoTo /D (descdata) >> +/D [1831 0 R /XYZ 149.705 753.953 null] >> -% 1579 0 obj +% 477 0 obj << -/D [1577 0 R /XYZ 149.705 753.953 null] +/D [1831 0 R /XYZ 150.705 716.092 null] >> -% 400 0 obj +% 1830 0 obj << -/D [1577 0 R /XYZ 150.705 716.092 null] +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F91 914 0 R /F69 1460 0 R /F93 915 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1580 0 obj +% 1835 0 obj << -/D [1577 0 R /XYZ 150.705 358.183 null] +/Type /Page +/Contents 1836 0 R +/Resources 1834 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1817 0 R >> -% 1581 0 obj +% 1837 0 obj << -/D [1577 0 R /XYZ 150.705 314.403 null] +/D [1835 0 R /XYZ 98.895 753.953 null] >> -% 1576 0 obj +% 1838 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/D [1835 0 R /XYZ 99.895 528.579 null] >> -% 1584 0 obj +% 1839 0 obj << -/Type /Page -/Contents 1585 0 R -/Resources 1583 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1574 0 R -/Annots [ 1582 0 R ] +/D [1835 0 R /XYZ 99.895 494.104 null] >> -% 1582 0 obj +% 1840 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 573.77 359.001 585.83] -/A << /S /GoTo /D (descdata) >> +/D [1835 0 R /XYZ 99.895 403.265 null] >> -% 1586 0 obj +% 1834 0 obj << -/D [1584 0 R /XYZ 98.895 753.953 null] +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F102 1016 0 R /F60 666 0 R /F120 1782 0 R >> +/ProcSet [ /PDF /Text ] >> -% 405 0 obj +% 1842 0 obj << -/D [1584 0 R /XYZ 99.895 716.092 null] +/Type /Page +/Contents 1843 0 R +/Resources 1841 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1817 0 R >> -% 1587 0 obj +% 1844 0 obj << -/D [1584 0 R /XYZ 99.895 370.138 null] +/D [1842 0 R /XYZ 149.705 753.953 null] >> -% 1588 0 obj +% 481 0 obj << -/D [1584 0 R /XYZ 99.895 338.313 null] +/D [1842 0 R /XYZ 150.705 716.092 null] >> -% 1583 0 obj +% 1841 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F91 914 0 R /F69 1460 0 R /F93 915 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1591 0 obj +% 1846 0 obj << /Type /Page -/Contents 1592 0 R -/Resources 1590 0 R +/Contents 1847 0 R +/Resources 1845 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1574 0 R -/Annots [ 1589 0 R ] +/Parent 1855 0 R >> -% 1589 0 obj +% 1848 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 491.971 420.271 504.031] -/A << /S /GoTo /D (spdata) >> +/D [1846 0 R /XYZ 98.895 753.953 null] >> -% 1593 0 obj +% 1849 0 obj << -/D [1591 0 R /XYZ 149.705 753.953 null] +/D [1846 0 R /XYZ 99.895 564.444 null] >> -% 409 0 obj +% 1850 0 obj << -/D [1591 0 R /XYZ 150.705 716.092 null] +/D [1846 0 R /XYZ 99.895 529.97 null] >> -% 1590 0 obj +% 1852 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F61 1360 0 R /F52 585 0 R >> +/D [1846 0 R /XYZ 99.895 441.815 null] +>> +% 1853 0 obj +<< +/D [1846 0 R /XYZ 99.895 409.935 null] +>> +% 1854 0 obj +<< +/D [1846 0 R /XYZ 99.895 319.095 null] +>> +% 1845 0 obj +<< +/Font << /F62 667 0 R /F59 665 0 R /F67 913 0 R /F60 666 0 R /F93 915 0 R /F17 1851 0 R /F104 1254 0 R /F102 1016 0 R /F120 1782 0 R >> /ProcSet [ /PDF /Text ] >> -% 1595 0 obj +% 1857 0 obj << /Type /Page -/Contents 1596 0 R -/Resources 1594 0 R +/Contents 1858 0 R +/Resources 1856 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1574 0 R ->> -% 1597 0 obj -<< -/D [1595 0 R /XYZ 98.895 753.953 null] +/Parent 1855 0 R >> -% 1598 0 obj +% 1859 0 obj << -/D [1595 0 R /XYZ 99.895 496.698 null] +/D [1857 0 R /XYZ 149.705 753.953 null] >> -% 1599 0 obj +% 485 0 obj << -/D [1595 0 R /XYZ 99.895 438.313 null] +/D [1857 0 R /XYZ 150.705 716.092 null] >> -% 1600 0 obj +% 1860 0 obj << -/D [1595 0 R /XYZ 99.895 418.388 null] +/D [1857 0 R /XYZ 150.705 222.691 null] >> -% 1594 0 obj +% 1856 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R /F85 814 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R /F69 1460 0 R /F93 915 0 R /F91 914 0 R >> /ProcSet [ /PDF /Text ] >> -% 1605 0 obj +% 1862 0 obj << /Type /Page -/Contents 1606 0 R -/Resources 1604 0 R +/Contents 1863 0 R +/Resources 1861 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1574 0 R -/Annots [ 1601 0 R 1602 0 R 1603 0 R ] +/Parent 1855 0 R >> -% 1601 0 obj +% 1864 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 527.942 420.271 540.002] -/A << /S /GoTo /D (spdata) >> +/D [1862 0 R /XYZ 98.895 753.953 null] >> -% 1602 0 obj +% 489 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 460.196 409.811 472.256] -/A << /S /GoTo /D (descdata) >> +/D [1862 0 R /XYZ 99.895 716.092 null] >> -% 1603 0 obj +% 1865 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [371.126 404.405 438.184 416.465] -/A << /S /GoTo /D (precdata) >> +/D [1862 0 R /XYZ 99.895 222.691 null] >> -% 1607 0 obj +% 1861 0 obj << -/D [1605 0 R /XYZ 149.705 753.953 null] +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F69 1460 0 R /F93 915 0 R /F60 666 0 R /F91 914 0 R >> +/ProcSet [ /PDF /Text ] >> -% 413 0 obj +% 1869 0 obj << -/D [1605 0 R /XYZ 150.705 716.092 null] +/Type /Page +/Contents 1870 0 R +/Resources 1868 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1855 0 R +/Annots [ 1866 0 R 1867 0 R ] >> -% 1604 0 obj +% 1866 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [196.011 499.949 202.985 512.009] +/A << /S /GoTo /D (listing.5) >> >> -% 1609 0 obj +% 1867 0 obj << -/Type /Page -/Contents 1610 0 R -/Resources 1608 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1612 0 R +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [196.625 344.532 203.599 356.591] +/A << /S /GoTo /D (listing.6) >> >> -% 1611 0 obj +% 1871 0 obj << -/D [1609 0 R /XYZ 98.895 753.953 null] +/D [1869 0 R /XYZ 149.705 753.953 null] >> -% 417 0 obj +% 493 0 obj << -/D [1609 0 R /XYZ 99.895 716.092 null] +/D [1869 0 R /XYZ 150.705 716.092 null] >> -% 1608 0 obj +% 1868 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1614 0 obj +% 1875 0 obj << /Type /Page -/Contents 1615 0 R -/Resources 1613 0 R +/Contents 1876 0 R +/Resources 1874 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1612 0 R ->> -% 1616 0 obj -<< -/D [1614 0 R /XYZ 149.705 753.953 null] +/Parent 1855 0 R >> -% 1617 0 obj +% 1877 0 obj << -/D [1614 0 R /XYZ 150.705 701.929 null] +/D [1875 0 R /XYZ 98.895 753.953 null] >> -% 1618 0 obj +% 1872 0 obj << -/D [1614 0 R /XYZ 150.705 668.729 null] +/D [1875 0 R /XYZ 99.895 411.235 null] >> -% 1619 0 obj +% 1873 0 obj << -/D [1614 0 R /XYZ 150.705 624.894 null] +/D [1875 0 R /XYZ 99.895 182.902 null] >> -% 1620 0 obj +% 1874 0 obj << -/D [1614 0 R /XYZ 150.705 555.872 null] +/Font << /F67 913 0 R /F120 1782 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1621 0 obj +% 1879 0 obj << -/D [1614 0 R /XYZ 150.705 500.082 null] +/Type /Page +/Contents 1880 0 R +/Resources 1878 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1855 0 R >> -% 1622 0 obj +% 1881 0 obj << -/D [1614 0 R /XYZ 150.705 468.201 null] +/D [1879 0 R /XYZ 149.705 753.953 null] >> -% 1623 0 obj +% 497 0 obj << -/D [1614 0 R /XYZ 150.705 425.023 null] +/D [1879 0 R /XYZ 150.705 716.092 null] >> -% 1624 0 obj +% 1882 0 obj << -/D [1614 0 R /XYZ 150.705 382.522 null] +/D [1879 0 R /XYZ 150.705 690.058 null] >> -% 1625 0 obj +% 1883 0 obj << -/D [1614 0 R /XYZ 150.705 354.627 null] +/D [1879 0 R /XYZ 150.705 693.143 null] >> -% 1613 0 obj +% 1878 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R >> +/Font << /F59 665 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1627 0 obj +% 1885 0 obj << /Type /Page -/Contents 1628 0 R -/Resources 1626 0 R +/Contents 1886 0 R +/Resources 1884 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1612 0 R +/Parent 1890 0 R >> -% 1629 0 obj +% 1887 0 obj << -/D [1627 0 R /XYZ 98.895 753.953 null] +/D [1885 0 R /XYZ 98.895 753.953 null] >> -% 421 0 obj +% 501 0 obj << -/D [1627 0 R /XYZ 99.895 716.092 null] +/D [1885 0 R /XYZ 99.895 716.092 null] >> -% 1626 0 obj +% 1888 0 obj +<< +/D [1885 0 R /XYZ 99.895 678.98 null] +>> +% 1889 0 obj +<< +/D [1885 0 R /XYZ 99.895 679.195 null] +>> +% 1884 0 obj << -/Font << /F51 584 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1631 0 obj +% 1892 0 obj << /Type /Page -/Contents 1632 0 R -/Resources 1630 0 R +/Contents 1893 0 R +/Resources 1891 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1612 0 R +/Parent 1890 0 R >> -% 1633 0 obj +% 1894 0 obj << -/D [1631 0 R /XYZ 149.705 753.953 null] +/D [1892 0 R /XYZ 149.705 753.953 null] >> -% 425 0 obj +% 505 0 obj << -/D [1631 0 R /XYZ 150.705 716.092 null] +/D [1892 0 R /XYZ 150.705 716.092 null] >> -% 1634 0 obj +% 1895 0 obj << -/D [1631 0 R /XYZ 150.705 222.691 null] +/D [1892 0 R /XYZ 150.705 689.963 null] >> -% 1635 0 obj +% 1896 0 obj << -/D [1631 0 R /XYZ 150.705 200.171 null] +/D [1892 0 R /XYZ 150.705 693.143 null] >> -% 1630 0 obj +% 1891 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F85 814 0 R /F52 585 0 R /F83 813 0 R >> +/Font << /F59 665 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1637 0 obj +% 1898 0 obj << /Type /Page -/Contents 1638 0 R -/Resources 1636 0 R +/Contents 1899 0 R +/Resources 1897 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1612 0 R +/Parent 1890 0 R >> -% 1639 0 obj +% 1900 0 obj << -/D [1637 0 R /XYZ 98.895 753.953 null] +/D [1898 0 R /XYZ 98.895 753.953 null] >> -% 429 0 obj +% 509 0 obj << -/D [1637 0 R /XYZ 99.895 716.092 null] +/D [1898 0 R /XYZ 99.895 716.092 null] >> -% 1640 0 obj +% 1901 0 obj << -/D [1637 0 R /XYZ 99.895 348.22 null] +/D [1898 0 R /XYZ 99.895 678.98 null] >> -% 1641 0 obj +% 1902 0 obj << -/D [1637 0 R /XYZ 99.895 313.8 null] +/D [1898 0 R /XYZ 99.895 679.195 null] >> -% 1636 0 obj +% 1897 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F52 585 0 R /F85 814 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1643 0 obj +% 1904 0 obj << /Type /Page -/Contents 1644 0 R -/Resources 1642 0 R +/Contents 1905 0 R +/Resources 1903 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1612 0 R ->> -% 1645 0 obj -<< -/D [1643 0 R /XYZ 149.705 753.953 null] ->> -% 433 0 obj -<< -/D [1643 0 R /XYZ 150.705 716.092 null] ->> -% 1646 0 obj -<< -/D [1643 0 R /XYZ 150.705 441.869 null] +/Parent 1890 0 R >> -% 1647 0 obj +% 1906 0 obj << -/D [1643 0 R /XYZ 150.705 395.439 null] +/D [1904 0 R /XYZ 149.705 753.953 null] >> -% 1648 0 obj +% 513 0 obj << -/D [1643 0 R /XYZ 150.705 363.559 null] +/D [1904 0 R /XYZ 150.705 716.092 null] >> -% 1642 0 obj +% 1903 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1650 0 obj +% 1909 0 obj << /Type /Page -/Contents 1651 0 R -/Resources 1649 0 R +/Contents 1910 0 R +/Resources 1908 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1653 0 R ->> -% 1652 0 obj -<< -/D [1650 0 R /XYZ 98.895 753.953 null] +/Parent 1890 0 R +/Annots [ 1907 0 R ] >> -% 437 0 obj +% 1907 0 obj << -/D [1650 0 R /XYZ 99.895 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 438.278 369.462 450.338] +/A << /S /GoTo /D (spdata) >> >> -% 1649 0 obj +% 1911 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/D [1909 0 R /XYZ 98.895 753.953 null] >> -% 1655 0 obj +% 517 0 obj << -/Type /Page -/Contents 1656 0 R -/Resources 1654 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1653 0 R +/D [1909 0 R /XYZ 99.895 716.092 null] >> -% 1657 0 obj +% 1912 0 obj << -/D [1655 0 R /XYZ 149.705 753.953 null] +/D [1909 0 R /XYZ 99.895 676.015 null] >> -% 441 0 obj +% 1913 0 obj << -/D [1655 0 R /XYZ 150.705 716.092 null] +/D [1909 0 R /XYZ 99.895 679.195 null] >> -% 1654 0 obj +% 1908 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1659 0 obj +% 1916 0 obj << /Type /Page -/Contents 1660 0 R -/Resources 1658 0 R +/Contents 1917 0 R +/Resources 1915 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1653 0 R +/Parent 1890 0 R +/Annots [ 1914 0 R ] >> -% 1661 0 obj -<< -/D [1659 0 R /XYZ 98.895 753.953 null] ->> -% 445 0 obj +% 1914 0 obj << -/D [1659 0 R /XYZ 99.895 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 571.778 420.271 583.837] +/A << /S /GoTo /D (spdata) >> >> -% 1658 0 obj +% 1918 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/D [1916 0 R /XYZ 149.705 753.953 null] >> -% 1663 0 obj +% 521 0 obj << -/Type /Page -/Contents 1664 0 R -/Resources 1662 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1653 0 R +/D [1916 0 R /XYZ 150.705 716.092 null] >> -% 1665 0 obj +% 1919 0 obj << -/D [1663 0 R /XYZ 149.705 753.953 null] +/D [1916 0 R /XYZ 150.705 676.015 null] >> -% 449 0 obj +% 1920 0 obj << -/D [1663 0 R /XYZ 150.705 716.092 null] +/D [1916 0 R /XYZ 150.705 679.195 null] >> -% 1662 0 obj +% 1915 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1667 0 obj +% 1923 0 obj << /Type /Page -/Contents 1668 0 R -/Resources 1666 0 R +/Contents 1924 0 R +/Resources 1922 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1653 0 R +/Parent 1928 0 R +/Annots [ 1921 0 R ] >> -% 1669 0 obj +% 1921 0 obj << -/D [1667 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 438.278 369.462 450.338] +/A << /S /GoTo /D (spdata) >> >> -% 453 0 obj +% 1925 0 obj << -/D [1667 0 R /XYZ 99.895 716.092 null] +/D [1923 0 R /XYZ 98.895 753.953 null] >> -% 1666 0 obj +% 525 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/D [1923 0 R /XYZ 99.895 716.092 null] +>> +% 1926 0 obj +<< +/D [1923 0 R /XYZ 99.895 678.98 null] >> endstream endobj -1672 0 obj +1933 0 obj << -/Length 5526 +/Length 4155 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.9)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(9.4)-1000(mm)]TJ ET q -1 0 0 1 198.238 706.328 cm +1 0 0 1 199.577 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(bcast)-250(\227)-250(Broadcast)-250(data)]TJ -0 g 0 G +/F59 11.9552 Tf 203.164 706.129 Td [(array)]TJ +ET +q +1 0 0 1 231.784 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 235.371 706.129 Td [(read)-222(\227)-223(Read)-222(a)-223(dense)-222(array)-223(from)-222(a)-223(\002le)-222(in)-223(the)-222(Ma-)]TJ -57.767 -13.948 Td [(trixMarket)-250(format)]TJ/F62 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-845(m)-71(m)]TJ +ET +q +1 0 0 1 200.884 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 204.572 667.24 Td [(a)-70(r)-70(r)-71(a)-70(y)]TJ +ET +q +1 0 0 1 232.04 667.439 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 235.728 667.24 Td [(r)-70(e)-70(a)-71(d)-203(\050)-150(b)-206(,)-926(i)-152(r)-151(e)-152(t)-478(,)-905(i)-129(u)-130(n)-129(i)-130(t)-434(,)-882(f)-107(i)-107(l)-107(e)-107(n)-106(a)-107(m)-107(e)-241(\051)]TJ 0 g 0 G -/F59 9.9626 Tf -51.12 -20.269 Td [(call)-525(psb_bcast\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F54 9.9626 Tf 14.944 -24.611 Td [(This)-221(subr)18(outine)-222(implements)-221(a)-221(br)18(oadcast)-222(operation)-221(based)-221(on)-222(the)-221(underlying)]TJ -14.944 -11.955 Td [(communication)-250(library)111(.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.945 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F59 9.9626 Tf -85.023 -27.896 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -22.619 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - 0 -22.619 Td [(ctxt)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ + 0 -19.925 Td [(\002lename)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(dat)]TJ +/F62 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(r)18(ead.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F67 9.9626 Tf 298.533 0 Td [(-)]TJ/F62 9.9626 Tf 5.231 0 Td [(,)-387(in)]TJ -303.764 -11.955 Td [(which)-254(case)-253(the)-254(default)-254(input)-253(unit)-254(5)-254(\050i.e.)-321(standar)18(d)-253(input)-254(in)-254(Unix)-253(jar)18(gon\051)-254(is)]TJ 0 -11.955 Td [(used.)-310(Default:)]TJ/F67 9.9626 Tf 65.184 0 Td [(-)]TJ/F62 9.9626 Tf 5.231 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(On)-250(the)-250(r)18(oot)-250(pr)18(ocess,)-250(the)-250(data)-250(to)-250(be)-250(br)18(oadcast.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-344(a)-344(rank)-344(1)-344(or)-344(2)-344(array)111(,)-367(or)-344(a)-344(character)-344(or)-344(logical)-344(variable,)-367(which)-344(may)-344(be)]TJ 0 -11.955 Td [(a)-377(scalar)-377(or)-377(rank)-377(1)-377(array)111(.)-1067(T)90(ype,)-409(kind,)-409(rank)-377(and)-376(size)-377(must)-377(agr)18(ee)-377(on)-377(all)]TJ 0 -11.955 Td [(pr)18(ocesses.)]TJ +/F59 9.9626 Tf -95.322 -19.925 Td [(iunit)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(root)]TJ +/F62 9.9626 Tf 27.108 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.201 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F67 9.9626 Tf 287.757 0 Td [(-)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Root)-250(pr)18(ocess)-250(holding)-250(data)-250(to)-250(be)-250(br)18(oadcast.)]TJ 1.654 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F61 10.3811 Tf 138.85 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 10.986 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F61 10.3811 Tf 19.923 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.503 0 Td [(\000)]TJ/F54 9.9626 Tf 10.132 0 Td [(1,)-250(default)-250(0)]TJ +/F59 9.9626 Tf -317.894 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -246.025 -22.618 Td [(mode)]TJ 0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.955 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F59 9.9626 Tf 199.497 0 Td [(OR)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F59 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F54 9.9626 Tf 109.837 0 Td [(,)]TJ/F59 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F54 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.956 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ + 0 -19.926 Td [(b)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -34.573 Td [(request)]TJ +/F62 9.9626 Tf 11.068 0 Td [(Rigth)-250(hand)-250(side\050s\051.)]TJ 13.839 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(An)-398(array)-398(of)-397(type)-398(r)18(eal)-398(or)-398(complex,)-434(rank)-398(1)-398(or)-398(2)-398(and)-398(h)1(a)-1(v)1(ing)-398(the)-398(ALLO-)]TJ 0 -11.955 Td [(CA)74(T)74(ABLE)-257(attribute,)-258(or)-257(an)-257(object)-257(of)-257(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 177.91 0 Td [(psb)]TJ +ET +q +1 0 0 1 369.841 430.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 372.979 430.129 Td [(T)]TJ +ET +q +1 0 0 1 378.837 430.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 381.975 430.129 Td [(vect)]TJ +ET +q +1 0 0 1 403.524 430.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 406.663 430.129 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ +/F62 9.9626 Tf 20.921 0 Td [(,)-259(of)-257(type)-256(r)18(eal)-257(or)]TJ -251.973 -11.955 Td [(complex.)]TJ 0 -11.955 Td [(W)55(ill)-275(be)-276(allocated)-275(and)-276(\002ll)1(ed)-276(in)-275(if)-276(the)-275(input)-275(\002le)-276(contains)-275(a)-275(right)-276(hand)-275(side,)]TJ 0 -11.956 Td [(otherwise)-250(will)-250(be)-250(left)-250(in)-250(the)-250(UNALLOCA)74(TED)-250(state.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -24.612 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -24.906 -31.88 Td [(iret)]TJ 0 g 0 G +/F62 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G -/F54 9.9626 Tf 164.384 -29.887 Td [(120)]TJ + 139.476 -248.035 Td [(146)]TJ 0 g 0 G ET endstream endobj -1677 0 obj +1940 0 obj << -/Length 4820 +/Length 7231 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(dat)]TJ -0 g 0 G -/F54 9.9626 Tf 19.368 0 Td [(On)-250(all)-250(pr)18(ocesses)-250(other)-250(than)-250(r)18(oot,)-250(the)-250(br)18(oadcasted)-250(data.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-315(a)-314(rank)-315(1)-315(or)-314(2)-315(array)111(,)-331(or)-314(a)-315(character)-315(or)-314(logical)-315(scalar)74(.)-819(T)90(ype,)-330(kind,)-331(rank)]TJ 0 -11.955 Td [(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(request)]TJ -0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 31.023 0 Td [(dat)]TJ/F54 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.956 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ -0 g 0 G - -12.453 -19.925 Td [(2.)]TJ -0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 32.225 0 Td [(mode)]TJ/F54 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F59 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F54 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F59 9.9626 Tf 16.309 0 Td [(request)]TJ/F54 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F59 11.9552 Tf 99.895 706.129 Td [(9.5)-1000(mm)]TJ ET q -1 0 0 1 124.802 417.212 cm -0 0 318.804 27.895 re f +1 0 0 1 148.768 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 434.448 Td [(call)]TJ -0 g 0 G - [-525(psb_bcast\050ctxt,dat,&)]TJ 23.537 -10.959 Td [(&)-525(mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [(ior)]TJ -0 g 0 G - [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0 g 0 G -/F54 9.9626 Tf -48.393 -32.18 Td [(3.)]TJ -0 g 0 G - [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F59 9.9626 Tf 216.877 0 Td [(dat)]TJ/F54 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F52 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F54 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F59 11.9552 Tf 152.354 706.129 Td [(mat)]TJ ET q -1 0 0 1 124.802 306.627 cm -0 0 318.804 60.772 re f +1 0 0 1 173.658 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 356.739 Td [(call)]TJ -0 g 0 G - [-525(psb_bcast\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(psb_collective_start_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(bcast_request\051)]TJ -23.537 -10.959 Td [(.......)]TJ -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F94 8.9664 Tf -37.659 -10.959 Td [(call)]TJ +/F59 11.9552 Tf 177.245 706.129 Td [(write)-275(\227)-275(W)74(rite)-275(a)-275(sparse)-275(matrix)-275(to)-275(a)-275(\002le)-275(in)-275(the)-275(Ma-)]TJ -50.45 -13.948 Td [(trixMarket)-250(format)]TJ/F62 9.9626 Tf -25.158 -24.48 Td [(c)-175(a)-175(l)-174(l)-828(m)-52(m)]TJ +ET +q +1 0 0 1 149.539 667.901 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 153.049 667.701 Td [(m)-52(a)-53(t)]TJ +ET +q +1 0 0 1 172.236 667.901 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 175.746 667.701 Td [(w)-52(r)-53(i)-52(t)-52(e)-186(\050)-167(a)-242(,)-900(m)-126(t)-125(i)-126(t)-125(l)-126(e)-426(,)-926(i)-152(r)-151(e)-152(t)-478(,)-904(i)-130(u)-129(n)-130(i)-130(t)-434(,)-882(f)-107(i)-107(l)-106(e)-107(n)-107(a)-107(m)-107(e)-240(\051)]TJ 0 g 0 G - [-525(psb_bcast\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ 0 g 0 G - [(psb_collective_end_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ 0 g 0 G - [(bcast_request\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F59 9.9626 Tf -75.851 -26.279 Td [(T)90(ype:)]TJ 0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F54 9.9626 Tf 103.537 -222.465 Td [(121)]TJ +/F59 9.9626 Tf -29.828 -19.464 Td [(On)-250(Entry)]TJ 0 g 0 G -ET - -endstream -endobj -1686 0 obj -<< -/Length 5829 ->> -stream 0 g 0 G + 0 -19.464 Td [(a)]TJ 0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)-250(to)-250(be)-250(written.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 309.258 578.783 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.10)-1000(psb)]TJ +/F67 9.9626 Tf 312.397 578.584 Td [(Tspmat)]TJ ET q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 344.406 578.783 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(sum)-250(\227)-250(Global)-250(sum)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -57.098 -19.198 Td [(call)-525(psb_sum\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F54 9.9626 Tf 14.944 -22.401 Td [(This)-353(subr)18(outine)-353(implements)-354(a)-353(sum)-353(r)18(eduction)-353(operation)-353(based)-354(on)-353(the)-353(un-)]TJ -14.944 -11.955 Td [(derlying)-250(communication)-250(library)111(.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -20.288 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.409 Td [(On)-250(Entry)]TJ +/F67 9.9626 Tf 347.544 578.584 Td [(type)]TJ 0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G - 0 -20.408 Td [(ctxt)]TJ +/F59 9.9626 Tf -268.571 -19.464 Td [(mtitle)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +/F62 9.9626 Tf 32.09 0 Td [(Matrix)-250(title.)]TJ -7.183 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(A)-231(charachter)-230(variable)-231(holding)-231(a)-230(descriptive)-231(title)-231(for)-230(the)-231(matrix)-231(to)-230(be)-231(writ-)]TJ 0 -11.955 Td [(ten)-250(to)-250(\002le.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.409 Td [(dat)]TJ +/F59 9.9626 Tf -24.907 -19.464 Td [(\002lename)]TJ 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(sum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.956 Td [(or)-300(a)-300(rank)-300(1)-300(or)-301(2)-300(array)111(.)-760(T)90(ype,)-313(kind,)-312(rank)-300(and)-301(size)-300(must)-300(agr)18(ee)-300(on)-300(all)-300(pr)18(o-)]TJ 0 -11.955 Td [(cesses.)]TJ +/F62 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(written)-250(to.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F67 9.9626 Tf 298.534 0 Td [(-)]TJ/F62 9.9626 Tf 5.23 0 Td [(,)-387(in)]TJ -303.764 -11.955 Td [(which)-234(case)-234(the)-233(default)-234(output)-234(unit)-234(6)-233(\050i.e.)-305(standar)18(d)-234(output)-234(in)-233(Unix)-234(jar)18(gon\051)]TJ 0 -11.956 Td [(is)-250(used.)-310(Default:)]TJ/F67 9.9626 Tf 74.799 0 Td [(-)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.408 Td [(root)]TJ +/F59 9.9626 Tf -104.936 -19.463 Td [(iunit)]TJ 0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-250(to)-250(hold)-250(the)-250(\002nal)-250(sum,)-250(or)]TJ/F83 10.3811 Tf 143.744 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)-250(to)-250(make)-250(it)-250(available)-250(on)-250(all)-250(pr)18(ocesses.)]TJ -150.284 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F83 10.3811 Tf 131.101 0 Td [(\000)]TJ/F54 9.9626 Tf 8.195 0 Td [(1)]TJ/F61 10.3811 Tf 7.873 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F61 10.3811 Tf 19.923 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ +/F62 9.9626 Tf 27.109 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.202 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F67 9.9626 Tf 287.757 0 Td [(-)]TJ/F62 9.9626 Tf 5.231 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -254.343 -20.409 Td [(mode)]TJ +/F59 9.9626 Tf -317.895 -20.764 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.956 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F59 9.9626 Tf 199.497 0 Td [(OR)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F59 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F54 9.9626 Tf 109.837 0 Td [(,)]TJ/F59 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F54 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.955 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -32.364 Td [(request)]TJ + 0 -19.463 Td [(iret)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.956 Td [(be)-250(pr)18(esent.)]TJ +/F62 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -20.763 Td [(Notes)]TJ/F62 9.9626 Tf 14.944 -11.956 Td [(If)-283(this)-282(function)-283(is)-283(called)-282(on)-283(a)-282(matrix)-283(a)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.401 Td [(On)-250(Return)]TJ 0 g 0 G + [-283(on)-282(a)-283(distributed)-283(communicator)-282(only)]TJ -14.944 -11.955 Td [(the)-316(local)-317(part)-316(is)-316(written)-317(in)-316(output.)-509(T)92(o)-316(get)-317(a)-316(single)-316(MatrixMarket)-317(\002le)-316(with)-316(the)]TJ 0 -11.955 Td [(whole)-225(matrix)-225(when)-225(appr)18(opriate,)-230(e.g.)-302(for)-225(debugging)-225(purposes,)-230(one)-225(could)]TJ/F60 9.9626 Tf 318.257 0 Td [(gather)]TJ/F62 9.9626 Tf -318.257 -11.955 Td [(the)-339(whole)-338(matrix)-339(on)-338(a)-339(single)-338(rank)-339(and)-338(then)-339(write)-338(it.)-576(Consider)-339(the)-338(following)]TJ 0 -11.955 Td [(example)-250(for)-250(a)]TJ/F60 9.9626 Tf 62.495 0 Td [(double)]TJ/F62 9.9626 Tf 28.692 0 Td [(pr)18(ecision)-250(matrix)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 99.895 178.717 cm +0 0 343.711 82.69 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - 0 -20.408 Td [(dat)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 102.884 250.747 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(sum)-250(operation.)]TJ 5.54 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.781 0 Td [(.)]TJ + [(\050psb_ldspmat_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 79.264 -29.887 Td [(122)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -ET - -endstream -endobj -1690 0 obj -<< -/Length 4455 ->> -stream +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(aglobal)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -21.918 Td [(call)]TJ 0 g 0 G -BT -/F54 9.9626 Tf 124.802 706.129 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.956 Td [(or)-250(a)-250(rank)-250(1)-250(or)-250(2)-250(array)111(.)]TJ 0 -11.955 Td [(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ + [-525(psb_gather\050aglobal,a,desc_a,info\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(if)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(request)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ + [-525(\050iam)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(==)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 31.023 0 Td [(dat)]TJ/F54 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.955 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - -12.453 -19.926 Td [(2.)]TJ + [-525(psb_root_\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 32.225 0 Td [(mode)]TJ/F54 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F59 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F54 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F59 9.9626 Tf 16.309 0 Td [(request)]TJ/F54 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -ET -q -1 0 0 1 124.802 441.123 cm -0 0 318.804 27.895 re f -Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -BT -/F94 8.9664 Tf 137.205 458.358 Td [(call)]TJ + 37.659 -10.959 Td [(call)]TJ 0 g 0 G - [-525(psb_sum\050ctxt,dat,&)]TJ 23.537 -10.959 Td [(&)-525(mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [-525(mm_mat_write\050aglobal,mtitle,info,filename\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -37.659 -10.959 Td [(end)-525(if)]TJ 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [(ior)]TJ + 0 -10.959 Td [(call)]TJ 0 g 0 G - [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG + [-525(psb_spfree\050aglobal,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(desc_a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf -48.393 -32.179 Td [(3.)]TJ + [-525(info\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G - [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F59 9.9626 Tf 216.877 0 Td [(dat)]TJ/F54 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F52 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F54 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ +/F62 9.9626 Tf -2.989 -23.747 Td [(T)92(o)-250(simplify)-250(this)-250(pr)18(ocedur)18(e)-250(in)]TJ/F67 9.9626 Tf 129.513 0 Td [(C)]TJ/F62 9.9626 Tf 5.23 0 Td [(,)-250(ther)18(e)-250(is)-250(a)-250(utility)-250(function)]TJ 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 124.802 330.537 cm -0 0 318.804 60.772 re f +1 0 0 1 99.895 137.797 cm +0 0 343.711 16.936 re f Q 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 380.649 Td [(call)]TJ -0 g 0 G - [-525(psb_sum\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(psb_collective_start_,&)]TJ 23.537 -10.958 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(sum_request\051)]TJ -23.537 -10.959 Td [(.......)]TJ -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F94 8.9664 Tf -37.659 -10.959 Td [(call)]TJ +/F102 8.9664 Tf 102.884 144.073 Td [(psb_i_t)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_sum\050ctxt,dat,mode)]TJ + [-525(psb_c_)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [(<)]TJ 0 g 0 G - [(psb_collective_end_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ + [(s,d,c,z)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [(>)]TJ 0 g 0 G - [(sum_request\051)]TJ + [(global_mat_write\050ah,cdh\051;)]TJ 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G +/F62 9.9626 Tf -2.989 -23.747 Td [(that)-250(pr)18(oduces)-250(exactly)-250(this)-250(r)18(esult.)]TJ 0 g 0 G -/F54 9.9626 Tf 103.537 -246.376 Td [(123)]TJ + 164.384 -29.888 Td [(147)]TJ 0 g 0 G ET endstream endobj -1697 0 obj +1947 0 obj << -/Length 5548 +/Length 7073 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.11)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(9.6)-1000(mm)]TJ ET q -1 0 0 1 204.216 706.328 cm +1 0 0 1 199.577 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(max)-250(\227)-250(Global)-250(maximum)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -57.098 -20.269 Td [(call)-525(psb_max\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F54 9.9626 Tf 14.944 -24.611 Td [(This)-354(subr)18(outine)-354(implements)-354(a)-354(maximum)-354(valuer)18(eduction)-354(operation)-354(based)]TJ -14.944 -11.955 Td [(on)-250(the)-250(underlying)-250(communication)-250(library)111(.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.945 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -22.619 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -22.619 Td [(ctxt)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(dat)]TJ -0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(maximum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-264(as:)-339(an)-264(integer)-264(or)-265(r)18(eal)-264(variable,)-268(which)-264(may)-264(be)-264(a)-265(scalar)74(,)-268(or)-264(a)-264(rank)]TJ 0 -11.955 Td [(1)-250(or)-250(2)-250(array)111(.)-560(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(root)]TJ -0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-255(to)-255(hold)-255(the)-255(\002nal)-255(maximum,)-257(or)]TJ/F83 10.3811 Tf 170.502 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)-255(to)-255(make)-255(it)-255(available)-255(on)-255(all)-255(pr)18(o-)]TJ -177.042 -11.955 Td [(cesses.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F83 10.3811 Tf 131.101 0 Td [(\000)]TJ/F54 9.9626 Tf 8.195 0 Td [(1)]TJ/F61 10.3811 Tf 7.873 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F61 10.3811 Tf 19.923 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ +/F59 11.9552 Tf 203.164 706.129 Td [(array)]TJ +ET +q +1 0 0 1 231.784 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 235.371 706.129 Td [(write)-374(\227)-375(W)74(rite)-374(a)-375(dense)-374(array)-374(from)-375(a)-374(\002le)-375(in)-374(the)]TJ -57.767 -13.948 Td [(MatrixMarket)-250(format)]TJ/F62 9.9626 Tf -25.158 -24.509 Td [(c)-175(a)-175(l)-174(l)-858(m)-83(m)]TJ +ET +q +1 0 0 1 201.262 667.872 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 205.076 667.672 Td [(a)-83(r)-83(r)-83(a)-83(y)]TJ +ET +q +1 0 0 1 233.175 667.872 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 236.989 667.672 Td [(w)-83(r)-83(i)-83(t)-82(e)-217(\050)-149(b)-206(,)-941(v)-165(t)-165(i)-165(t)-166(l)-165(e)-505(,)-927(i)-151(r)-152(e)-151(t)-478(,)-905(i)-130(u)-129(n)-130(i)-129(t)-435(,)-881(f)-107(i)-107(l)-107(e)-107(n)-107(a)-107(m)-107(e)-240(\051)]TJ 0 g 0 G -/F51 9.9626 Tf -254.343 -34.574 Td [(mode)]TJ 0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.955 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F59 9.9626 Tf 199.497 0 Td [(OR)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F59 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F54 9.9626 Tf 109.837 0 Td [(,)]TJ/F59 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F54 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.956 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -34.573 Td [(request)]TJ +/F59 9.9626 Tf -86.284 -26.38 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -24.612 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -29.828 -19.493 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G -/F54 9.9626 Tf 164.384 -29.887 Td [(124)]TJ + 0 -19.493 Td [(b)]TJ 0 g 0 G +/F62 9.9626 Tf 11.068 0 Td [(Rigth)-250(hand)-250(side\050s\051.)]TJ 13.839 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(An)-190(array)-190(of)-190(type)-190(r)18(eal)-190(or)-190(complex,)-202(rank)-190(1)-190(or)-190(2,)-202(or)-190(an)-190(object)-190(of)-190(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 277.745 0 Td [(psb)]TJ ET - -endstream -endobj -1701 0 obj -<< -/Length 4718 ->> -stream -0 g 0 G -0 g 0 G -0 g 0 G +q +1 0 0 1 469.676 578.595 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F51 9.9626 Tf 99.895 706.129 Td [(dat)]TJ -0 g 0 G -/F54 9.9626 Tf 19.368 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(maximum)-250(operation.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-264(as:)-339(an)-264(integer)-264(or)-265(r)18(eal)-264(variable,)-268(which)-264(may)-264(be)-264(a)-265(scalar)74(,)-268(or)-264(a)-264(rank)]TJ 0 -11.955 Td [(1)-250(or)-250(2)-250(array)111(.)-560(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(request)]TJ -0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 31.023 0 Td [(dat)]TJ/F54 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.955 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ -0 g 0 G - -12.453 -19.926 Td [(2.)]TJ -0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 32.225 0 Td [(mode)]TJ/F54 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F59 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F54 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F59 9.9626 Tf 16.309 0 Td [(request)]TJ/F54 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F67 9.9626 Tf 472.814 578.396 Td [(T)]TJ ET q -1 0 0 1 124.802 429.167 cm -0 0 318.804 27.895 re f +1 0 0 1 478.672 578.595 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 446.403 Td [(call)]TJ -0 g 0 G - [-525(psb_max\050ctxt,dat,&)]TJ 23.537 -10.959 Td [(&)-525(mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [(ior)]TJ -0 g 0 G - [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0 g 0 G -/F54 9.9626 Tf -48.393 -32.179 Td [(3.)]TJ -0 g 0 G - [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F59 9.9626 Tf 216.877 0 Td [(dat)]TJ/F54 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F52 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F54 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.956 Td [(accessed)-250(between)-250(calls:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F67 9.9626 Tf 481.81 578.396 Td [(vect)]TJ ET q -1 0 0 1 124.802 318.582 cm -0 0 318.804 60.772 re f +1 0 0 1 503.359 578.595 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 368.694 Td [(call)]TJ +/F67 9.9626 Tf 506.497 578.396 Td [(type)]TJ 0 g 0 G - [-525(psb_max\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F62 9.9626 Tf 20.922 0 Td [(,)]TJ -351.808 -11.955 Td [(of)-250(type)-250(r)18(eal)-250(or)-250(complex;)-250(its)-250(contents)-250(will)-250(be)-250(written)-250(to)-250(disk.)]TJ 0 g 0 G - [(psb_collective_start_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F59 9.9626 Tf -24.906 -31.448 Td [(\002lename)]TJ 0 g 0 G - [(max_request\051)]TJ -23.537 -10.959 Td [(.......)]TJ -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ +/F62 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(written.)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F94 8.9664 Tf -37.659 -10.958 Td [(call)]TJ +/F59 9.9626 Tf -44.274 -31.448 Td [(vtitle)]TJ 0 g 0 G - [-525(psb_max\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F62 9.9626 Tf 28.772 0 Td [(Matrix)-250(title.)]TJ -3.865 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(A)-244(charachter)-243(variable)-244(holding)-244(a)-243(descriptive)-244(title)-244(for)-243(the)-244(vector)-244(to)-243(be)-244(writ-)]TJ 0 -11.955 Td [(ten)-250(to)-250(\002le.)-310(T)90(ype:)]TJ/F59 9.9626 Tf 70.763 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -108.95 -11.955 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F67 9.9626 Tf 298.533 0 Td [(-)]TJ/F62 9.9626 Tf 5.231 0 Td [(,)-387(in)]TJ -303.764 -11.956 Td [(which)-254(case)-253(the)-254(default)-254(input)-253(unit)-254(5)-254(\050i.e.)-321(standar)18(d)-253(input)-254(in)-254(Unix)-253(jar)18(gon\051)-254(is)]TJ 0 -11.955 Td [(used.)-310(Default:)]TJ/F67 9.9626 Tf 65.184 0 Td [(-)]TJ/F62 9.9626 Tf 5.231 0 Td [(.)]TJ 0 g 0 G - [(psb_collective_end_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F59 9.9626 Tf -95.322 -19.492 Td [(iunit)]TJ 0 g 0 G - [(max_request\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F62 9.9626 Tf 27.108 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.201 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 23.999 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F67 9.9626 Tf 287.757 0 Td [(-)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ 0 g 0 G +/F59 9.9626 Tf -317.894 -20.836 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 103.537 -234.421 Td [(125)]TJ 0 g 0 G -ET - -endstream -endobj -1708 0 obj -<< -/Length 5813 ->> -stream + 0 -19.492 Td [(iret)]TJ 0 g 0 G +/F62 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F59 11.9552 Tf -24.907 -20.836 Td [(Notes)]TJ/F62 9.9626 Tf 14.944 -11.955 Td [(If)-290(this)-289(function)-290(is)-290(call)1(ed)-290(on)-290(a)-289(vector)-290(v)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.12)-1000(psb)]TJ +0 g 0 G + [-290(on)-289(a)-290(distributed)-290(communicator)-289(only)]TJ -14.944 -11.955 Td [(the)-316(local)-317(part)-316(is)-316(written)-317(in)-316(output.)-509(T)92(o)-316(get)-317(a)-316(single)-316(MatrixMarket)-317(\002le)-316(with)-316(the)]TJ 0 -11.955 Td [(whole)-243(vect)1(or)-243(when)-243(appr)18(opriate,)-244(e.g.)-307(for)-243(debugging)-242(purposes,)-244(one)-243(could)]TJ/F60 9.9626 Tf 318.257 0 Td [(gather)]TJ/F62 9.9626 Tf -318.257 -11.955 Td [(the)-349(whole)-349(vector)-349(on)-349(a)-349(single)-349(rank)-349(and)-349(then)-349(writ)1(e)-349(it.)-607(Consider)-349(the)-349(following)]TJ 0 -11.956 Td [(example)-250(for)-250(a)]TJ/F60 9.9626 Tf 62.495 0 Td [(double)]TJ/F62 9.9626 Tf 28.692 0 Td [(pr)18(ecision)-250(vector)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 150.705 149.348 cm +0 0 343.711 82.69 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG BT -/F51 11.9552 Tf 207.803 706.129 Td [(min)-250(\227)-250(Global)-250(minimum)]TJ +/F102 8.9664 Tf 153.694 221.378 Td [(real)]TJ +0 g 0 G + [(\050psb_dpk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ 0 g 0 G -/F59 9.9626 Tf -57.098 -19.198 Td [(call)-525(psb_min\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F54 9.9626 Tf 14.944 -22.401 Td [(This)-328(subr)18(outine)-327(implements)-328(a)-328(minimum)-327(value)-328(r)18(eduction)-328(o)1(peration)-328(based)]TJ -14.944 -11.955 Td [(on)-250(the)-250(underlying)-250(communication)-250(library)111(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -20.288 Td [(T)90(ype:)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -20.409 Td [(On)-250(Entry)]TJ + [-525(vglobal\050:\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -21.918 Td [(call)]TJ 0 g 0 G + [-525(psb_gather\050vglobal,v,desc,info\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(if)]TJ 0 g 0 G - 0 -20.408 Td [(ctxt)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ + [-525(\050iam)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -20.409 Td [(dat)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(==)]TJ 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(minimum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-264(as:)-339(an)-264(integer)-264(or)-265(r)18(eal)-264(variable,)-268(which)-264(may)-264(be)-264(a)-265(scalar)74(,)-268(or)-264(a)-264(rank)]TJ 0 -11.956 Td [(1)-250(or)-250(2)-250(array)111(.)-560(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -20.408 Td [(root)]TJ + [-525(psb_root_\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-221(to)-221(hold)-221(the)-222(\002nal)-221(value,)-227(or)]TJ/F83 10.3811 Tf 147.052 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)-221(to)-221(make)-222(it)-221(available)-221(on)-221(all)-221(pr)18(ocesses.)]TJ -153.592 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F83 10.3811 Tf 131.101 0 Td [(\000)]TJ/F54 9.9626 Tf 8.195 0 Td [(1)]TJ/F61 10.3811 Tf 7.873 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F61 10.3811 Tf 19.923 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ 0 g 0 G -/F51 9.9626 Tf -254.343 -32.364 Td [(mode)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.956 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F59 9.9626 Tf 199.497 0 Td [(OR)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F59 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F54 9.9626 Tf 109.837 0 Td [(,)]TJ/F59 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F54 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.955 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ + [-525(mm_array_write\050vglobal,vtitle,info,filename\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.958 Td [(end)-525(if)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -32.364 Td [(request)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(call)-525(deallocate)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.956 Td [(be)-250(pr)18(esent.)]TJ + [(\050vglobal,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -22.401 Td [(On)-250(Return)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(stat)]TJ 0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - 0 -20.408 Td [(dat)]TJ + [(info\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(minimum)-250(operation.)]TJ 5.54 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.781 0 Td [(.)]TJ +/F62 9.9626 Tf -2.989 -23.777 Td [(T)92(o)-250(simplify)-250(this)-250(pr)18(ocedur)18(e)-250(in)]TJ/F67 9.9626 Tf 129.513 0 Td [(C)]TJ/F62 9.9626 Tf 5.23 0 Td [(,)-250(ther)18(e)-250(is)-250(a)-250(utility)-250(function)]TJ 0 g 0 G - 79.264 -29.887 Td [(126)]TJ + 29.64 -41.41 Td [(148)]TJ 0 g 0 G ET endstream endobj -1712 0 obj +1953 0 obj << -/Length 4437 +/Length 655 >> stream 0 g 0 G 0 g 0 G -BT -/F54 9.9626 Tf 124.802 706.129 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-264(as:)-339(an)-264(integer)-264(or)-265(r)18(eal)-264(variable,)-268(which)-264(may)-264(be)-264(a)-265(scalar)74(,)-268(or)-264(a)-264(rank)]TJ 0 -11.956 Td [(1)-250(or)-250(2)-250(array)111(.)]TJ 0 -11.955 Td [(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(request)]TJ -0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ -0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ -0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 31.023 0 Td [(dat)]TJ/F54 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.955 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ -0 g 0 G - -12.453 -19.926 Td [(2.)]TJ -0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 32.225 0 Td [(mode)]TJ/F54 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F59 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F54 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F59 9.9626 Tf 16.309 0 Td [(request)]TJ/F54 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -ET q -1 0 0 1 124.802 441.123 cm -0 0 318.804 27.895 re f +1 0 0 1 99.895 695.17 cm +0 0 343.711 16.936 re f Q 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 458.358 Td [(call)]TJ +/F102 8.9664 Tf 102.884 701.446 Td [(psb_i_t)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_min\050ctxt,dat,&)]TJ 23.537 -10.959 Td [(&)-525(mode)]TJ + [-525(psb_c_)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [(<)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [(ior)]TJ + [(s,d,c,z)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(>)]TJ 0 g 0 G - [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ + [(global_vec_write\050vh,cdh\051;)]TJ 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G +/F62 9.9626 Tf -2.989 -24.209 Td [(that)-250(pr)18(oduces)-250(exactly)-250(this)-250(r)18(esult.)]TJ 0 g 0 G -/F54 9.9626 Tf -48.393 -32.179 Td [(3.)]TJ + 164.384 -586.799 Td [(149)]TJ 0 g 0 G - [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F59 9.9626 Tf 216.877 0 Td [(dat)]TJ/F54 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F52 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F54 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET -q -1 0 0 1 124.802 330.537 cm -0 0 318.804 60.772 re f -Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -BT -/F94 8.9664 Tf 137.205 380.649 Td [(call)]TJ -0 g 0 G - [-525(psb_min\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(psb_collective_start_,&)]TJ 23.537 -10.958 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + +endstream +endobj +1957 0 obj +<< +/Length 1109 +>> +stream 0 g 0 G - [(min_request\051)]TJ -23.537 -10.959 Td [(.......)]TJ -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F94 8.9664 Tf -37.659 -10.959 Td [(call)]TJ +BT +/F59 14.3462 Tf 150.705 705.784 Td [(10)-1000(Preconditioner)-250(routines)]TJ/F62 9.9626 Tf 0 -22.702 Td [(The)-228(base)-227(PSBLAS)-228(library)-227(contains)-228(the)-227(implementation)-228(of)-227(two)-228(simple)-227(pr)18(econdi-)]TJ 0 -11.955 Td [(tioning)-250(techniques:)]TJ 0 g 0 G - [-525(psb_min\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + 13.888 -19.925 Td [(\225)]TJ 0 g 0 G - [(psb_collective_end_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [-500(Diagonal)-250(Scaling)]TJ 0 g 0 G - [(min_request\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG + 0 -19.926 Td [(\225)]TJ 0 g 0 G + [-500(Block)-250(Jacobi)-250(with)-250(ILU\0500\051)-250(factorization)]TJ -13.888 -19.925 Td [(The)-356(supporting)-356(data)-356(type)-356(and)-356(subr)18(outine)-356(interfaces)-356(ar)18(e)-356(de\002ned)-356(in)-356(the)-356(mod-)]TJ 0 -11.955 Td [(ule)]TJ/F67 9.9626 Tf 16.301 0 Td [(psb_prec_mod)]TJ/F62 9.9626 Tf 62.764 0 Td [(.)-350(The)-263(old)-263(interfaces)]TJ/F67 9.9626 Tf 87.314 0 Td [(psb_precinit)]TJ/F62 9.9626 Tf 65.386 0 Td [(and)]TJ/F67 9.9626 Tf 19.489 0 Td [(psb_precbld)]TJ/F62 9.9626 Tf 60.156 0 Td [(ar)18(e)-263(still)]TJ -311.41 -11.955 Td [(supported)-250(for)-250(backwar)18(d)-250(compatibility)]TJ 0 g 0 G -/F54 9.9626 Tf 103.537 -246.376 Td [(127)]TJ + 164.383 -497.003 Td [(150)]TJ 0 g 0 G ET endstream endobj -1720 0 obj +1963 0 obj << -/Length 5616 +/Length 5016 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.13)-1000(psb)]TJ -ET -q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 207.803 706.129 Td [(amx)-250(\227)-250(Global)-250(maximum)-250(absolute)-250(value)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(10.1)-1000(init)-250(\227)-250(Initialize)-250(a)-250(preconditioner)]TJ 0 g 0 G 0 g 0 G -/F59 9.9626 Tf -57.098 -20.269 Td [(call)-525(psb_amx\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F54 9.9626 Tf 14.944 -24.611 Td [(This)-342(subr)18(outine)-342(implements)-342(a)-342(maximum)-341(absolute)-342(value)-342(r)18(eduction)-342(opera-)]TJ -14.944 -11.955 Td [(tion)-250(based)-250(on)-250(the)-250(underlying)-250(communication)-250(library)111(.)]TJ +/F67 9.9626 Tf 0 -18.964 Td [(call)-525(prec%init\050icontxt,ptype,)-525(info\051)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.945 Td [(T)90(ype:)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -22.619 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -22.619 Td [(ctxt)]TJ + 0 -19.925 Td [(icontxt)]TJ +0 g 0 G +/F62 9.9626 Tf 35.965 0 Td [(the)-250(communication)-250(context.)]TJ -11.058 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 28.343 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 24 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(ptype)]TJ +0 g 0 G +/F62 9.9626 Tf 30.994 0 Td [(the)-250(type)-250(of)-250(pr)18(econditioner)74(.)-310(Scope:)]TJ/F59 9.9626 Tf 151.121 0 Td [(global)]TJ/F62 9.9626 Tf -157.208 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(character)-250(string,)-250(see)-250(usage)-250(notes.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(On)-250(Exit)]TJ +0 g 0 G +0 g 0 G + 0 -19.925 Td [(prec)]TJ +0 g 0 G +/F62 9.9626 Tf 24.349 0 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -30.874 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 196.511 0 Td [(psb)]TJ +ET +q +1 0 0 1 337.631 446.268 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 340.77 446.069 Td [(prec)]TJ +ET +q +1 0 0 1 362.319 446.268 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 365.457 446.069 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +/F59 9.9626 Tf -286.483 -19.925 Td [(info)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(dat)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -30.326 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(Err)18(or)-250(code:)-310(if)-250(no)-250(err)18(or)74(,)-250(0)-250(is)-250(r)18(eturned.)]TJ/F59 11.9552 Tf -24.907 -21.917 Td [(Notes)]TJ/F62 9.9626 Tf 34.311 0 Td [(Legal)-245(inputs)-244(to)-245(this)-245(subr)18(outine)-245(ar)18(e)-244(interpr)18(eted)-245(depending)-245(on)-244(the)]TJ/F60 9.9626 Tf 285.595 0 Td [(p)-25(t)-25(y)-80(p)-25(e)]TJ/F62 9.9626 Tf -319.906 -11.956 Td [(string)-250(as)-250(follows)]TJ +0 0 1 rg 0 0 1 RG +/F62 7.5716 Tf 72.358 3.617 Td [(4)]TJ 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(maximum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-300(a)-300(rank)-300(1)-300(or)-301(2)-300(array)111(.)-760(T)90(ype,)-313(kind,)-312(rank)-300(and)-301(size)-300(must)-300(agr)18(ee)-300(on)-300(all)-300(pr)18(o-)]TJ 0 -11.955 Td [(cesses.)]TJ +/F62 9.9626 Tf 4.284 -3.617 Td [(:)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(root)]TJ +/F59 9.9626 Tf -76.642 -19.925 Td [(NONE)]TJ 0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-221(to)-221(hold)-221(the)-222(\002nal)-221(value,)-227(or)]TJ/F83 10.3811 Tf 147.052 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)-221(to)-221(make)-222(it)-221(available)-221(on)-221(all)-221(pr)18(ocesses.)]TJ -153.592 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F83 10.3811 Tf 131.101 0 Td [(\000)]TJ/F54 9.9626 Tf 8.195 0 Td [(1)]TJ/F61 10.3811 Tf 7.873 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F61 10.3811 Tf 19.923 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ +/F62 9.9626 Tf 35.965 0 Td [(No)-250(pr)18(econditioning,)-250(i.e.)-310(the)-250(pr)18(econditioner)-250(is)-250(just)-250(a)-250(copy)-250(operator)74(.)]TJ 0 g 0 G -/F51 9.9626 Tf -254.343 -34.574 Td [(mode)]TJ +/F59 9.9626 Tf -35.965 -19.925 Td [(DIAG)]TJ 0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.955 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F59 9.9626 Tf 199.497 0 Td [(OR)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F59 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F54 9.9626 Tf 109.837 0 Td [(,)]TJ/F59 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F54 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.956 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ +/F62 9.9626 Tf 33.205 0 Td [(Diagonal)-371(scaling;)-432(each)-371(entry)-372(of)-371(the)-371(input)-371(vector)-372(is)-371(multiplied)-371(by)-371(the)]TJ -8.298 -11.955 Td [(r)18(ecipr)18(ocal)-266(of)-267(the)-266(sum)-267(of)-266(the)-266(absolute)-267(values)-266(of)-267(the)-266(coef)18(\002cients)-266(in)-267(the)-266(cor)18(-)]TJ 0 -11.955 Td [(r)18(esponding)-250(r)18(ow)-250(of)-250(matrix)]TJ/F60 9.9626 Tf 116.148 0 Td [(A)]TJ/F62 9.9626 Tf 7.318 0 Td [(;)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -34.573 Td [(request)]TJ +/F59 9.9626 Tf -148.373 -19.926 Td [(BJAC)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ +/F62 9.9626 Tf 30.446 0 Td [(Pr)18(econdition)-211(by)-212(a)-211(factorization)-212(of)-211(the)-212(block-diagonal)-211(of)-212(matrix)]TJ/F60 9.9626 Tf 273.867 0 Td [(A)]TJ/F62 9.9626 Tf 7.317 0 Td [(,)-219(wher)18(e)]TJ -286.723 -11.955 Td [(block)-347(boundaries)-348(ar)18(e)-347(determined)-347(by)-348(the)-347(data)-347(allocation)-348(boundaries)-347(for)]TJ 0 -11.955 Td [(each)-223(pr)18(ocess;)-232(r)18(equir)18(es)-222(no)-223(communication.)-301(Only)-223(the)-222(incomplete)-223(factoriza-)]TJ 0 -11.955 Td [(tion)]TJ/F60 9.9626 Tf 20.498 0 Td [(I)-96(L)-9(U)]TJ/F93 10.3811 Tf 18.202 0 Td [(\050)]TJ/F62 9.9626 Tf 4.149 0 Td [(0)]TJ/F93 10.3811 Tf 5.106 0 Td [(\051)]TJ/F62 9.9626 Tf 6.64 0 Td [(is)-250(curr)18(ently)-250(implemented.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -24.612 Td [(On)-250(Return)]TJ +ET +q +1 0 0 1 99.895 130.181 cm +[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S +Q +BT +/F62 5.9776 Tf 110.755 123.219 Td [(4)]TJ/F62 7.9701 Tf 3.487 -2.893 Td [(The)-250(string)-250(is)-250(case-insensitive)]TJ 0 g 0 G 0 g 0 G -/F54 9.9626 Tf 164.384 -29.887 Td [(128)]TJ +/F62 9.9626 Tf 150.037 -29.888 Td [(151)]TJ 0 g 0 G ET endstream endobj -1724 0 obj +1973 0 obj << -/Length 4760 +/Length 7572 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(dat)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(10.2)-1000(build)-250(\227)-250(Builds)-250(a)-250(preconditioner)]TJ 0 g 0 G -/F54 9.9626 Tf 19.368 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(maximum)-250(operation.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-300(a)-300(rank)-300(1)-300(or)-301(2)-300(array)111(.)-760(T)90(ype,)-313(kind,)-312(rank)-300(and)-301(size)-300(must)-300(agr)18(ee)-300(on)-300(all)-300(pr)18(o-)]TJ 0 -11.955 Td [(cesses.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(request)]TJ +/F67 9.9626 Tf 0 -20.364 Td [(call)-525(prec%build\050a,)-525(desc_a,)-525(info[,amold,vmold,imold]\051)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +/F59 9.9626 Tf 0 -24.086 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 31.023 0 Td [(dat)]TJ/F54 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.956 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ +/F59 9.9626 Tf -29.828 -22.815 Td [(On)-250(Entry)]TJ 0 g 0 G - -12.453 -19.925 Td [(2.)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 32.225 0 Td [(mode)]TJ/F54 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F59 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F54 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F59 9.9626 Tf 16.309 0 Td [(request)]TJ/F54 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG + 0 -22.816 Td [(a)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(the)-250(system)-250(sparse)-250(matrix.)-310(Scope:)]TJ/F59 9.9626 Tf 146.229 0 Td [(local)]TJ/F62 9.9626 Tf -131.285 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(,)-250(tar)18(get.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(sparse)-250(matrix)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 190.872 0 Td [(psb)]TJ ET q -1 0 0 1 124.802 417.212 cm -0 0 318.804 27.895 re f +1 0 0 1 382.802 580.382 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 385.94 580.183 Td [(Tspmat)]TJ +ET +q +1 0 0 1 417.95 580.382 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 434.448 Td [(call)]TJ +/F67 9.9626 Tf 421.088 580.183 Td [(type)]TJ 0 g 0 G - [-525(psb_amx\050ctxt,dat,&)]TJ 23.537 -10.959 Td [(&)-525(mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [(ior)]TJ +/F59 9.9626 Tf -291.305 -22.815 Td [(prec)]TJ 0 g 0 G - [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F62 9.9626 Tf 24.348 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-190(as:)-280(an)-190(alr)18(eady)-190(initialized)-190(pr)18(econdtioner)-190(data)-190(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 277.288 0 Td [(psb)]TJ +ET +q +1 0 0 1 469.217 509.746 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 472.355 509.547 Td [(prec)]TJ +ET +q +1 0 0 1 493.904 509.746 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 497.043 509.547 Td [(type)]TJ 0 g 0 G 0 g 0 G -/F54 9.9626 Tf -48.393 -32.18 Td [(3.)]TJ +/F59 9.9626 Tf -346.338 -34.771 Td [(desc)]TJ +ET +q +1 0 0 1 171.218 474.975 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 174.207 474.776 Td [(a)]TJ 0 g 0 G - [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F59 9.9626 Tf 216.877 0 Td [(dat)]TJ/F54 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F52 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F54 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F62 9.9626 Tf 9.962 0 Td [(the)-250(pr)18(oblem)-250(communication)-250(descriptor)74(.)-310(Scope:)]TJ/F59 9.9626 Tf 208.625 0 Td [(local)]TJ/F62 9.9626 Tf -217.183 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(,)-250(tar)18(get.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(communication)-250(descriptor)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 247.649 0 Td [(psb)]TJ ET q -1 0 0 1 124.802 306.627 cm -0 0 318.804 60.772 re f +1 0 0 1 439.579 439.11 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +BT +/F67 9.9626 Tf 442.717 438.911 Td [(desc)]TJ +ET +q +1 0 0 1 464.266 439.11 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 467.404 438.911 Td [(type)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -337.62 -22.816 Td [(amold)]TJ +0 g 0 G +/F62 9.9626 Tf 33.763 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(matrix)-250(storage.)]TJ -8.857 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(a)-250(class)-250(derived)-250(fr)18(om)]TJ/F67 9.9626 Tf 201.393 0 Td [(psb)]TJ +ET +q +1 0 0 1 393.323 368.474 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 396.461 368.275 Td [(T)]TJ +ET +q +1 0 0 1 402.319 368.474 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 405.457 368.275 Td [(base)]TJ +ET +q +1 0 0 1 427.006 368.474 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 430.144 368.275 Td [(sparse)]TJ +ET +q +1 0 0 1 462.154 368.474 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F94 8.9664 Tf 127.791 356.739 Td [(call)]TJ +/F67 9.9626 Tf 465.292 368.275 Td [(mat)]TJ/F62 9.9626 Tf 15.691 0 Td [(.)]TJ 0 g 0 G - [-525(psb_amx\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F59 9.9626 Tf -330.278 -22.816 Td [(vmold)]TJ 0 g 0 G - [(psb_collective_start_,&)]TJ 23.536 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F62 9.9626 Tf 34.321 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(vector)-250(storage.)]TJ -9.415 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(a)-250(class)-250(derived)-250(fr)18(om)]TJ/F67 9.9626 Tf 201.393 0 Td [(psb)]TJ +ET +q +1 0 0 1 393.323 297.838 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 396.461 297.638 Td [(T)]TJ +ET +q +1 0 0 1 402.319 297.838 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 405.457 297.638 Td [(base)]TJ +ET +q +1 0 0 1 427.006 297.838 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 430.144 297.638 Td [(vect)]TJ +ET +q +1 0 0 1 451.693 297.838 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 454.832 297.638 Td [(type)]TJ/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [(amx_request\051)]TJ -14.122 -10.959 Td [(.......)]TJ -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ +/F59 9.9626 Tf -325.048 -22.815 Td [(imold)]TJ +0 g 0 G +/F62 9.9626 Tf 32.099 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(integer)-250(vector)-250(storage.)]TJ -7.193 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-190(as:)-280(an)-190(object)-190(of)-190(a)-190(class)-190(derived)-190(fr)18(om)-190(\050integer\051)]TJ/F67 9.9626 Tf 235.804 0 Td [(psb)]TJ +ET +q +1 0 0 1 427.733 227.202 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 430.872 227.002 Td [(T)]TJ +ET +q +1 0 0 1 436.73 227.202 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 439.868 227.002 Td [(base)]TJ +ET +q +1 0 0 1 461.417 227.202 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 464.555 227.002 Td [(vect)]TJ +ET +q +1 0 0 1 486.104 227.202 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 489.242 227.002 Td [(type)]TJ/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F94 8.9664 Tf -37.659 -10.959 Td [(call)]TJ +/F59 9.9626 Tf -359.459 -24.085 Td [(On)-250(Return)]TJ 0 g 0 G - [-525(psb_amx\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ 0 g 0 G - [(psb_collective_end_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + 0 -22.816 Td [(prec)]TJ 0 g 0 G - [(amx_request\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F62 9.9626 Tf 24.348 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econdtioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 193.612 0 Td [(psb)]TJ +ET +q +1 0 0 1 385.542 132.48 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 388.68 132.281 Td [(prec)]TJ +ET +q +1 0 0 1 410.229 132.48 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 413.367 132.281 Td [(type)]TJ 0 g 0 G 0 g 0 G -/F54 9.9626 Tf 103.537 -222.465 Td [(129)]TJ +/F62 9.9626 Tf -98.279 -41.843 Td [(152)]TJ 0 g 0 G ET endstream endobj -1731 0 obj +1977 0 obj << -/Length 5619 +/Length 1021 >> stream 0 g 0 G 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.14)-1000(psb)]TJ -ET -q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 207.803 706.129 Td [(amn)-250(\227)-250(Global)-250(minimum)-250(absolute)-250(value)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -57.098 -20.269 Td [(call)-525(psb_amn\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F54 9.9626 Tf 14.944 -24.611 Td [(This)-360(subr)18(outine)-360(impl)1(ements)-360(a)-360(minimum)-360(absolute)-360(value)-359(r)18(eduction)-360(opera-)]TJ -14.944 -11.955 Td [(tion)-250(based)-250(on)-250(the)-250(underlying)-250(communication)-250(library)111(.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.945 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -22.619 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -22.619 Td [(ctxt)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(dat)]TJ -0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(minimum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-300(a)-300(rank)-300(1)-300(or)-301(2)-300(array)111(.)-760(T)90(ype,)-313(kind,)-312(rank)-300(and)-301(size)-300(must)-300(agr)18(ee)-300(on)-300(all)-300(pr)18(o-)]TJ 0 -11.955 Td [(cesses.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -22.619 Td [(root)]TJ -0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-221(to)-221(hold)-221(the)-222(\002nal)-221(value,)-227(or)]TJ/F83 10.3811 Tf 147.052 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)-221(to)-221(make)-222(it)-221(available)-221(on)-221(all)-221(pr)18(ocesses.)]TJ -153.592 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F83 10.3811 Tf 131.101 0 Td [(\000)]TJ/F54 9.9626 Tf 8.195 0 Td [(1)]TJ/F61 10.3811 Tf 7.873 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F61 10.3811 Tf 19.923 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ -0 g 0 G -/F51 9.9626 Tf -254.343 -34.574 Td [(mode)]TJ -0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.955 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F59 9.9626 Tf 199.497 0 Td [(OR)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F59 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F54 9.9626 Tf 109.837 0 Td [(,)]TJ/F59 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F54 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.956 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -34.573 Td [(request)]TJ -0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -24.612 Td [(On)-250(Return)]TJ +BT +/F59 9.9626 Tf 99.895 706.129 Td [(info)]TJ 0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -24.907 -21.918 Td [(The)]TJ/F67 9.9626 Tf 20.388 0 Td [(amold)]TJ/F62 9.9626 Tf 26.152 0 Td [(,)]TJ/F67 9.9626 Tf 6.506 0 Td [(vmold)]TJ/F62 9.9626 Tf 29.862 0 Td [(and)]TJ/F67 9.9626 Tf 20.577 0 Td [(imold)]TJ/F62 9.9626 Tf 29.862 0 Td [(ar)18(guments)-372(may)-373(be)-372(employed)-373(to)-372(interface)-372(with)]TJ -133.347 -11.955 Td [(special)-250(devices,)-250(such)-250(as)-250(GPUs)-250(and)-250(other)-250(accelerators.)]TJ 0 g 0 G -/F54 9.9626 Tf 164.384 -29.887 Td [(130)]TJ + 164.384 -533.997 Td [(153)]TJ 0 g 0 G ET endstream endobj -1735 0 obj +1985 0 obj << -/Length 4753 +/Length 5673 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(dat)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(10.3)-1000(apply)-250(\227)-250(Preconditioner)-250(application)-250(routine)]TJ 0 g 0 G -/F54 9.9626 Tf 19.368 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(minimum)-250(operation.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-250(a)-250(rank)-250(1)-250(or)-250(2)-250(array)111(.)]TJ 0 -11.955 Td [(T)90(ype,)-250(kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(request)]TJ +/F67 9.9626 Tf 0 -18.964 Td [(call)-525(prec%apply\050x,y,desc_a,info,trans,work\051)]TJ 0 -11.955 Td [(call)-525(prec%apply\050x,desc_a,info,trans\051)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 31.023 0 Td [(dat)]TJ/F54 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.956 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - -12.453 -19.925 Td [(2.)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 32.225 0 Td [(mode)]TJ/F54 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F59 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F54 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F59 9.9626 Tf 16.309 0 Td [(request)]TJ/F54 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG + 0 -19.925 Td [(prec)]TJ +0 g 0 G +/F62 9.9626 Tf 24.348 0 Td [(the)-250(pr)18(econditioner)74(.)-310(Scope:)]TJ/F59 9.9626 Tf 117.837 0 Td [(local)]TJ/F62 9.9626 Tf -117.279 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 196.511 0 Td [(psb)]TJ ET q -1 0 0 1 124.802 417.212 cm -0 0 318.804 27.895 re f +1 0 0 1 388.441 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 434.448 Td [(call)]TJ -0 g 0 G - [-525(psb_amn\050ctxt,dat,&)]TJ 23.537 -10.959 Td [(&)-525(mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [(ior)]TJ -0 g 0 G - [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0 g 0 G -/F54 9.9626 Tf -48.393 -32.18 Td [(3.)]TJ -0 g 0 G - [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F59 9.9626 Tf 216.877 0 Td [(dat)]TJ/F54 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F52 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F54 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F67 9.9626 Tf 391.579 577.576 Td [(prec)]TJ ET q -1 0 0 1 124.802 306.627 cm -0 0 318.804 60.772 re f +1 0 0 1 413.128 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 356.739 Td [(call)]TJ -0 g 0 G - [-525(psb_amn\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(psb_collective_start_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(amn_request\051)]TJ -23.537 -10.959 Td [(.......)]TJ -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F94 8.9664 Tf -37.659 -10.959 Td [(call)]TJ -0 g 0 G - [-525(psb_amn\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(psb_collective_end_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(amn_request\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F67 9.9626 Tf 416.266 577.576 Td [(type)]TJ 0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 103.537 -222.465 Td [(131)]TJ +/F59 9.9626 Tf -286.483 -19.925 Td [(x)]TJ 0 g 0 G +/F62 9.9626 Tf 9.962 0 Td [(the)-250(sour)18(ce)-250(vector)74(.)-310(Scope:)]TJ/F59 9.9626 Tf 111.142 0 Td [(local)]TJ/F62 9.9626 Tf -96.198 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 218.688 0 Td [(psb)]TJ ET - -endstream -endobj -1742 0 obj -<< -/Length 5776 ->> -stream -0 g 0 G -0 g 0 G +q +1 0 0 1 410.618 521.985 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.15)-1000(psb)]TJ +/F67 9.9626 Tf 413.756 521.785 Td [(T)]TJ ET q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 419.614 521.985 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 11.9552 Tf 207.803 706.129 Td [(nrm2)-250(\227)-250(Global)-250(2-norm)-250(reduction)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -57.098 -19.198 Td [(call)-525(psb_nrm2\050ctxt,)-525(dat)-525([,)-525(root,)-525(mode,)-525(request]\051)]TJ/F54 9.9626 Tf 14.944 -22.401 Td [(This)-297(subr)18(outine)-296(implements)-297(a)-297(2-norm)-296(value)-297(r)18(eduction)-297(operation)-296(based)-297(on)]TJ -14.944 -11.955 Td [(the)-250(underlying)-250(communication)-250(library)111(.)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -20.288 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -20.409 Td [(On)-250(Entry)]TJ +/F67 9.9626 Tf 422.752 521.785 Td [(vect)]TJ +ET +q +1 0 0 1 444.301 521.985 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 447.439 521.785 Td [(type)]TJ 0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G - 0 -20.408 Td [(ctxt)]TJ +/F59 9.9626 Tf -317.656 -19.925 Td [(desc)]TJ +ET +q +1 0 0 1 171.218 502.059 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F59 9.9626 Tf 174.207 501.86 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +/F62 9.9626 Tf 9.962 0 Td [(the)-250(pr)18(oblem)-250(communication)-250(descriptor)74(.)-310(Scope:)]TJ/F59 9.9626 Tf 208.625 0 Td [(local)]TJ/F62 9.9626 Tf -217.183 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(communication)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 200.207 0 Td [(psb)]TJ +ET +q +1 0 0 1 392.137 466.194 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 395.275 465.994 Td [(desc)]TJ +ET +q +1 0 0 1 416.824 466.194 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 419.963 465.994 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.409 Td [(dat)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(The)-250(local)-250(contribution)-250(to)-250(the)-250(global)-250(minimum.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-370(as:)-551(a)-371(r)18(eal)-370(variable,)-401(which)-370(may)-371(be)-370(a)-371(scalar)74(,)-400(or)-371(a)-370(rank)-371(1)-370(array)111(.)]TJ 0 -11.956 Td [(Kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ +/F59 9.9626 Tf -290.179 -19.925 Td [(trans)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -20.408 Td [(root)]TJ +/F62 9.9626 Tf 27.666 0 Td [(Scope:)]TJ -2.76 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(character)74(.)]TJ 0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-221(to)-221(hold)-221(the)-222(\002nal)-221(value,)-227(or)]TJ/F83 10.3811 Tf 147.052 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1)-221(to)-221(make)-222(it)-221(available)-221(on)-221(all)-221(pr)18(ocesses.)]TJ -153.592 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)]TJ/F83 10.3811 Tf 131.101 0 Td [(\000)]TJ/F54 9.9626 Tf 8.195 0 Td [(1)]TJ/F61 10.3811 Tf 7.873 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 10.987 0 Td [(r)-17(o)-35(o)-35(t)]TJ/F61 10.3811 Tf 19.923 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1,)-250(default)-250(-1.)]TJ +/F59 9.9626 Tf -24.906 -19.926 Td [(work)]TJ 0 g 0 G -/F51 9.9626 Tf -254.343 -32.364 Td [(mode)]TJ +/F62 9.9626 Tf 28.782 0 Td [(an)-250(optional)-250(work)-250(space)-250(Scope:)]TJ/F59 9.9626 Tf 136.476 0 Td [(local)]TJ/F62 9.9626 Tf -140.352 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(optional)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(double)-250(pr)18(ecision)-250(array)111(.)]TJ 0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Whether)-314(the)-314(call)-313(is)-314(started)-314(in)-314(non-blocking)-314(mode)-314(and)-313(completed)-314(later)74(,)]TJ -5.539 -11.955 Td [(or)-250(is)-250(executed)-250(synchr)18(onously)111(.)]TJ 0 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-325(as:)-460(an)-325(i)1(nteger)-325(value.)-535(The)-325(action)-325(to)-325(be)-325(t)1(aken)-325(is)-325(determined)-325(by)]TJ 0 -11.956 Td [(its)-375(bit)-374(\002elds,)-406(which)-375(can)-374(be)-375(set)-374(with)-375(bitwise)]TJ/F59 9.9626 Tf 199.497 0 Td [(OR)]TJ/F54 9.9626 Tf 10.461 0 Td [(.)-375(Basic)-374(action)-375(values)-374(ar)18(e)]TJ/F59 9.9626 Tf -209.958 -11.955 Td [(psb_collective_start_)]TJ/F54 9.9626 Tf 109.837 0 Td [(,)]TJ/F59 9.9626 Tf 4.545 0 Td [(psb_collective_end_)]TJ/F54 9.9626 Tf 99.377 0 Td [(.)-292(Default:)-282(both)-196(\002elds)-195(ar)18(e)]TJ -213.759 -11.955 Td [(selected)-250(\050i.e.)-310(r)18(equir)18(e)-250(synchr)18(onous)-250(completion\051.)]TJ +/F59 9.9626 Tf -24.906 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -32.364 Td [(request)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.578 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.956 Td [(be)-250(pr)18(esent.)]TJ + 0 -19.925 Td [(y)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -22.401 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 10.52 0 Td [(the)-250(destination)-250(vector)74(.)-310(Scope:)]TJ/F59 9.9626 Tf 131.914 0 Td [(local)]TJ/F62 9.9626 Tf -117.528 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 218.688 0 Td [(psb)]TJ +ET +q +1 0 0 1 410.618 276.904 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 413.756 276.704 Td [(T)]TJ +ET +q +1 0 0 1 419.614 276.904 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 422.752 276.704 Td [(vect)]TJ +ET +q +1 0 0 1 444.301 276.904 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 447.439 276.704 Td [(type)]TJ 0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G - 0 -20.408 Td [(dat)]TJ +/F59 9.9626 Tf -317.656 -19.925 Td [(info)]TJ 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(On)-250(destination)-250(pr)18(ocess\050es\051,)-250(the)-250(r)18(esult)-250(of)-250(the)-250(2-norm)-250(r)18(eduction.)]TJ 5.54 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.781 0 Td [(.)]TJ +/F62 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G - 79.264 -29.887 Td [(132)]TJ + 139.477 -118.52 Td [(154)]TJ 0 g 0 G ET endstream endobj -1746 0 obj +1990 0 obj << -/Length 5783 +/Length 3387 >> stream 0 g 0 G 0 g 0 G BT -/F54 9.9626 Tf 124.802 706.129 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(r)18(eal)-250(variable,)-250(which)-250(may)-250(be)-250(a)-250(scalar)74(,)-250(or)-250(a)-250(rank)-250(1)-250(array)111(.)]TJ 0 -11.956 Td [(Kind,)-250(rank)-250(and)-250(size)-250(must)-250(agr)18(ee)-250(on)-250(all)-250(pr)18(ocesses.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(request)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(10.4)-1000(descr)-250(\227)-250(Prints)-250(a)-250(description)-250(of)-250(current)-250(preconditioner)]TJ 0 g 0 G -/F54 9.9626 Tf 38.735 0 Td [(A)-250(r)18(equest)-250(variable)-250(to)-250(check)-250(for)-250(operation)-250(completion.)]TJ -13.828 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.956 Td [(If)]TJ/F59 9.9626 Tf 8.943 0 Td [(mode)]TJ/F54 9.9626 Tf 23.19 0 Td [(does)-228(not)-227(specify)-228(synchr)18(onous)-228(completion,)-232(then)-227(this)-228(variable)-228(must)]TJ -32.133 -11.955 Td [(be)-250(pr)18(esent.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +/F67 9.9626 Tf 0 -18.964 Td [(call)-525(prec%descr\050info\051)]TJ 0 -11.955 Td [(call)-525(prec%descr\050info,iout,)-525(root\051)]TJ 0 g 0 G - [-500(This)-345(r)18(eduction)-345(is)-346(appr)18(opriate)-345(to)-345(compute)-345(the)-345(r)18(esults)-346(of)-345(multiple)-345(\050local\051)]TJ 12.453 -11.955 Td [(NRM2)-250(operations)-250(at)-250(the)-250(same)-250(time.)]TJ -0 g 0 G - -12.453 -19.925 Td [(2.)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G - [-500(Denoting)-249(by)]TJ/F52 9.9626 Tf 69.789 0 Td [(d)-40(a)-25(t)]TJ/F52 7.5716 Tf 13.536 -1.96 Td [(i)]TJ/F54 9.9626 Tf 5.23 1.96 Td [(the)-249(value)-249(of)-248(the)-249(variable)]TJ/F52 9.9626 Tf 108.808 0 Td [(d)-40(a)-25(t)]TJ/F54 9.9626 Tf 15.973 0 Td [(on)-249(pr)18(ocess)]TJ/F52 9.9626 Tf 49.078 0 Td [(i)]TJ/F54 9.9626 Tf 2.964 0 Td [(,)-249(the)-249(output)]TJ/F52 9.9626 Tf 53.71 0 Td [(r)-17(e)-25(s)]TJ/F54 9.9626 Tf -306.635 -11.955 Td [(is)-250(equivalent)-250(to)-250(the)-250(computation)-250(of)]TJ/F52 9.9626 Tf 124.796 -25.468 Td [(r)-17(e)-25(s)]TJ/F85 10.3811 Tf 15.061 0 Td [(=)]TJ/F1 9.9626 Tf 11.086 10.922 Td [(r)]TJ -ET -q -1 0 0 1 285.832 490.532 cm -[]0 d 0 J 0.389 w 0 0 m 30.512 0 l S -Q -BT -/F96 13.9477 Tf 285.957 477.344 Td [(\345)]TJ/F52 7.5716 Tf 4.245 -8.764 Td [(i)]TJ/F52 9.9626 Tf 8.364 10.836 Td [(d)-40(a)-25(t)]TJ/F54 7.5716 Tf 13.495 3.473 Td [(2)]TJ/F52 7.5716 Tf 0.041 -7.027 Td [(i)]TJ/F54 9.9626 Tf 4.243 3.554 Td [(,)]TJ -191.543 -30.806 Td [(with)-250(car)18(e)-250(taken)-250(to)-250(avoid)-250(unnecessary)-250(over\003ow)92(.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G - -12.453 -19.926 Td [(3.)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 31.023 0 Td [(dat)]TJ/F54 9.9626 Tf 17.584 0 Td [(ar)18(gument)-190(is)-190(both)-190(input)-190(and)-190(output,)-202(and)-190(its)-190(value)-190(may)-190(be)-190(changed)]TJ -36.154 -11.955 Td [(even)-250(on)-250(pr)18(ocesses)-250(dif)18(fer)18(ent)-250(fr)18(om)-250(the)-250(\002nal)-250(r)18(esult)-250(destination.)]TJ 0 g 0 G - -12.453 -19.925 Td [(4.)]TJ + 0 -19.925 Td [(prec)]TJ 0 g 0 G - [-500(The)]TJ/F59 9.9626 Tf 32.225 0 Td [(mode)]TJ/F54 9.9626 Tf 24.015 0 Td [(ar)18(gument)-311(can)-310(be)-311(built)-310(with)-311(the)-310(bitwise)]TJ/F59 9.9626 Tf 176.537 0 Td [(IOR\050\051)]TJ/F54 9.9626 Tf 29.246 0 Td [(operator;)-341(in)-310(the)]TJ -249.57 -11.955 Td [(following)-203(example,)-213(the)-204(ar)18(gument)-203(is)-204(for)18(cing)-203(immediate)-203(completion,)-213(hence)]TJ 0 -11.955 Td [(the)]TJ/F59 9.9626 Tf 16.309 0 Td [(request)]TJ/F54 9.9626 Tf 39.103 0 Td [(ar)18(gument)-250(needs)-250(not)-250(be)-250(speci\002ed:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F62 9.9626 Tf 24.349 0 Td [(the)-250(pr)18(econditioner)74(.)-310(Scope:)]TJ/F59 9.9626 Tf 117.836 0 Td [(local)]TJ/F62 9.9626 Tf -117.278 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 196.511 0 Td [(psb)]TJ ET q -1 0 0 1 124.802 333.043 cm -0 0 318.804 27.895 re f +1 0 0 1 337.631 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 137.205 350.279 Td [(call)]TJ -0 g 0 G - [-525(psb_nrm2\050ctxt,dat,&)]TJ 23.537 -10.959 Td [(&)-525(mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [(ior)]TJ -0 g 0 G - [(\050psb_collective_start_,psb_collective_end_\051\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0 g 0 G -/F54 9.9626 Tf -48.393 -32.18 Td [(5.)]TJ -0 g 0 G - [-500(When)-295(splitting)-294(the)-295(operation)-295(in)-295(two)-294(calls,)-306(the)]TJ/F59 9.9626 Tf 216.877 0 Td [(dat)]TJ/F54 9.9626 Tf 18.628 0 Td [(ar)18(gument)]TJ/F52 9.9626 Tf 45.835 0 Td [(must)-295(not)]TJ/F54 9.9626 Tf 39.636 0 Td [(be)]TJ -308.523 -11.955 Td [(accessed)-250(between)-250(calls:)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F67 9.9626 Tf 340.77 577.576 Td [(prec)]TJ ET q -1 0 0 1 124.802 222.458 cm -0 0 318.804 60.772 re f +1 0 0 1 362.319 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 127.791 272.57 Td [(call)]TJ +/F67 9.9626 Tf 365.457 577.576 Td [(type)]TJ 0 g 0 G - [-525(psb_nrm2\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G - [(psb_collective_start_,&)]TJ 23.536 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F59 9.9626 Tf -286.483 -19.925 Td [(iout)]TJ 0 g 0 G - [(nrm2_request\051)]TJ -14.122 -10.959 Td [(.......)]TJ -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 8.9664 Tf 37.659 0 Td [(!)-525(Do)-525(not)-525(access)-525(dat)]TJ +/F62 9.9626 Tf 23.243 0 Td [(output)-250(unit.)-310(Scope:)]TJ/F59 9.9626 Tf 87.391 0 Td [(local)]TJ/F62 9.9626 Tf -85.727 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(number)74(.)-310(Default:)-310(default)-250(output)-250(unit.)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F94 8.9664 Tf -37.659 -10.959 Td [(call)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(root)]TJ 0 g 0 G - [-525(psb_nrm2\050ctxt,dat,mode)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F62 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-250(fr)18(om)-250(which)-250(to)-250(print)-250(Scope:)]TJ/F59 9.9626 Tf 155.834 0 Td [(local)]TJ/F62 9.9626 Tf -154.18 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-387(as:)-585(an)-387(integer)-387(number)-388(between)-387(0)-387(and)]TJ/F60 9.9626 Tf 220.442 0 Td [(n)-80(p)]TJ/F91 10.3811 Tf 14.01 0 Td [(\000)]TJ/F62 9.9626 Tf 10.638 0 Td [(1,)-422(in)-387(which)-387(case)]TJ -245.09 -11.955 Td [(the)-314(speci\002e)1(d)-314(pr)18(ocess)-314(will)-313(print)-314(the)-313(description,)-330(or)]TJ/F91 10.3811 Tf 225.38 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1,)-329(in)-314(which)-314(case)-313(all)]TJ -233.574 -11.955 Td [(pr)18(ocesses)-250(will)-250(print.)-310(Default:)-310(0.)]TJ 0 g 0 G - [(psb_collective_end_,&)]TJ 23.537 -10.959 Td [(&)-525(request)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ +/F59 9.9626 Tf -24.907 -19.925 Td [(On)-250(Return)]TJ 0 g 0 G - [(nrm2_request\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G + 0 -19.925 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ 0 g 0 G -/F54 9.9626 Tf 103.537 -138.296 Td [(133)]TJ + 139.477 -263.975 Td [(155)]TJ 0 g 0 G ET endstream -endobj -1757 0 obj -<< -/Length 5352 ->> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(7.16)-1000(psb)]TJ -ET -q -1 0 0 1 204.216 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 207.803 706.129 Td [(snd)-250(\227)-250(Send)-250(data)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_snd\050ctxt,)-525(dat,)-525(dst,)-525(m\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-250(subr)18(outine)-250(sends)-250(a)-250(packet)-250(of)-250(data)-250(to)-250(a)-250(destination.)]TJ +endobj +1994 0 obj +<< +/Length 973 +>> +stream 0 g 0 G -/F51 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous:)-310(see)-250(usage)-250(notes.)]TJ +BT +/F59 11.9552 Tf 150.705 706.129 Td [(10.5)-1000(clone)-250(\227)-250(clone)-250(current)-250(preconditioner)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G +/F67 9.9626 Tf 0 -18.964 Td [(call)-1050(prec%clone\050precout,info\051)]TJ 0 g 0 G - 0 -19.926 Td [(ctxt)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.926 Td [(dat)]TJ +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G -/F54 9.9626 Tf 19.367 0 Td [(The)-250(data)-250(to)-250(be)-250(sent.)]TJ 5.54 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.01 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-269(may)-270(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-220(a)-220(rank)-219(1)-220(or)-220(2)-220(array)111(,)-226(or)-220(a)-219(character)-220(or)-220(logical)-220(scalar)74(.)-520(T)90(ype,)-225(kind)-220(and)-220(rank)]TJ 0 -11.956 Td [(must)-215(agr)18(ee)-216(on)-215(sender)-215(and)-216(r)18(eceiver)-215(pr)18(ocess;)-227(if)]TJ/F52 9.9626 Tf 197.687 0 Td [(m)]TJ/F54 9.9626 Tf 10.021 0 Td [(is)-215(not)-216(speci\002ed,)-222(size)-215(must)]TJ -207.708 -11.955 Td [(agr)18(ee)-250(as)-250(well.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(dst)]TJ + 0 -19.925 Td [(prec)]TJ 0 g 0 G -/F54 9.9626 Tf 18.809 0 Td [(Destination)-250(pr)18(ocess.)]TJ 6.098 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F61 10.3811 Tf 138.85 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(d)-25(s)-25(t)]TJ/F61 10.3811 Tf 15.689 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1.)]TJ +/F62 9.9626 Tf 24.348 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -241.89 -31.88 Td [(m)]TJ +/F59 9.9626 Tf -77.917 -33.873 Td [(On)-250(Return)]TJ 0 g 0 G -/F54 9.9626 Tf 13.838 0 Td [(Number)-250(of)-250(r)18(ows.)]TJ 11.069 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.213 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F54 9.9626 Tf 40.946 0 Td [(.)]TJ -68.034 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F61 10.3811 Tf 138.85 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(m)]TJ/F61 10.3811 Tf 10.767 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(s)-18(i)-32(z)-25(e)]TJ/F85 10.3811 Tf 15.94 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F54 9.9626 Tf 13.494 0 Td [(,)-167(1)]TJ/F85 10.3811 Tf 9.257 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(.)]TJ -235.542 -11.955 Td [(When)]TJ/F52 9.9626 Tf 29.859 0 Td [(d)-40(a)-25(t)]TJ/F54 9.9626 Tf 16.898 0 Td [(is)-342(a)-341(rank)-342(2)-341(array)111(,)-365(speci\002es)-342(the)-341(number)-342(of)-341(r)18(ows)-342(to)-342(be)-341(sent)-342(in-)]TJ -46.757 -11.955 Td [(dependently)-341(of)-340(the)-341(leading)-341(dimension)]TJ/F52 9.9626 Tf 175.121 0 Td [(s)-18(i)-32(z)-25(e)]TJ/F85 10.3811 Tf 15.94 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F54 9.9626 Tf 13.494 0 Td [(,)-167(1)]TJ/F85 10.3811 Tf 9.257 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(;)-386(must)-341(have)-340(the)-341(same)]TJ -222.236 -11.955 Td [(value)-250(on)-250(sending)-250(and)-250(r)18(eceiving)-250(pr)18(ocesses.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.918 Td [(On)-250(Return)]TJ + 0 -19.926 Td [(precout)]TJ 0 g 0 G -/F51 11.9552 Tf 0 -21.918 Td [(Notes)]TJ +/F62 9.9626 Tf 39.292 0 Td [(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ 0 g 0 G -/F54 9.9626 Tf 12.453 -19.925 Td [(1.)]TJ +/F59 9.9626 Tf -39.292 -19.925 Td [(info)]TJ 0 g 0 G - [-500(This)-292(subr)18(outine)-292(impl)1(ies)-292(a)-292(synchr)18(onization,)-302(but)-292(only)-292(between)-291(the)-292(calling)]TJ 12.454 -11.955 Td [(pr)18(ocess)-250(and)-250(the)-250(destination)-250(pr)18(ocess)]TJ/F52 9.9626 Tf 158.309 0 Td [(d)-25(s)-25(t)]TJ/F54 9.9626 Tf 12.797 0 Td [(.)]TJ +/F62 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ 0 g 0 G - -31.629 -104.573 Td [(134)]TJ + 140.583 -449.28 Td [(156)]TJ 0 g 0 G ET endstream endobj -1762 0 obj +2000 0 obj << -/Length 5356 +/Length 2703 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(7.17)-1000(psb)]TJ -ET -q -1 0 0 1 153.407 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 156.993 706.129 Td [(rcv)-250(\227)-250(Receive)-250(data)]TJ -0 g 0 G +/F59 11.9552 Tf 99.895 706.129 Td [(10.6)-1000(free)-250(\227)-250(Free)-250(a)-250(preconditioner)]TJ 0 g 0 G -/F59 9.9626 Tf -57.098 -18.964 Td [(call)-525(psb_rcv\050ctxt,)-525(dat,)-525(src,)-525(m\051)]TJ/F54 9.9626 Tf 14.944 -21.918 Td [(This)-250(subr)18(outine)-250(r)18(eceives)-250(a)-250(packet)-250(of)-250(data)-250(to)-250(a)-250(destination.)]TJ 0 g 0 G -/F51 9.9626 Tf -14.944 -19.925 Td [(T)90(ype:)]TJ +/F67 9.9626 Tf 0 -18.964 Td [(call)-525(prec%free\050info\051)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous:)-310(see)-250(usage)-250(notes.)]TJ +/F59 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G +/F59 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G - 0 -19.926 Td [(ctxt)]TJ -0 g 0 G -/F54 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.926 Td [(src)]TJ + 0 -19.925 Td [(prec)]TJ 0 g 0 G -/F54 9.9626 Tf 17.704 0 Td [(Sour)18(ce)-250(pr)18(ocess.)]TJ 7.203 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F61 10.3811 Tf 138.85 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(s)-15(r)-17(c)]TJ/F61 10.3811 Tf 15.141 0 Td [(<)]TJ/F85 10.3811 Tf 8.318 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 13.504 0 Td [(\000)]TJ/F54 9.9626 Tf 10.131 0 Td [(1.)]TJ +/F62 9.9626 Tf 24.349 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 196.511 0 Td [(psb)]TJ +ET +q +1 0 0 1 337.631 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 340.77 577.576 Td [(prec)]TJ +ET +q +1 0 0 1 362.319 577.775 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 365.457 577.576 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -241.342 -31.881 Td [(m)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 13.838 0 Td [(Number)-250(of)-250(r)18(ows.)]TJ 11.069 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F54 9.9626 Tf 40.946 0 Td [(.)]TJ -68.034 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value)-250(0)]TJ/F61 10.3811 Tf 138.85 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(m)]TJ/F61 10.3811 Tf 10.767 0 Td [(<)]TJ/F85 10.3811 Tf 8.319 0 Td [(=)]TJ/F52 9.9626 Tf 11.086 0 Td [(s)-18(i)-32(z)-25(e)]TJ/F85 10.3811 Tf 15.94 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F54 9.9626 Tf 13.494 0 Td [(,)-167(1)]TJ/F85 10.3811 Tf 9.257 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(.)]TJ -235.542 -11.956 Td [(When)]TJ/F52 9.9626 Tf 29.859 0 Td [(d)-40(a)-25(t)]TJ/F54 9.9626 Tf 16.898 0 Td [(is)-342(a)-341(rank)-342(2)-341(array)111(,)-365(speci\002es)-342(the)-341(number)-342(of)-341(r)18(ows)-342(to)-342(be)-341(sent)-342(in-)]TJ -46.757 -11.955 Td [(dependently)-341(of)-340(the)-341(leading)-341(dimension)]TJ/F52 9.9626 Tf 175.121 0 Td [(s)-18(i)-32(z)-25(e)]TJ/F85 10.3811 Tf 15.94 0 Td [(\050)]TJ/F52 9.9626 Tf 4.274 0 Td [(d)-40(a)-25(t)]TJ/F54 9.9626 Tf 13.494 0 Td [(,)-167(1)]TJ/F85 10.3811 Tf 9.257 0 Td [(\051)]TJ/F54 9.9626 Tf 4.15 0 Td [(;)-386(must)-341(have)-340(the)-341(same)]TJ -222.236 -11.955 Td [(value)-250(on)-250(sending)-250(and)-250(r)18(eceiving)-250(pr)18(ocesses.)]TJ +/F59 9.9626 Tf -286.483 -19.925 Td [(On)-250(Exit)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -21.918 Td [(On)-250(Return)]TJ 0 g 0 G + 0 -19.926 Td [(prec)]TJ 0 g 0 G - 0 -19.925 Td [(dat)]TJ +/F62 9.9626 Tf 24.349 0 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -30.874 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 196.511 0 Td [(psb)]TJ +ET +q +1 0 0 1 337.631 502.059 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 340.77 501.86 Td [(prec)]TJ +ET +q +1 0 0 1 362.319 502.059 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 365.457 501.86 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 19.368 0 Td [(The)-250(data)-250(to)-250(be)-250(r)18(eceived.)]TJ 5.539 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-269(as:)-349(an)-269(integer)74(,)-274(r)18(eal)-269(or)-269(complex)-269(variable,)-274(which)-270(may)-269(be)-269(a)-269(scalar)74(,)]TJ 0 -11.955 Td [(or)-220(a)-220(rank)-219(1)-220(or)-220(2)-220(array)111(,)-226(or)-220(a)-219(character)-220(or)-220(logical)-220(scalar)74(.)-520(T)90(ype,)-225(kind)-220(and)-220(rank)]TJ 0 -11.955 Td [(must)-215(agr)18(ee)-216(on)-215(sender)-215(and)-216(r)18(eceiver)-215(pr)18(ocess;)-227(if)]TJ/F52 9.9626 Tf 197.687 0 Td [(m)]TJ/F54 9.9626 Tf 10.021 0 Td [(is)-215(not)-216(speci\002ed,)-222(size)-215(must)]TJ -207.708 -11.955 Td [(agr)18(ee)-250(as)-250(well.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 12.454 -19.925 Td [(1.)]TJ +/F59 9.9626 Tf -286.483 -19.925 Td [(info)]TJ 0 g 0 G - [-500(This)-292(subr)18(outine)-291(implies)-292(a)-292(synchr)18(onization,)-302(but)-292(only)-292(between)-291(the)-292(calling)]TJ 12.453 -11.955 Td [(pr)18(ocess)-250(and)-250(the)-250(sour)18(ce)-250(pr)18(ocess)]TJ/F52 9.9626 Tf 137.538 0 Td [(s)-15(r)-17(c)]TJ/F54 9.9626 Tf 12.249 0 Td [(.)]TJ +/F62 9.9626 Tf 23.801 0 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -30.326 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Err)18(or)-250(code:)-310(if)-250(no)-250(err)18(or)74(,)-250(0)-250(is)-250(r)18(eturned.)]TJ/F59 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ/F62 9.9626 Tf 34.363 0 Td [(Releases)-250(all)-250(internal)-250(storage.)]TJ 0 g 0 G - -10.31 -104.573 Td [(135)]TJ + 130.021 -333.713 Td [(157)]TJ 0 g 0 G ET endstream endobj -1769 0 obj +2005 0 obj << -/Length 6241 +/Length 548 >> stream 0 g 0 G 0 g 0 G BT -/F51 14.3462 Tf 150.705 705.784 Td [(8)-1000(Error)-250(handling)]TJ/F54 9.9626 Tf 0 -22.702 Td [(The)-382(PSBLAS)-382(library)-381(err)18(or)-382(handling)-382(policy)-382(has)-382(been)-382(comple)1(tely)-382(r)18(ewritten)-382(in)]TJ 0 -11.955 Td [(version)-359(2.0.)-638(The)-359(idea)-359(behind)-360(the)-359(design)-359(of)-359(this)-359(new)-360(err)18(or)-359(handling)-359(strategy)]TJ 0 -11.955 Td [(is)-303(to)-303(keep)-303(err)18(or)-303(messages)-303(on)-303(a)-303(stack)-303(allowing)-303(the)-303(user)-303(to)-303(trace)-303(back)-303(up)-303(to)-303(the)]TJ 0 -11.955 Td [(point)-317(wher)18(e)-318(the)-317(\002rst)-317(err)18(or)-318(message)-317(has)-318(been)-317(generated.)-512(Every)-317(r)18(outine)-318(in)-317(the)]TJ 0 -11.956 Td [(PSBLAS-2.0)-336(library)-336(has,)-358(as)-336(last)-337(non-optional)-336(ar)18(gument,)-358(an)-336(integer)]TJ/F59 9.9626 Tf 298.678 0 Td [(info)]TJ/F54 9.9626 Tf 24.271 0 Td [(vari-)]TJ -322.949 -11.955 Td [(able;)-364(whenever)74(,)-344(inside)-326(the)-326(r)18(outine,)-345(an)-326(err)18(or)-325(is)-326(detected,)-345(this)-326(variable)-326(is)-325(set)-326(to)]TJ 0 -11.955 Td [(a)-384(value)-384(corr)18(esponding)-384(to)-384(a)-384(speci\002c)-384(err)18(or)-384(code.)-711(Then)-384(this)-384(err)18(or)-384(code)-384(is)-384(also)]TJ 0 -11.955 Td [(pushed)-274(on)-273(the)-274(err)18(or)-274(stack)-274(and)-273(then)-274(either)-274(contr)18(ol)-274(is)-273(r)18(eturned)-274(to)-274(the)-273(caller)-274(r)18(ou-)]TJ 0 -11.955 Td [(tine)-342(or)-342(the)-342(execution)-343(is)-342(aborted,)-365(depending)-342(on)-342(the)-342(users)-342(choice.)-587(At)-342(the)-342(time)]TJ 0 -11.955 Td [(when)-243(the)-242(execution)-243(is)-242(aborted,)-244(an)-243(err)18(or)-242(message)-243(is)-243(p)1(rinted)-243(on)-243(standar)18(d)-242(output)]TJ 0 -11.956 Td [(with)-257(a)-256(level)-257(of)-256(verbosity)-257(than)-256(can)-257(be)-256(chosen)-257(by)-257(the)-256(user)74(.)-330(If)-256(the)-257(execution)-257(is)-256(not)]TJ 0 -11.955 Td [(aborted,)-259(then,)-259(the)-257(caller)-257(r)18(outine)-258(checks)-257(the)-257(value)-257(r)18(eturned)-257(in)-258(the)]TJ/F59 9.9626 Tf 284.621 0 Td [(info)]TJ/F54 9.9626 Tf 23.484 0 Td [(variable)]TJ -308.105 -11.955 Td [(and,)-290(if)-282(not)-282(zer)18(o,)-291(an)-282(err)18(or)-282(condition)-282(is)-282(raised.)-407(This)-282(pr)18(ocess)-282(continues)-282(on)-282(all)-282(the)]TJ 0 -11.955 Td [(levels)-203(of)-203(nested)-203(calls)-203(until)-203(the)-203(level)-203(wher)18(e)-202(the)-203(user)-203(decides)-203(to)-203(abort)-203(the)-203(pr)18(ogram)]TJ 0 -11.955 Td [(execution.)]TJ 14.944 -11.955 Td [(Figur)18(e)]TJ -0 0 1 rg 0 0 1 RG - [-286(5)]TJ -0 g 0 G - [-285(shows)-286(the)-286(layou)1(t)-286(of)-286(a)-285(generic)]TJ/F59 9.9626 Tf 172.064 0 Td [(psb_foo)]TJ/F54 9.9626 Tf 39.458 0 Td [(r)18(outine)-286(with)-285(r)18(espect)-286(to)-286(the)]TJ -226.466 -11.956 Td [(PSBLAS-2.0)-258(err)18(or)-259(handling)-258(policy)111(.)-335(It)-258(is)-258(possible)-259(to)-258(see)-258(how)92(,)-261(whenever)-258(an)-258(err)18(or)]TJ 0 -11.955 Td [(condition)-298(is)-298(detected,)-311(the)]TJ/F59 9.9626 Tf 114.879 0 Td [(info)]TJ/F54 9.9626 Tf 23.893 0 Td [(variable)-298(is)-299(set)-298(to)-298(the)-298(corr)18(esponding)-299(err)18(or)-298(code)]TJ -138.772 -11.955 Td [(which)-309(is,)-324(then,)-324(pushed)-310(on)-309(top)-309(of)-310(the)-309(stack)-309(by)-309(means)-310(of)-309(the)]TJ/F59 9.9626 Tf 265.277 0 Td [(psb_errpush)]TJ/F54 9.9626 Tf 57.534 0 Td [(.)-488(An)]TJ -322.811 -11.955 Td [(err)18(or)-325(condition)-326(may)-325(be)-326(dir)18(ectl)1(y)-326(detected)-325(inside)-326(a)-325(r)18(outine)-325(or)-326(indir)18(ectly)-325(check-)]TJ 0 -11.955 Td [(ing)-331(the)-331(err)18(or)-331(code)-331(r)18(eturned)-331(r)18(eturned)-331(by)-331(a)-331(called)-331(r)18(outine.)-553(Whenever)-331(an)-331(err)18(or)]TJ 0 -11.956 Td [(is)-253(encounter)18(ed,)-255(after)-253(it)-254(has)-253(been)-254(pushed)-253(on)-254(st)1(ack,)-255(the)-253(pr)18(ogram)-254(execution)-253(skips)]TJ 0 -11.955 Td [(to)-264(a)-265(point)-264(wher)18(e)-264(the)-265(err)18(or)-264(condition)-264(is)-264(handled;)-272(the)-264(err)18(or)-265(condition)-264(is)-264(handled)]TJ 0 -11.955 Td [(either)-336(by)-336(r)18(eturning)-336(contr)18(ol)-336(to)-336(the)-336(caller)-335(r)17(o)1(utine)-336(or)-336(by)-336(calling)-336(the)]TJ/F59 9.9626 Tf 291.408 0 Td [(psb\134_error)]TJ/F54 9.9626 Tf -291.408 -11.955 Td [(r)18(outine)-273(which)-274(prints)-273(the)-274(content)-273(of)-273(the)-274(err)18(or)-273(stack)-274(and)-273(aborts)-273(the)-274(pr)18(ogram)-273(ex-)]TJ 0 -11.955 Td [(ecution,)-373(accor)18(ding)-348(to)-348(the)-348(choice)-348(made)-348(by)-348(the)-348(user)-348(with)]TJ/F59 9.9626 Tf 252.305 0 Td [(psb_set_erraction)]TJ/F54 9.9626 Tf 88.915 0 Td [(.)]TJ -341.22 -11.955 Td [(The)-297(default)-296(is)-297(to)-296(print)-297(the)-297(err)18(or)-296(and)-297(terminate)-296(the)-297(pr)18(ogram,)-308(but)-297(the)-297(user)-296(may)]TJ 0 -11.956 Td [(choose)-250(to)-250(handle)-250(the)-250(err)18(or)-250(explicitly)111(.)]TJ 14.944 -11.955 Td [(Figur)18(e)]TJ -0 0 1 rg 0 0 1 RG - [-347(6)]TJ -0 g 0 G - [-348(r)18(eports)-347(a)-347(sample)-347(err)18(or)-348(message)-347(generated)-347(by)-348(the)-347(PSBLAS-2.0)-347(li-)]TJ -14.944 -11.955 Td [(brary)111(.)-539(This)-327(err)18(or)-326(has)-327(been)-326(generated)-327(by)-326(the)-326(fact)-327(that)-326(the)-327(user)-326(has)-327(chosen)-326(the)]TJ 0 -11.955 Td [(invalid)-379(\223FOO\224)-380(stor)1(a)-1(g)1(e)-380(format)-379(to)-379(r)18(epr)18(esent)-380(the)-379(sparse)-379(matrix.)-698(Fr)18(om)-380(this)-379(er)18(-)]TJ 0 -11.955 Td [(r)18(or)-394(message)-393(it)-394(is)-393(possible)-394(to)-394(se)1(e)-394(that)-394(the)-393(err)18(or)-394(has)-393(been)-394(detected)-394(inside)-393(the)]TJ/F59 9.9626 Tf 0 -11.955 Td [(psb_cest)]TJ/F54 9.9626 Tf 45.361 0 Td [(subr)18(outine)-353(called)-353(by)]TJ/F59 9.9626 Tf 95.326 0 Td [(psb_spasb)]TJ/F54 9.9626 Tf 50.591 0 Td [(...)-619(by)-354(pr)18(ocess)-353(0)-353(\050i.e.)-619(the)-353(r)18(oot)-354(pr)18(o-)]TJ -191.278 -11.956 Td [(cess\051.)]TJ +/F59 14.3462 Tf 150.705 705.784 Td [(11)-1000(Iterative)-250(Methods)]TJ/F62 9.9626 Tf 0 -22.702 Td [(In)-402(this)-403(chapter)-402(we)-403(pr)18(ovide)-402(r)18(outines)-403(for)-402(pr)18(econditioners)-402(and)-403(iterative)-402(meth-)]TJ 0 -11.955 Td [(ods.)-472(The)-304(interfaces)-304(for)-304(Krylov)-304(subspace)-303(methods)-304(ar)18(e)-304(available)-304(in)-304(the)-304(module)]TJ/F67 9.9626 Tf 0 -11.955 Td [(psb_krylov_mod)]TJ/F62 9.9626 Tf 73.225 0 Td [(.)]TJ 0 g 0 G - 164.384 -198.123 Td [(136)]TJ + 91.158 -568.734 Td [(158)]TJ 0 g 0 G ET endstream endobj -1775 0 obj +2012 0 obj << -/Length 6642 +/Length 8246 >> stream 0 g 0 G 0 g 0 G -0 g 0 G -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +BT +/F59 11.9552 Tf 99.895 706.129 Td [(11.1)-1000(psb)]TJ +ET q -1 0 0 1 99.895 417.212 cm -0 0 343.711 292.902 re f +1 0 0 1 153.407 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F59 9.9626 Tf 102.884 698.757 Td [(subroutine)]TJ -0 g 0 G - [-525(psb_foo\050some)-525(args,)-525(info\051)]TJ -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 9.9626 Tf 15.691 -11.956 Td [(!...)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 0 -11.955 Td [(if)]TJ -0 g 0 G - [(\050error)-525(detected\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -0 g 0 G - 15.691 -11.955 Td [(info)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(errcode1)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ -0 g 0 G - [-525(psb_errpush\050)]TJ -0.25 0.44 0.63 rg 0.25 0.44 0.63 RG - [(\015psb_foo\015)]TJ -0 g 0 G - [(,)-525(errcode1\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(goto)]TJ -0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [-525(9999)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -15.691 -11.955 Td [(end)-525(if)]TJ -0 g 0 G -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 9.9626 Tf 0 -11.956 Td [(!...)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -/F59 9.9626 Tf 0 -11.955 Td [(call)]TJ -0 g 0 G - [-525(psb_bar\050some)-525(args,)-525(info\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(if)]TJ -0 g 0 G - [(\050info)-525(.ne.)-525(zero\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -0 g 0 G - 15.691 -11.955 Td [(info)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ -0 g 0 G - [(errcode2)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(call)]TJ -0 g 0 G - [-525(psb_errpush\050)]TJ -0.25 0.44 0.63 rg 0.25 0.44 0.63 RG - [(\015psb_foo\015)]TJ -0 g 0 G - [(,)-525(errcode2\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.956 Td [(goto)]TJ -0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [-525(9999)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -15.691 -11.955 Td [(end)-525(if)]TJ -0 g 0 G -0.38 0.63 0.69 rg 0.38 0.63 0.69 RG -/F112 9.9626 Tf 0 -11.955 Td [(!...)]TJ -0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG -/F59 9.9626 Tf -15.691 -11.955 Td [(9999)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(continue)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 15.691 -11.955 Td [(if)]TJ -0 g 0 G - [-525(\050err_act)-525(.eq.)-525(act_abort\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 15.691 -11.955 Td [(call)]TJ -0 g 0 G - [-525(psb_error\050icontxt\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.956 Td [(return)]TJ +/F59 11.9552 Tf 156.993 706.129 Td [(krylov)-250(\227)-250(Krylov)-250(Methods)-250(Driver)-250(Routine)]TJ/F62 9.9626 Tf -57.098 -18.964 Td [(This)-266(subr)18(outine)-266(is)-267(a)-266(driver)-266(that)-267(p)1(r)18(ovides)-267(a)-266(general)-266(interface)-266(for)-267(all)-266(the)-266(Krylov-)]TJ 0 -11.955 Td [(Subspace)-250(family)-250(methods)-250(implemented)-250(in)-250(PSBLAS)-250(version)-250(2.)]TJ 14.944 -11.955 Td [(The)-250(stopping)-250(criterion)-250(can)-250(take)-250(the)-250(following)-250(values:)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -15.691 -11.955 Td [(else)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -11.955 Td [(return)]TJ -0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -15.691 -11.955 Td [(end)-525(if)]TJ +/F59 9.9626 Tf -14.944 -18.774 Td [(1)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -23.91 Td [(end)-525(subroutine)]TJ -0 g 0 G - [-525(psb_foo)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +/F62 9.9626 Tf 9.963 0 Td [(normwise)-222(backwar)18(d)-221(err)18(or)-222(in)-221(the)-222(in\002nity)-222(norm;)-231(the)-221(iteration)-222(is)-222(stopped)-221(when)]TJ/F60 9.9626 Tf 109.036 -26.864 Td [(e)-15(r)-25(r)]TJ/F93 10.3811 Tf 15.14 0 Td [(=)]TJ/F91 10.3811 Tf 40.62 6.745 Td [(k)]TJ/F60 9.9626 Tf 5.34 0 Td [(r)]TJ/F60 7.5716 Tf 4.041 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(k)]TJ +ET +q +1 0 0 1 246.191 620.108 cm +[]0 d 0 J 0.398 w 0 0 m 74.372 0 l S +Q +BT +/F93 10.3811 Tf 246.316 610.783 Td [(\050)]TJ/F91 10.3811 Tf 4.274 0 Td [(k)]TJ/F60 9.9626 Tf 5.938 0 Td [(A)]TJ/F91 10.3811 Tf 7.442 0 Td [(k)-24(k)]TJ/F60 9.9626 Tf 11.048 0 Td [(x)]TJ/F60 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.876 1.96 Td [(k)]TJ/F93 10.3811 Tf 7.376 0 Td [(+)]TJ/F91 10.3811 Tf 10.256 0 Td [(k)]TJ/F60 9.9626 Tf 5.44 0 Td [(b)]TJ/F91 10.3811 Tf 4.861 0 Td [(k)]TJ/F93 10.3811 Tf 5.44 0 Td [(\051)]TJ/F69 10.3811 Tf 8.236 6.834 Td [(<)]TJ/F60 9.9626 Tf 11.087 0 Td [(e)-80(p)-25(s)]TJ 0 g 0 G +/F59 9.9626 Tf -235.842 -29.908 Td [(2)]TJ 0 g 0 G -/F54 9.9626 Tf -2.989 -41.729 Td [(Listing)-289(5:)-387(The)-289(layout)-289(of)-289(a)-289(generic)]TJ/F59 9.9626 Tf 149.96 0 Td [(psb)]TJ +/F62 9.9626 Tf 9.963 0 Td [(Relative)-250(r)18(esidual)-250(in)-250(the)-250(2-norm;)-250(the)-250(iteration)-250(is)-250(stopped)-250(when)]TJ/F60 9.9626 Tf 136.209 -26.865 Td [(e)-15(r)-25(r)]TJ/F93 10.3811 Tf 15.141 0 Td [(=)]TJ/F91 10.3811 Tf 13.446 6.745 Td [(k)]TJ/F60 9.9626 Tf 5.34 0 Td [(r)]TJ/F60 7.5716 Tf 4.041 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(k)]TJ ET q -1 0 0 1 266.174 382.258 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 273.365 563.335 cm +[]0 d 0 J 0.398 w 0 0 m 20.025 0 l S Q BT -/F59 9.9626 Tf 269.312 382.059 Td [(foo)]TJ/F54 9.9626 Tf 18.568 0 Td [(r)18(outine)-289(with)-289(r)18(espect)-288(to)-289(PSBLAS-2.0)]TJ -187.985 -11.955 Td [(err)18(or)-250(handling)-250(policy)111(.)]TJ +/F91 10.3811 Tf 273.49 554.01 Td [(k)]TJ/F60 9.9626 Tf 5.439 0 Td [(b)]TJ/F91 10.3811 Tf 4.862 0 Td [(k)]TJ/F62 7.5716 Tf 5.315 -1.744 Td [(2)]TJ/F69 10.3811 Tf 8.371 8.578 Td [(<)]TJ/F60 9.9626 Tf 11.086 0 Td [(e)-80(p)-25(s)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG -/F59 9.9626 Tf 0 -19.609 Td [(==========================================================)]TJ +/F59 9.9626 Tf -208.668 -29.848 Td [(3)]TJ 0 g 0 G - 0 -11.955 Td [(Process:)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [-525(0)]TJ +/F62 9.9626 Tf 9.963 0 Td [(Relative)-250(r)18(esidual)-250(r)18(eduction)-250(in)-250(the)-250(2-norm;)-250(the)-250(iteration)-250(is)-250(stopped)-250(when)]TJ/F60 9.9626 Tf 134.486 -26.865 Td [(e)-15(r)-25(r)]TJ/F93 10.3811 Tf 15.14 0 Td [(=)]TJ/F91 10.3811 Tf 15.17 6.745 Td [(k)]TJ/F60 9.9626 Tf 5.34 0 Td [(r)]TJ/F60 7.5716 Tf 4.041 -1.96 Td [(i)]TJ/F91 10.3811 Tf 2.875 1.96 Td [(k)]TJ +ET +q +1 0 0 1 271.641 506.622 cm +[]0 d 0 J 0.398 w 0 0 m 23.472 0 l S +Q +BT +/F91 10.3811 Tf 271.766 497.297 Td [(k)]TJ/F60 9.9626 Tf 5.34 0 Td [(r)]TJ/F62 7.5716 Tf 4 -1.744 Td [(0)]TJ/F91 10.3811 Tf 4.408 1.744 Td [(k)]TJ/F62 7.5716 Tf 5.315 -1.744 Td [(2)]TJ/F69 10.3811 Tf 8.371 8.578 Td [(<)]TJ/F60 9.9626 Tf 11.086 0 Td [(e)-80(p)-25(s)]TJ/F62 9.9626 Tf -210.391 -29.848 Td [(The)-248(behaviour)-248(is)-248(contr)18(olled)-249(by)-248(the)-248(istop)-248(ar)18(gument)-248(\050see)-248(later\051.)-310(In)-248(the)-248(above)-248(for)18(-)]TJ 0 -11.956 Td [(mulae,)]TJ/F60 9.9626 Tf 32.81 0 Td [(x)]TJ/F60 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F62 9.9626 Tf 5.303 1.96 Td [(is)-256(the)-256(tentative)-257(soluti)1(on)-257(and)]TJ/F60 9.9626 Tf 125.144 0 Td [(r)]TJ/F60 7.5716 Tf 4.042 -1.96 Td [(i)]TJ/F93 10.3811 Tf 5.757 1.96 Td [(=)]TJ/F60 9.9626 Tf 11.2 0 Td [(b)]TJ/F91 10.3811 Tf 6.822 0 Td [(\000)]TJ/F60 9.9626 Tf 10.777 0 Td [(A)-42(x)]TJ/F60 7.5716 Tf 12.759 -1.96 Td [(i)]TJ/F62 9.9626 Tf 5.303 1.96 Td [(the)-256(corr)18(esponding)-256(r)18(esidual)]TJ -225.064 -11.955 Td [(at)-250(the)]TJ/F60 9.9626 Tf 27.083 0 Td [(i)]TJ/F62 9.9626 Tf 2.964 0 Td [(-th)-250(iteration.)]TJ -28.305 -17.357 Td [(c)-175(a)-175(l)-174(l)-880(p)-105(s)-105(b)]TJ +ET +q +1 0 0 1 150.28 433.215 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 154.313 433.015 Td [(k)-105(r)-105(y)-104(l)-105(o)-105(v)-238(\050)-156(m)-21(e)-22(t)-21(h)-22(o)-22(d)-218(,)-208(a)-242(,)-255(p)-80(r)-81(e)-80(c)-335(,)-191(b)-206(,)-203(x)-231(,)-234(e)-60(p)-59(s)-293(,)-273(d)-98(e)-97(s)-98(c)]TJ +ET +q +1 0 0 1 352.02 433.215 cm +[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +Q +BT +/F62 9.9626 Tf 355.983 433.015 Td [(a)-370(,)-283(i)-108(n)-108(f)-108(o)-274(,)-57(&)]TJ -227.086 -11.955 Td [(&)-580(i)-69(t)-69(m)-70(a)-69(x)-313(,)-327(i)-151(t)-152(e)-151(r)-478(,)-281(e)-107(r)-106(r)-387(,)-321(i)-145(t)-146(r)-146(a)-145(c)-146(e)-466(,)-336(i)-161(r)-160(s)-161(t)-496(,)-291(i)-116(s)-116(t)-116(o)-116(p)-407(,)-219(c)-43(o)-43(n)-44(d)-177(\051)]TJ 0 g 0 G - [(.)-1050(PSBLAS)-525(Error)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(\050)]TJ 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(4010)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(\051)]TJ +/F59 9.9626 Tf -29.002 -25.88 Td [(T)90(ype:)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(in)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G - [-525(subroutine:)-525(df_sample)]TJ 0 -11.955 Td [(Error)-525(from)-525(call)-525(to)-525(subroutine)-525(mat)]TJ -0.73 0.38 0.84 rg 0.73 0.38 0.84 RG - [-525(dist)]TJ +/F59 9.9626 Tf -29.828 -19.349 Td [(On)-250(Entry)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - 0 -11.955 Td [(==========================================================)]TJ 0 g 0 G - 0 -11.955 Td [(Process:)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [-525(0)]TJ + 0 -19.349 Td [(method)]TJ 0 g 0 G - [(.)-1050(PSBLAS)-525(Error)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(\050)]TJ +/F62 9.9626 Tf 39.851 0 Td [(a)-193(string)-194(that)-193(de\002nes)-194(the)-194(it)1(erative)-194(method)-193(to)-194(be)-194(use)1(d.)-292(Supported)-193(values)]TJ -14.944 -11.956 Td [(ar)18(e:)]TJ 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(4010)]TJ +/F59 9.9626 Tf 0 -19.349 Td [(CG:)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(\051)]TJ +/F62 9.9626 Tf 22.964 0 Td [(the)-250(Conjugate)-250(Gradient)-250(method;)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(in)]TJ +/F59 9.9626 Tf -22.964 -15.364 Td [(CGS:)]TJ 0 g 0 G - [-525(subroutine:)-525(mat_distv)]TJ 0 -11.955 Td [(Error)-525(from)-525(call)-525(to)-525(subroutine)]TJ -0.73 0.38 0.84 rg 0.73 0.38 0.84 RG - [-525(psb_spasb)]TJ +/F62 9.9626 Tf 29.051 0 Td [(the)-250(Conjugate)-250(Gradient)-250(Stabilized)-250(method;)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - 0 -11.956 Td [(==========================================================)]TJ +/F59 9.9626 Tf -29.051 -15.365 Td [(GCR:)]TJ 0 g 0 G - 0 -11.955 Td [(Process:)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [-525(0)]TJ +/F62 9.9626 Tf 30.157 0 Td [(the)-250(Generalized)-250(Conjugate)-250(Residual)-250(method;)]TJ 0 g 0 G - [(.)-1050(PSBLAS)-525(Error)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(\050)]TJ +/F59 9.9626 Tf -30.157 -15.364 Td [(FCG:)]TJ 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(4010)]TJ +/F62 9.9626 Tf 28.503 0 Td [(the)-250(Flexible)-250(Conjugate)-250(Gradient)-250(method)]TJ +0 0 1 rg 0 0 1 RG +/F62 7.5716 Tf 176.854 3.616 Td [(5)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(\051)]TJ +/F62 9.9626 Tf 4.284 -3.616 Td [(;)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(in)]TJ +/F59 9.9626 Tf -209.641 -15.364 Td [(BICG:)]TJ 0 g 0 G - [-525(subroutine:)-525(psb_spasb)]TJ 0 -11.955 Td [(Error)-525(from)-525(call)-525(to)-525(subroutine)]TJ -0.73 0.38 0.84 rg 0.73 0.38 0.84 RG - [-525(psb_cest)]TJ +/F62 9.9626 Tf 33.484 0 Td [(the)-250(Bi-Conjugate)-250(Gradient)-250(method;)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - 0 -11.955 Td [(==========================================================)]TJ +/F59 9.9626 Tf -33.484 -15.365 Td [(BICGST)92(AB:)]TJ 0 g 0 G - 0 -11.955 Td [(Process:)]TJ -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [-525(0)]TJ +/F62 9.9626 Tf 59.696 0 Td [(the)-250(Bi-Conjugate)-250(Gradient)-250(Stabilized)-250(method;)]TJ 0 g 0 G - [(.)-1050(PSBLAS)-525(Error)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(\050)]TJ +/F59 9.9626 Tf -59.696 -15.364 Td [(BICGST)92(ABL:)]TJ 0 g 0 G -0.25 0.63 0.44 rg 0.25 0.63 0.44 RG - [(136)]TJ +/F62 9.9626 Tf 65.783 0 Td [(the)-218(Bi-Conjugate)-217(Gradient)-218(Stabilized)-218(method)-217(with)-218(r)18(estart-)]TJ -43.865 -11.955 Td [(ing;)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(\051)]TJ +/F59 9.9626 Tf -21.918 -15.365 Td [(RGMRES:)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(in)]TJ +/F62 9.9626 Tf 52.294 0 Td [(the)-250(Generalized)-250(Minimal)-250(Residual)-250(method)-250(with)-250(r)18(estarting.)]TJ 0 g 0 G - [-525(subroutine:)-525(psb_cest)]TJ 0 -11.955 Td [(Format)-525(FOO)-525(is)]TJ -0.73 0.38 0.84 rg 0.73 0.38 0.84 RG - [-525(unknown)]TJ +/F59 9.9626 Tf -77.201 -19.349 Td [(a)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - 0 -11.956 Td [(==========================================================)]TJ +/F62 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(sparse)-250(matrix)]TJ/F60 9.9626 Tf 178.414 0 Td [(A)]TJ/F62 9.9626 Tf 7.317 0 Td [(.)]TJ -170.787 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ +ET +q +1 0 0 1 309.258 138.701 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 312.397 138.501 Td [(Tspmat)]TJ +ET +q +1 0 0 1 344.406 138.701 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 347.544 138.501 Td [(type)]TJ 0 g 0 G - 0 -11.955 Td [(Aborting...)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 0 -29.397 Td [(Listing)-364(6:)-537(A)-364(sample)-364(PSBLAS-3.0)-363(err)18(or)-364(message.)-651(Pr)18(ocess)-364(0)-364(detected)-363(an)-364(err)18(or)]TJ 0 -11.955 Td [(condition)-250(inside)-250(the)-250(psb)]TJ ET q -1 0 0 1 206.215 153.925 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 99.895 130.091 cm +[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S Q BT -/F54 9.9626 Tf 209.204 153.726 Td [(cest)-250(subr)18(outine)]TJ +/F62 5.9776 Tf 110.755 123.219 Td [(5)]TJ/F62 7.9701 Tf 3.487 -2.893 Td [(Note:)-310(the)-250(implementation)-250(is)-250(for)]TJ/F60 7.9701 Tf 113.297 0 Td [(F)-31(C)-45(G)]TJ/F93 8.3049 Tf 16.387 0 Td [(\050)]TJ/F62 7.9701 Tf 3.319 0 Td [(1)]TJ/F93 8.3049 Tf 4.085 0 Td [(\051)]TJ/F62 7.9701 Tf 3.32 0 Td [(.)]TJ 0 g 0 G - 55.075 -63.288 Td [(137)]TJ +0 g 0 G +/F62 9.9626 Tf 9.629 -29.888 Td [(159)]TJ 0 g 0 G ET endstream endobj -1779 0 obj +2023 0 obj << -/Length 3570 +/Length 7054 >> stream 0 g 0 G 0 g 0 G +0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(8.1)-1000(psb)]TJ +/F59 9.9626 Tf 150.705 706.129 Td [(prec)]TJ +0 g 0 G +/F62 9.9626 Tf 24.348 0 Td [(The)-250(data)-250(str)8(uctur)18(e)-250(containing)-250(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 360.068 658.507 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 11.9552 Tf 201.825 706.129 Td [(errpush)-250(\227)-250(Pushes)-250(an)-250(error)-250(code)-250(onto)-250(the)-250(error)-250(stack)]TJ/F54 9.9626 Tf -49.379 -24.942 Td [(c)-175(a)-175(l)-174(l)-874(p)-98(s)-99(b)]TJ +/F67 9.9626 Tf 363.206 658.308 Td [(prec)]TJ ET q -1 0 0 1 200.841 681.387 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 384.755 658.507 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 204.812 681.187 Td [(e)-99(r)-98(r)-99(p)-98(u)-99(s)-99(h)-232(\050)-266(e)-132(r)-132(r)]TJ +/F67 9.9626 Tf 387.893 658.308 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -258.11 -22.202 Td [(b)]TJ +0 g 0 G +/F62 9.9626 Tf 11.068 0 Td [(The)-250(RHS)-250(vector)74(.)]TJ 13.838 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 218.688 0 Td [(psb)]TJ ET q -1 0 0 1 270.843 681.387 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 410.618 588.484 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 275.151 681.187 Td [(c)-440(,)-825(r)]TJ +/F67 9.9626 Tf 413.756 588.285 Td [(T)]TJ ET q -1 0 0 1 299.7 681.387 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 419.614 588.484 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 303.188 681.187 Td [(n)-50(a)-50(m)-50(e)-276(,)-929(i)]TJ +/F67 9.9626 Tf 422.752 588.285 Td [(vect)]TJ ET q -1 0 0 1 348.561 681.387 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 444.301 588.484 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 353.087 681.187 Td [(e)-154(r)-155(r)-483(,)-920(a)]TJ +/F67 9.9626 Tf 447.439 588.285 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -317.656 -22.202 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(The)-250(initial)-250(guess.)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(local)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 218.687 0 Td [(psb)]TJ ET q -1 0 0 1 392.305 681.387 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 410.618 518.461 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F54 9.9626 Tf 396.74 681.187 Td [(e)-145(r)-145(r)-279(\051)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -/F51 9.9626 Tf -246.035 -27.895 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(err)]TJ +/F67 9.9626 Tf 413.756 518.262 Td [(T)]TJ ET q -1 0 0 1 164.035 613.641 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 419.614 518.461 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 167.023 613.442 Td [(c)]TJ -0 g 0 G -/F54 9.9626 Tf 9.405 0 Td [(the)-250(err)18(or)-250(code)]TJ -0.817 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(r)]TJ +/F67 9.9626 Tf 422.752 518.262 Td [(vect)]TJ ET q -1 0 0 1 155.178 545.895 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 444.301 518.461 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 158.167 545.696 Td [(name)]TJ +/F67 9.9626 Tf 447.439 518.262 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F54 9.9626 Tf 29.888 0 Td [(the)-250(soutine)-250(wher)18(e)-250(the)-250(err)18(or)-250(has)-250(been)-250(caught.)]TJ -12.444 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(string.)]TJ +/F59 9.9626 Tf -317.656 -22.203 Td [(eps)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -31.881 Td [(i)]TJ +/F62 9.9626 Tf 20.473 0 Td [(The)-250(stopping)-250(tolerance.)]TJ 4.433 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(r)18(eal)-250(number)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -22.203 Td [(desc)]TJ ET q -1 0 0 1 154.62 466.194 cm +1 0 0 1 171.218 426.236 cm []0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q BT -/F51 9.9626 Tf 157.609 465.994 Td [(err)]TJ -0 g 0 G -/F54 9.9626 Tf 17.713 0 Td [(addional)-250(info)-250(for)-250(err)18(or)-250(code)]TJ 0.289 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(array)]TJ +/F59 9.9626 Tf 174.207 426.036 Td [(a)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -31.881 Td [(a)]TJ +/F62 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.089 0 Td [(required)]TJ/F62 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.801 0 Td [(in)]TJ/F62 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 168.138 0 Td [(psb)]TJ ET q -1 0 0 1 156.284 398.448 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 360.068 378.415 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 363.206 378.216 Td [(desc)]TJ +ET +q +1 0 0 1 384.755 378.415 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 9.9626 Tf 159.273 398.249 Td [(err)]TJ +/F67 9.9626 Tf 387.893 378.216 Td [(type)]TJ 0 g 0 G -/F54 9.9626 Tf 17.713 0 Td [(addional)-250(info)-250(for)-250(err)18(or)-250(code)]TJ -1.375 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(string.)]TJ +/F62 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G - 139.477 -271.945 Td [(138)]TJ +/F59 9.9626 Tf -258.11 -22.203 Td [(itmax)]TJ +0 g 0 G +/F62 9.9626 Tf 30.436 0 Td [(The)-250(maximum)-250(number)-250(of)-250(iterations)-250(to)-250(perform.)]TJ -5.529 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F60 9.9626 Tf 38.569 0 Td [(i)-32(t)-25(m)-40(a)-42(x)]TJ/F93 10.3811 Tf 27.744 0 Td [(=)]TJ/F62 9.9626 Tf 10.961 0 Td [(1000.)]TJ -77.274 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable)]TJ/F60 9.9626 Tf 142.349 0 Td [(i)-32(t)-25(m)-40(a)-42(x)]TJ/F91 10.3811 Tf 27.743 0 Td [(\025)]TJ/F62 9.9626 Tf 10.962 0 Td [(1.)]TJ +0 g 0 G +/F59 9.9626 Tf -205.961 -22.202 Td [(itrace)]TJ +0 g 0 G +/F62 9.9626 Tf 29.878 0 Td [(If)]TJ/F69 10.3811 Tf 11.007 0 Td [(>)]TJ/F62 9.9626 Tf 14.142 0 Td [(0)-422(print)-423(out)-422(an)-422(informational)-423(message)-422(about)-422(conver)18(gence)-423(every)]TJ/F60 9.9626 Tf -30.066 -11.955 Td [(i)-32(t)-15(r)-50(a)-25(c)-25(e)]TJ/F62 9.9626 Tf 26.396 0 Td [(iterations.)-310(If)]TJ/F93 10.3811 Tf 56.313 0 Td [(=)]TJ/F62 9.9626 Tf 10.961 0 Td [(0)-250(print)-250(a)-250(message)-250(in)-250(case)-250(of)-250(conver)18(gence)-250(failur)18(e.)]TJ -93.724 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F60 9.9626 Tf 38.57 0 Td [(i)-32(t)-15(r)-50(a)-25(c)-25(e)]TJ/F93 10.3811 Tf 26.796 0 Td [(=)]TJ/F91 10.3811 Tf 11.086 0 Td [(\000)]TJ/F62 9.9626 Tf 8.194 0 Td [(1.)]TJ +0 g 0 G +/F59 9.9626 Tf -109.553 -34.158 Td [(irst)]TJ +0 g 0 G +/F62 9.9626 Tf 19.915 0 Td [(An)-250(integer)-250(specifying)-250(the)-250(r)18(estart)-250(parameter)74(.)]TJ 4.992 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.431 0 Td [(global)]TJ/F62 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(V)92(alues:)]TJ/F60 9.9626 Tf 34.613 0 Td [(i)-22(r)-35(s)-25(t)]TJ/F69 10.3811 Tf 17.671 0 Td [(>)]TJ/F62 9.9626 Tf 10.961 0 Td [(0.)-298(This)-214(is)-213(employed)-214(for)-213(the)-214(BiCGST)74(ABL)-214(or)-213(RGMRES)-214(meth-)]TJ -63.245 -11.955 Td [(ods,)-250(otherwise)-250(it)-250(is)-250(ignor)18(ed.)]TJ +0 g 0 G + 139.477 -29.888 Td [(160)]TJ 0 g 0 G ET endstream endobj -1785 0 obj +2028 0 obj << -/Length 1332 +/Length 4489 >> stream 0 g 0 G 0 g 0 G +0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(8.2)-1000(psb)]TJ +/F59 9.9626 Tf 99.895 706.129 Td [(istop)]TJ +0 g 0 G +/F62 9.9626 Tf 27.666 0 Td [(An)-250(integer)-250(specifying)-250(the)-250(stopping)-250(criterion.)]TJ -2.759 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(V)92(alues:)-351(1:)-351(use)-271(the)-270(normwise)-271(backwar)18(d)-270(err)18(or)74(,)-276(2:)-351(use)-271(the)-270(scaled)-271(2-norm)-270(of)]TJ 0 -11.955 Td [(the)-250(r)18(esidual,)-250(3:)-310(use)-250(the)-250(r)18(esidual)-250(r)18(eduction)-250(in)-250(the)-250(2-norm.)-310(Default:)-310(2.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(On)-250(Return)]TJ +0 g 0 G +0 g 0 G + 0 -19.926 Td [(x)]TJ +0 g 0 G +/F62 9.9626 Tf 9.963 0 Td [(The)-250(computed)-250(solution.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(inout)]TJ/F62 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ +0 0 1 rg 0 0 1 RG +/F67 9.9626 Tf 218.688 0 Td [(psb)]TJ ET q -1 0 0 1 147.429 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 359.808 558.881 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(error)-306(\227)-306(Prints)-306(the)-306(error)-307(s)1(tack)-307(content)-306(and)-306(aborts)-306(exe-)]TJ -24.221 -13.948 Td [(cution)]TJ/F54 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-900(p)-126(s)-125(b)]TJ +/F67 9.9626 Tf 362.947 558.682 Td [(T)]TJ ET q -1 0 0 1 151.092 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 368.804 558.881 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 371.943 558.682 Td [(vect)]TJ +ET +q +1 0 0 1 393.492 558.881 cm +[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +Q +BT +/F67 9.9626 Tf 396.63 558.682 Td [(type)]TJ +0 g 0 G +/F62 9.9626 Tf 20.921 0 Td [(.)]TJ +0 g 0 G +/F59 9.9626 Tf -317.656 -19.925 Td [(iter)]TJ +0 g 0 G +/F62 9.9626 Tf 20.473 0 Td [(The)-250(number)-250(of)-250(iterations)-250(performed.)]TJ 4.434 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Returned)-250(as:)-310(an)-250(integer)-250(variable.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(err)]TJ +0 g 0 G +/F62 9.9626 Tf 17.714 0 Td [(The)-250(conver)18(gence)-250(estimate)-250(on)-250(exit.)]TJ 7.193 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Returned)-250(as:)-310(a)-250(r)18(eal)-250(number)74(.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(cond)]TJ +0 g 0 G +/F62 9.9626 Tf 27.119 0 Td [(An)-210(esti)1(mate)-210(of)-210(the)-209(condition)-210(number)-209(of)-210(matrix)]TJ/F60 9.9626 Tf 204.999 0 Td [(A)]TJ/F62 9.9626 Tf 7.318 0 Td [(;)-223(only)-210(available)-209(with)-210(the)]TJ/F60 9.9626 Tf -214.444 -11.956 Td [(C)-45(G)]TJ/F62 9.9626 Tf 17.001 0 Td [(method)-250(on)-250(r)18(eal)-250(data.)]TJ -17.086 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Returned)-249(as:)-310(a)-249(r)18(eal)-249(number)74(.)-310(A)-249(corr)18(ect)-250(r)18(esult)-249(will)-249(be)-249(gr)18(eater)-250(than)-249(or)-249(equal)]TJ 0 -11.955 Td [(to)-403(one;)-480(if)-403(speci\002ed)-403(for)-403(non-r)18(eal)-403(data,)-441(or)-403(an)-403(err)18(or)-403(occurr)18(ed,)-441(zer)18(o)-403(is)-403(r)18(e-)]TJ 0 -11.956 Td [(turned.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -19.925 Td [(info)]TJ +0 g 0 G +/F62 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(out)]TJ/F62 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0 g 0 G + 139.477 -161.394 Td [(161)]TJ +0 g 0 G +ET + +endstream +endobj +2038 0 obj +<< +/Length 7598 +>> +stream +0 g 0 G +0 g 0 G +BT +/F59 14.3462 Tf 150.705 706.042 Td [(12)-1000(Extensions)]TJ/F62 9.9626 Tf 0 -22.702 Td [(The)-216(EXT)74(,)-217(CUDA)-216(and)-217(RSB)-216(subdir)18(ectories)-216(contains)-217(a)-216(set)-217(of)-216(extensions)-216(to)-217(the)-216(base)]TJ 0 -11.955 Td [(library)111(.)-678(The)-373(extensions)-373(pr)18(ovide)-372(additional)-373(storage)-373(formats)-373(beyond)-372(the)-373(ones)]TJ 0 -11.955 Td [(alr)18(eady)-250(contained)-250(in)-250(the)-250(base)-250(library)111(,)-250(as)-250(well)-250(as)-250(interfaces)-250(to:)]TJ +0 g 0 G +/F59 9.9626 Tf 0 -19.102 Td [(SPGPU)]TJ +0 g 0 G +/F62 9.9626 Tf 39.292 0 Td [(a)-296(CUDA)-297(library)-296(originally)-296(published)-297(as)]TJ +0 1 0 0 k 0 1 0 0 K +/F67 9.9626 Tf 178.891 0 Td [(https://code.google.com/)]TJ -193.277 -11.955 Td [(p/spgpu/)]TJ +0 g 0 G +/F62 9.9626 Tf 46.128 0 Td [(and)-430(now)-430(included)-430(in)-431(t)1(he)]TJ/F67 9.9626 Tf 119.328 0 Td [(cuda)]TJ/F62 9.9626 Tf 25.207 0 Td [(subdir)74(,)-475(for)-430(computations)-430(on)]TJ -190.663 -11.955 Td [(NVIDIA)-250(GPUs;)]TJ +0 g 0 G +/F59 9.9626 Tf -24.906 -19.514 Td [(LIBRSB)]TJ +0 g 0 G +0 1 0 0 k 0 1 0 0 K +/F67 9.9626 Tf 41.514 0 Td [(http://sourceforge.net/projects/librsb/)]TJ +0 g 0 G +/F62 9.9626 Tf 203.983 0 Td [(,)-398(for)-368(computations)-368(on)]TJ -220.591 -11.955 Td [(multicor)18(e)-250(parallel)-250(machines.)]TJ -24.906 -19.102 Td [(The)-318(infrastr)8(uctur)18(e)-317(laid)-318(out)-317(in)-318(the)-318(base)-317(library)-318(to)-318(allow)-317(for)-318(these)-318(extensions)-317(is)]TJ 0 -11.956 Td [(detailed)-299(in)-299(the)-299(r)18(efer)18(ences)-299([)]TJ +1 0 0 rg 1 0 0 RG + [(20)]TJ +0 g 0 G + [(,)]TJ +1 0 0 rg 1 0 0 RG + [-299(21)]TJ +0 g 0 G + [(,)]TJ +1 0 0 rg 1 0 0 RG + [-299(10)]TJ +0 g 0 G + [(];)-324(the)-299(CUDA-speci\002c)-299(data)-299(formats)-299(ar)18(e)-299(de-)]TJ 0 -11.955 Td [(scribed)-250(in)-250([)]TJ +1 0 0 rg 1 0 0 RG + [(22)]TJ +0 g 0 G + [(].)]TJ/F59 11.9552 Tf 0 -28.94 Td [(12.1)-1000(Using)-250(the)-250(extensions)]TJ/F62 9.9626 Tf 0 -18.964 Td [(A)-279(sample)-279(application)-279(using)-279(the)-279(PSBLAS)-279(extensions)-279(will)-279(contain)-279(the)-279(following)]TJ 0 -11.955 Td [(steps:)]TJ +0 g 0 G + 13.888 -19.102 Td [(\225)]TJ +0 g 0 G +/F67 9.9626 Tf 11.018 0 Td [(USE)]TJ/F62 9.9626 Tf 18.182 0 Td [(the)-250(appr)18(opriat)-250(modules)-250(\050)]TJ/F67 9.9626 Tf 110.036 0 Td [(psb_ext_mod)]TJ/F62 9.9626 Tf 57.534 0 Td [(,)]TJ/F67 9.9626 Tf 4.981 0 Td [(psb_cuda_mod)]TJ/F62 9.9626 Tf 62.764 0 Td [(\051;)]TJ +0 g 0 G + -264.515 -19.514 Td [(\225)]TJ +0 g 0 G + [-500(Declar)18(e)-190(a)]TJ/F60 9.9626 Tf 53.1 0 Td [(mold)]TJ/F62 9.9626 Tf 21.818 0 Td [(variable)-190(of)-190(the)-190(necessary)-190(type)-190(\050e.g.)]TJ/F67 9.9626 Tf 151.361 0 Td [(psb_d_ell_sparse_mat)]TJ/F62 9.9626 Tf 104.607 0 Td [(,)]TJ/F67 9.9626 Tf -319.867 -11.955 Td [(psb_d_hlg_sparse_mat)]TJ/F62 9.9626 Tf 104.606 0 Td [(,)]TJ/F67 9.9626 Tf 4.982 0 Td [(psb_d_vect_cuda)]TJ/F62 9.9626 Tf 78.455 0 Td [(\051;)]TJ +0 g 0 G + -199.062 -19.514 Td [(\225)]TJ +0 g 0 G + [-500(Pass)-289(the)-290(mold)-289(variable)-290(to)-289(the)-290(base)-289(library)-289(interface)-290(wher)18(e)-289(needed)-290(to)-289(en-)]TJ 11.019 -11.955 Td [(sur)18(e)-250(the)-250(appr)18(opriate)-250(dynamic)-250(type.)]TJ -24.907 -19.102 Td [(Suppose)-366(you)-367(want)-366(to)-367(use)-367(the)-366(CUDA-enabled)-366(ELLP)92(ACK)-367(data)-366(str)8(uctur)18(e;)-425(you)]TJ 0 -11.955 Td [(would)-371(use)-370(a)-371(piece)-370(of)-371(code)-370(like)-371(this)-370(\050and)-371(don't)-370(for)18(get,)-401(you)-371(need)-370(CUDA-side)]TJ 0 -11.955 Td [(vectors)-250(along)-250(with)-250(the)-250(matrices\051:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET +q +1 0 0 1 150.705 120.326 cm +0 0 343.711 225.156 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F54 9.9626 Tf 155.328 667.24 Td [(e)-125(r)-125(r)-126(o)-125(r)-259(\050)-279(i)-146(c)-146(o)-147(n)-146(t)-146(x)-146(t)-280(\051)]TJ +/F102 8.9664 Tf 153.694 334.821 Td [(program)]TJ +0 g 0 G + [-525(my_cuda_test)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 9.414 -10.959 Td [(use)]TJ +0 g 0 G + [-525(psb_base_mod)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(use)]TJ +0 g 0 G + [-525(psb_util_mod)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.958 Td [(use)]TJ +0 g 0 G + [-525(psb_ext_mod)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(use)]TJ +0 g 0 G + [-525(psb_cuda_mod)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ +0 g 0 G + [(\050psb_dspmat_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(a,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(agpu)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ +0 g 0 G + [(\050psb_d_vect_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(x,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(xg,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(bg)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -21.918 Td [(real)]TJ +0 g 0 G + [(\050psb_dpk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(xtmp\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ +0 g 0 G + [(\050psb_d_vect_cuda\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-3675(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(vmold)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ +0 g 0 G + [(\050psb_d_elg_sparse_mat\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(aelg)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ +0 g 0 G + [(\050psb_ctxt_type\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(ctxt)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(integer)]TJ 0 g 0 G -/F51 9.9626 Tf -55.433 -27.896 Td [(T)90(ype:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-6825(::)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(iam,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(icontxt)]TJ + [-525(np)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 35.965 0 Td [(the)-250(communication)-250(context.)]TJ -11.058 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -32.876 Td [(call)]TJ 0 g 0 G - 139.477 -461.235 Td [(139)]TJ + [-525(psb_init\050ctxt\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(call)]TJ +0 g 0 G + [-525(psb_info\050ctxt,iam,np\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(call)]TJ +0 g 0 G + [-525(psb_cuda_init\050ctxt,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(iam\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0 g 0 G +/F62 9.9626 Tf 151.98 -58.082 Td [(162)]TJ 0 g 0 G ET endstream endobj -1674 0 obj +1929 0 obj << /Type /ObjStm /N 100 -/First 975 -/Length 9667 ->> -stream -1671 0 1673 118 457 177 1670 235 1676 382 1678 500 1679 558 1680 616 1681 674 1675 731 -1685 866 1687 984 461 1043 1684 1101 1689 1248 1691 1366 1692 1424 1693 1482 1694 1540 1688 1597 -1696 1732 1698 1850 465 1909 1695 1967 1700 2114 1702 2232 1703 2290 1704 2348 1705 2406 1699 2464 -1707 2599 1709 2717 469 2776 1706 2834 1711 2981 1713 3099 1714 3157 1715 3215 1716 3273 1710 3330 -1719 3465 1721 3583 473 3642 1718 3700 1723 3847 1725 3965 1726 4023 1727 4081 1728 4139 1722 4196 -1730 4331 1732 4449 477 4508 1729 4566 1734 4713 1736 4831 1737 4889 1738 4947 1739 5005 1733 5062 -1741 5197 1743 5315 481 5374 1740 5432 1745 5579 1747 5697 1748 5755 1749 5813 1751 5870 1752 5928 -1753 5986 1744 6043 1756 6218 1758 6336 485 6395 1759 6453 1755 6512 1761 6659 1763 6777 489 6835 -1764 6892 1760 6950 1768 7097 1765 7245 1766 7393 1770 7541 493 7600 1767 7658 1774 7752 1776 7870 -1771 7928 1772 7986 1773 8044 1778 8140 1780 8258 497 8317 1781 8375 1782 8434 1777 8493 1784 8574 -% 1671 0 obj -<< -/Type /Page -/Contents 1672 0 R -/Resources 1670 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1653 0 R ->> -% 1673 0 obj -<< -/D [1671 0 R /XYZ 149.705 753.953 null] +/First 976 +/Length 11413 >> -% 457 0 obj +stream +1927 0 1922 58 1932 152 1930 291 1934 434 529 493 1935 551 1936 609 1931 668 1939 762 +1937 901 1941 1046 533 1104 1942 1161 1943 1219 1938 1277 1946 1399 1944 1538 1948 1680 537 1739 +1949 1797 1950 1856 1945 1915 1952 2037 1954 2155 1951 2213 1956 2296 1958 2414 541 2473 1955 2531 +1962 2625 1959 2773 1960 2920 1964 3068 545 3126 1965 3183 1961 3241 1972 3361 1967 3527 1968 3672 +1969 3818 1970 3965 1974 4112 549 4171 1971 4229 1976 4323 1978 4441 1975 4499 1984 4593 1979 4759 +1980 4904 1981 5047 1982 5193 1986 5337 553 5396 1983 5454 1989 5548 1987 5687 1991 5832 557 5890 +1988 5947 1993 6067 1995 6185 561 6244 1992 6302 1999 6396 1996 6544 1997 6689 2001 6836 565 6894 +1998 6951 2004 7045 2006 7163 569 7222 2003 7280 2011 7374 2007 7522 2008 7671 2013 7816 573 7874 +2014 7931 2015 7989 2016 8047 2017 8105 2010 8163 2022 8310 2009 8476 2018 8623 2019 8767 2020 8911 +2024 9056 2021 9115 2027 9262 2025 9401 2029 9545 2026 9603 2037 9710 2030 9903 2040 10079 2031 10254 +% 1927 0 obj << -/D [1671 0 R /XYZ 150.705 716.092 null] +/D [1923 0 R /XYZ 99.895 679.195 null] >> -% 1670 0 obj +% 1922 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R /F83 813 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1676 0 obj +% 1932 0 obj << /Type /Page -/Contents 1677 0 R -/Resources 1675 0 R +/Contents 1933 0 R +/Resources 1931 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1683 0 R +/Parent 1928 0 R +/Annots [ 1930 0 R ] +>> +% 1930 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [352.526 426.323 428.58 438.383] +/A << /S /GoTo /D (vdata) >> >> -% 1678 0 obj +% 1934 0 obj << -/D [1676 0 R /XYZ 98.895 753.953 null] +/D [1932 0 R /XYZ 149.705 753.953 null] >> -% 1679 0 obj +% 529 0 obj << -/D [1676 0 R /XYZ 99.895 528.579 null] +/D [1932 0 R /XYZ 150.705 716.092 null] >> -% 1680 0 obj +% 1935 0 obj << -/D [1676 0 R /XYZ 99.895 494.104 null] +/D [1932 0 R /XYZ 150.705 678.98 null] >> -% 1681 0 obj +% 1936 0 obj << -/D [1676 0 R /XYZ 99.895 407.25 null] +/D [1932 0 R /XYZ 150.705 679.195 null] >> -% 1675 0 obj +% 1931 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F94 915 0 R /F52 585 0 R /F112 1682 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1685 0 obj +% 1939 0 obj << /Type /Page -/Contents 1686 0 R -/Resources 1684 0 R +/Contents 1940 0 R +/Resources 1938 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1683 0 R +/Parent 1928 0 R +/Annots [ 1937 0 R ] >> -% 1687 0 obj +% 1937 0 obj << -/D [1685 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 574.778 369.462 586.838] +/A << /S /GoTo /D (spdata) >> >> -% 461 0 obj +% 1941 0 obj << -/D [1685 0 R /XYZ 150.705 716.092 null] +/D [1939 0 R /XYZ 98.895 753.953 null] >> -% 1684 0 obj +% 533 0 obj +<< +/D [1939 0 R /XYZ 99.895 716.092 null] +>> +% 1942 0 obj +<< +/D [1939 0 R /XYZ 99.895 679.441 null] +>> +% 1943 0 obj +<< +/D [1939 0 R /XYZ 99.895 679.657 null] +>> +% 1938 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F60 666 0 R /F102 1016 0 R >> /ProcSet [ /PDF /Text ] >> -% 1689 0 obj +% 1946 0 obj << /Type /Page -/Contents 1690 0 R -/Resources 1688 0 R +/Contents 1947 0 R +/Resources 1945 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1683 0 R +/Parent 1928 0 R +/Annots [ 1944 0 R ] >> -% 1691 0 obj +% 1944 0 obj << -/D [1689 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [452.361 574.59 528.415 586.65] +/A << /S /GoTo /D (vdata) >> >> -% 1692 0 obj +% 1948 0 obj << -/D [1689 0 R /XYZ 99.895 552.489 null] +/D [1946 0 R /XYZ 149.705 753.953 null] >> -% 1693 0 obj +% 537 0 obj << -/D [1689 0 R /XYZ 99.895 518.014 null] +/D [1946 0 R /XYZ 150.705 716.092 null] >> -% 1694 0 obj +% 1949 0 obj << -/D [1689 0 R /XYZ 99.895 431.16 null] +/D [1946 0 R /XYZ 150.705 679.413 null] >> -% 1688 0 obj +% 1950 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F59 812 0 R /F94 915 0 R /F52 585 0 R /F112 1682 0 R >> +/D [1946 0 R /XYZ 150.705 679.628 null] +>> +% 1945 0 obj +<< +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F60 666 0 R /F102 1016 0 R >> /ProcSet [ /PDF /Text ] >> -% 1696 0 obj +% 1952 0 obj << /Type /Page -/Contents 1697 0 R -/Resources 1695 0 R +/Contents 1953 0 R +/Resources 1951 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1683 0 R ->> -% 1698 0 obj -<< -/D [1696 0 R /XYZ 149.705 753.953 null] +/Parent 1928 0 R >> -% 465 0 obj +% 1954 0 obj << -/D [1696 0 R /XYZ 150.705 716.092 null] +/D [1952 0 R /XYZ 98.895 753.953 null] >> -% 1695 0 obj +% 1951 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R >> +/Font << /F102 1016 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1700 0 obj +% 1956 0 obj << /Type /Page -/Contents 1701 0 R -/Resources 1699 0 R +/Contents 1957 0 R +/Resources 1955 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1683 0 R +/Parent 1928 0 R >> -% 1702 0 obj +% 1958 0 obj << -/D [1700 0 R /XYZ 98.895 753.953 null] +/D [1956 0 R /XYZ 149.705 753.953 null] >> -% 1703 0 obj +% 541 0 obj << -/D [1700 0 R /XYZ 99.895 540.534 null] +/D [1956 0 R /XYZ 150.705 716.092 null] >> -% 1704 0 obj +% 1955 0 obj << -/D [1700 0 R /XYZ 99.895 506.059 null] +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1705 0 obj +% 1962 0 obj << -/D [1700 0 R /XYZ 99.895 419.205 null] +/Type /Page +/Contents 1963 0 R +/Resources 1961 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 1966 0 R +/Annots [ 1959 0 R 1960 0 R ] >> -% 1699 0 obj +% 1959 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F94 915 0 R /F52 585 0 R /F112 1682 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [320.317 442.264 387.374 454.323] +/A << /S /GoTo /D (precdata) >> >> -% 1707 0 obj +% 1960 0 obj << -/Type /Page -/Contents 1708 0 R -/Resources 1706 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1683 0 R +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [171.257 352.6 177.533 366.303] +/A << /S /GoTo /D (Hfootnote.4) >> >> -% 1709 0 obj +% 1964 0 obj << -/D [1707 0 R /XYZ 149.705 753.953 null] +/D [1962 0 R /XYZ 98.895 753.953 null] >> -% 469 0 obj +% 545 0 obj +<< +/D [1962 0 R /XYZ 99.895 716.092 null] +>> +% 1965 0 obj << -/D [1707 0 R /XYZ 150.705 716.092 null] +/D [1962 0 R /XYZ 114.242 129.79 null] >> -% 1706 0 obj +% 1961 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R >> /ProcSet [ /PDF /Text ] >> -% 1711 0 obj +% 1972 0 obj << /Type /Page -/Contents 1712 0 R -/Resources 1710 0 R +/Contents 1973 0 R +/Resources 1971 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1717 0 R +/Parent 1966 0 R +/Annots [ 1967 0 R 1968 0 R 1969 0 R 1970 0 R ] >> -% 1713 0 obj +% 1967 0 obj << -/D [1711 0 R /XYZ 98.895 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [365.487 576.377 443.006 588.437] +/A << /S /GoTo /D (spdata) >> >> -% 1714 0 obj +% 1968 0 obj << -/D [1711 0 R /XYZ 99.895 552.489 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [451.902 505.741 518.96 517.801] +/A << /S /GoTo /D (precdata) >> >> -% 1715 0 obj +% 1969 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [422.264 435.105 489.322 447.165] +/A << /S /GoTo /D (descdata) >> +>> +% 1970 0 obj << -/D [1711 0 R /XYZ 99.895 518.014 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [368.227 128.475 435.285 140.535] +/A << /S /GoTo /D (precdata) >> >> -% 1716 0 obj +% 1974 0 obj << -/D [1711 0 R /XYZ 99.895 431.16 null] +/D [1972 0 R /XYZ 149.705 753.953 null] >> -% 1710 0 obj +% 549 0 obj +<< +/D [1972 0 R /XYZ 150.705 716.092 null] +>> +% 1971 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F59 812 0 R /F94 915 0 R /F52 585 0 R /F112 1682 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1719 0 obj +% 1976 0 obj << /Type /Page -/Contents 1720 0 R -/Resources 1718 0 R +/Contents 1977 0 R +/Resources 1975 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1717 0 R ->> -% 1721 0 obj -<< -/D [1719 0 R /XYZ 149.705 753.953 null] +/Parent 1966 0 R >> -% 473 0 obj +% 1978 0 obj << -/D [1719 0 R /XYZ 150.705 716.092 null] +/D [1976 0 R /XYZ 98.895 753.953 null] >> -% 1718 0 obj +% 1975 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1723 0 obj +% 1984 0 obj << /Type /Page -/Contents 1724 0 R -/Resources 1722 0 R +/Contents 1985 0 R +/Resources 1983 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1717 0 R ->> -% 1725 0 obj -<< -/D [1723 0 R /XYZ 98.895 753.953 null] ->> -% 1726 0 obj -<< -/D [1723 0 R /XYZ 99.895 528.579 null] +/Parent 1966 0 R +/Annots [ 1979 0 R 1980 0 R 1981 0 R 1982 0 R ] >> -% 1727 0 obj +% 1979 0 obj << -/D [1723 0 R /XYZ 99.895 494.104 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [371.126 573.77 438.184 585.83] +/A << /S /GoTo /D (precdata) >> >> -% 1728 0 obj +% 1980 0 obj << -/D [1723 0 R /XYZ 99.895 407.25 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [393.303 517.98 469.357 530.039] +/A << /S /GoTo /D (vdata) >> >> -% 1722 0 obj +% 1981 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F94 915 0 R /F52 585 0 R /F112 1682 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [374.822 462.189 441.88 474.248] +/A << /S /GoTo /D (descdata) >> >> -% 1730 0 obj +% 1982 0 obj << -/Type /Page -/Contents 1731 0 R -/Resources 1729 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1717 0 R +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [393.303 272.899 469.357 284.958] +/A << /S /GoTo /D (vdata) >> >> -% 1732 0 obj +% 1986 0 obj << -/D [1730 0 R /XYZ 149.705 753.953 null] +/D [1984 0 R /XYZ 149.705 753.953 null] >> -% 477 0 obj +% 553 0 obj << -/D [1730 0 R /XYZ 150.705 716.092 null] +/D [1984 0 R /XYZ 150.705 716.092 null] >> -% 1729 0 obj +% 1983 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1734 0 obj +% 1989 0 obj << /Type /Page -/Contents 1735 0 R -/Resources 1733 0 R +/Contents 1990 0 R +/Resources 1988 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1717 0 R ->> -% 1736 0 obj -<< -/D [1734 0 R /XYZ 98.895 753.953 null] +/Parent 1966 0 R +/Annots [ 1987 0 R ] >> -% 1737 0 obj +% 1987 0 obj << -/D [1734 0 R /XYZ 99.895 528.579 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [320.317 573.77 387.374 585.83] +/A << /S /GoTo /D (precdata) >> >> -% 1738 0 obj +% 1991 0 obj << -/D [1734 0 R /XYZ 99.895 494.104 null] +/D [1989 0 R /XYZ 98.895 753.953 null] >> -% 1739 0 obj +% 557 0 obj << -/D [1734 0 R /XYZ 99.895 407.25 null] +/D [1989 0 R /XYZ 99.895 716.092 null] >> -% 1733 0 obj +% 1988 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F94 915 0 R /F52 585 0 R /F112 1682 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R /F60 666 0 R /F91 914 0 R >> /ProcSet [ /PDF /Text ] >> -% 1741 0 obj +% 1993 0 obj << /Type /Page -/Contents 1742 0 R -/Resources 1740 0 R +/Contents 1994 0 R +/Resources 1992 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1717 0 R +/Parent 1966 0 R >> -% 1743 0 obj +% 1995 0 obj << -/D [1741 0 R /XYZ 149.705 753.953 null] +/D [1993 0 R /XYZ 149.705 753.953 null] >> -% 481 0 obj +% 561 0 obj << -/D [1741 0 R /XYZ 150.705 716.092 null] +/D [1993 0 R /XYZ 150.705 716.092 null] >> -% 1740 0 obj +% 1992 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F83 813 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1745 0 obj +% 1999 0 obj << /Type /Page -/Contents 1746 0 R -/Resources 1744 0 R +/Contents 2000 0 R +/Resources 1998 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1754 0 R +/Parent 2002 0 R +/Annots [ 1996 0 R 1997 0 R ] >> -% 1747 0 obj +% 1996 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [320.317 573.77 387.374 585.83] +/A << /S /GoTo /D (precdata) >> +>> +% 1997 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [320.317 498.054 387.374 510.114] +/A << /S /GoTo /D (precdata) >> +>> +% 2001 0 obj << -/D [1745 0 R /XYZ 98.895 753.953 null] +/D [1999 0 R /XYZ 98.895 753.953 null] >> -% 1748 0 obj +% 565 0 obj << -/D [1745 0 R /XYZ 99.895 564.444 null] +/D [1999 0 R /XYZ 99.895 716.092 null] >> -% 1749 0 obj +% 1998 0 obj << -/D [1745 0 R /XYZ 99.895 529.97 null] +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> +/ProcSet [ /PDF /Text ] >> -% 1751 0 obj +% 2004 0 obj << -/D [1745 0 R /XYZ 99.895 441.815 null] +/Type /Page +/Contents 2005 0 R +/Resources 2003 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 2002 0 R >> -% 1752 0 obj +% 2006 0 obj << -/D [1745 0 R /XYZ 99.895 409.935 null] +/D [2004 0 R /XYZ 149.705 753.953 null] >> -% 1753 0 obj +% 569 0 obj << -/D [1745 0 R /XYZ 99.895 323.08 null] +/D [2004 0 R /XYZ 150.705 716.092 null] >> -% 1744 0 obj +% 2003 0 obj << -/Font << /F54 586 0 R /F51 584 0 R /F59 812 0 R /F52 585 0 R /F85 814 0 R /F1 1750 0 R /F96 1154 0 R /F94 915 0 R /F112 1682 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1756 0 obj +% 2011 0 obj << /Type /Page -/Contents 1757 0 R -/Resources 1755 0 R +/Contents 2012 0 R +/Resources 2010 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1754 0 R +/Parent 2002 0 R +/Annots [ 2007 0 R 2008 0 R ] >> -% 1758 0 obj +% 2007 0 obj << -/D [1756 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [329.163 275.278 335.44 288.868] +/A << /S /GoTo /D (Hfootnote.5) >> >> -% 485 0 obj +% 2008 0 obj << -/D [1756 0 R /XYZ 150.705 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [291.943 134.696 369.462 146.755] +/A << /S /GoTo /D (spdata) >> >> -% 1759 0 obj +% 2013 0 obj << -/D [1756 0 R /XYZ 150.705 222.691 null] +/D [2011 0 R /XYZ 98.895 753.953 null] >> -% 1755 0 obj +% 573 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F61 1360 0 R /F85 814 0 R /F83 813 0 R >> -/ProcSet [ /PDF /Text ] +/D [2011 0 R /XYZ 99.895 716.092 null] >> -% 1761 0 obj +% 2014 0 obj << -/Type /Page -/Contents 1762 0 R -/Resources 1760 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1754 0 R +/D [2011 0 R /XYZ 99.895 444.811 null] >> -% 1763 0 obj +% 2015 0 obj << -/D [1761 0 R /XYZ 98.895 753.953 null] +/D [2011 0 R /XYZ 99.895 444.971 null] >> -% 489 0 obj +% 2016 0 obj << -/D [1761 0 R /XYZ 99.895 716.092 null] +/D [2011 0 R /XYZ 99.895 433.015 null] >> -% 1764 0 obj +% 2017 0 obj << -/D [1761 0 R /XYZ 99.895 222.691 null] +/D [2011 0 R /XYZ 114.242 129.79 null] >> -% 1760 0 obj +% 2010 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F61 1360 0 R /F85 814 0 R /F52 585 0 R /F83 813 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R /F93 915 0 R /F91 914 0 R /F69 1460 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1768 0 obj +% 2022 0 obj << /Type /Page -/Contents 1769 0 R -/Resources 1767 0 R +/Contents 2023 0 R +/Resources 2021 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1754 0 R -/Annots [ 1765 0 R 1766 0 R ] +/Parent 2002 0 R +/Annots [ 2009 0 R 2018 0 R 2019 0 R 2020 0 R ] >> -% 1765 0 obj +% 2009 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [196.011 499.949 202.985 512.009] -/A << /S /GoTo /D (listing.5) >> +/Rect [342.753 654.503 409.811 666.562] +/A << /S /GoTo /D (precdata) >> >> -% 1766 0 obj +% 2018 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [196.625 344.532 203.599 356.591] -/A << /S /GoTo /D (listing.6) >> +/Rect [393.303 584.479 469.357 596.539] +/A << /S /GoTo /D (vdata) >> >> -% 1770 0 obj +% 2019 0 obj << -/D [1768 0 R /XYZ 149.705 753.953 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [393.303 514.456 469.357 526.516] +/A << /S /GoTo /D (vdata) >> >> -% 493 0 obj +% 2020 0 obj +<< +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.753 374.41 409.811 386.47] +/A << /S /GoTo /D (descdata) >> +>> +% 2024 0 obj << -/D [1768 0 R /XYZ 150.705 716.092 null] +/D [2022 0 R /XYZ 149.705 753.953 null] >> -% 1767 0 obj +% 2021 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F60 666 0 R /F93 915 0 R /F91 914 0 R /F69 1460 0 R >> /ProcSet [ /PDF /Text ] >> -% 1774 0 obj +% 2027 0 obj << /Type /Page -/Contents 1775 0 R -/Resources 1773 0 R +/Contents 2028 0 R +/Resources 2026 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1754 0 R +/Parent 2002 0 R +/Annots [ 2025 0 R ] >> -% 1776 0 obj -<< -/D [1774 0 R /XYZ 98.895 753.953 null] ->> -% 1771 0 obj +% 2025 0 obj << -/D [1774 0 R /XYZ 99.895 411.235 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [342.493 554.876 418.548 566.936] +/A << /S /GoTo /D (vdata) >> >> -% 1772 0 obj +% 2029 0 obj << -/D [1774 0 R /XYZ 99.895 182.902 null] +/D [2027 0 R /XYZ 98.895 753.953 null] >> -% 1773 0 obj +% 2026 0 obj << -/Font << /F59 812 0 R /F112 1682 0 R /F54 586 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1778 0 obj +% 2037 0 obj << /Type /Page -/Contents 1779 0 R -/Resources 1777 0 R +/Contents 2038 0 R +/Resources 2036 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1754 0 R ->> -% 1780 0 obj -<< -/D [1778 0 R /XYZ 149.705 753.953 null] ->> -% 497 0 obj -<< -/D [1778 0 R /XYZ 150.705 716.092 null] ->> -% 1781 0 obj -<< -/D [1778 0 R /XYZ 150.705 690.058 null] +/Parent 2002 0 R +/Annots [ 2030 0 R 2040 0 R 2031 0 R 2032 0 R 2033 0 R 2034 0 R 2035 0 R ] >> -% 1782 0 obj +% 2030 0 obj << -/D [1778 0 R /XYZ 150.705 693.143 null] +/Type /Annot +/Border[0 0 0]/H/I/C[0 1 1] +/Rect [367.891 636.522 495.412 648.582] +/Subtype/Link/A<> >> -% 1777 0 obj +% 2040 0 obj << -/Font << /F51 584 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Border[0 0 0]/H/I/C[0 1 1] +/Rect [174.615 624.567 218.45 636.627] +/Subtype/Link/A<> >> -% 1784 0 obj +% 2031 0 obj << -/Type /Page -/Contents 1785 0 R -/Resources 1783 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1789 0 R +/Type /Annot +/Border[0 0 0]/H/I/C[0 1 1] +/Rect [191.223 593.098 397.198 605.158] +/Subtype/Link/A<> >> endstream endobj -1793 0 obj +2053 0 obj << -/Length 1526 +/Length 8663 >> stream 0 g 0 G 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(8.3)-1000(psb)]TJ -ET -q -1 0 0 1 198.238 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 201.825 706.129 Td [(set)]TJ -ET -q -1 0 0 1 217.809 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 221.396 706.129 Td [(errverbosity)-190(\227)-190(Sets)-190(the)-190(verbosity)-190(of)-190(error)-190(messages)]TJ/F54 9.9626 Tf -68.95 -24.942 Td [(c)-175(a)-175(l)-174(l)-921(p)-147(s)-146(b)]TJ -ET -q -1 0 0 1 202.736 681.387 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 207.181 681.187 Td [(s)-146(e)-146(t)]TJ -ET -q -1 0 0 1 224.391 681.387 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 228.836 681.187 Td [(e)-146(r)-146(r)-146(v)-147(e)-146(r)-146(b)-146(o)-146(s)-146(i)-146(t)-147(y)-279(\050)-151(v)-151(\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +q +1 0 0 1 99.895 421.197 cm +0 0 343.711 290.909 re f +Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +BT +/F120 8.9664 Tf 112.299 701.446 Td [(!)-525(My)-525(own)-525(home-grown)-525(matrix)-525(generator)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F102 8.9664 Tf 0 -10.958 Td [(call)]TJ +0 g 0 G + [-525(gen_matrix\050ctxt,)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [(idim)]TJ +0 g 0 G + [(,desc_a,a,x,info\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(/=)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(0)]TJ +0 g 0 G + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(goto)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(9999)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -21.918 Td [(call)]TJ +0 g 0 G + [-525(a%cscnv\050agpu,info,mold)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(aelg\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(/=)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(0)]TJ +0 g 0 G + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(goto)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(9999)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 0 -10.959 Td [(xtmp)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(x%get_vect\050\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(call)]TJ +0 g 0 G + [-525(xg%bld\050xtmp,mold)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(vmold\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(call)]TJ +0 g 0 G + [-525(bg%bld\050size\050xtmp\051,mold)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ +0 g 0 G + [(vmold\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 0 -21.918 Td [(!)-525(Do)-525(sparse)-525(MV)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F102 8.9664 Tf 0 -10.958 Td [(call)]TJ +0 g 0 G + [-525(psb_spmm\050done,agpu,xg,dzero,bg,desc_a,info\051)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + -9.415 -32.877 Td [(9999)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(continue)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 9.415 -10.959 Td [(if)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(\050info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(==)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(0)]TJ +0 g 0 G + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 14.122 -10.959 Td [(write)]TJ +0 g 0 G + [(\050)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(*)]TJ +0 g 0 G + [(,)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(*)]TJ +0 g 0 G + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + [-525(\01542\015)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -14.122 -10.959 Td [(else)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 14.122 -10.959 Td [(write)]TJ +0 g 0 G + [(\050)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(*)]TJ +0 g 0 G + [(,)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(*)]TJ +0 g 0 G + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.25 0.44 0.63 rg 0.25 0.44 0.63 RG + [-525(\015Something)-525(went)-525(wrong)-525(\015)]TJ +0 g 0 G + [(,info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -14.122 -10.959 Td [(end)-525(if)]TJ 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -32.876 Td [(call)]TJ 0 g 0 G + [-525(psb_cuda_exit\050\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -78.131 -27.895 Td [(T)90(ype:)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [-525(psb_exit\050ctxt\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(stop)]TJ 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.415 -10.959 Td [(end)-525(program)]TJ 0 g 0 G - 0 -19.925 Td [(v)]TJ + [-525(my_cuda_test)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(verbosity)-250(level)]TJ 14.386 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)]TJ +/F62 9.9626 Tf 11.955 -24.283 Td [(A)-378(full)-379(example)-378(of)-379(this)-378(strategy)-379(can)-378(be)-378(seen)-379(in)-378(the)]TJ/F67 9.9626 Tf 229.675 0 Td [(test/ext/kernel)]TJ/F62 9.9626 Tf 82.226 0 Td [(and)]TJ/F67 9.9626 Tf -326.845 -11.955 Td [(test/cuda/kernel)]TJ/F62 9.9626 Tf 86.402 0 Td [(subdir)18(ectories,)-278(wher)18(e)-273(we)-272(pr)18(ovide)-273(sample)-272(pr)18(ograms)-273(to)-273(test)]TJ -86.402 -11.955 Td [(the)-259(speed)-259(of)-259(the)-259(sparse)-259(matrix-vector)-259(pr)18(oduct)-259(with)-259(the)-259(various)-259(data)-259(str)8(uctur)18(es)]TJ 0 -11.956 Td [(included)-250(in)-250(the)-250(library)111(.)]TJ/F59 11.9552 Tf 0 -29.238 Td [(12.2)-1000(Extensions')-250(Data)-250(Structures)]TJ/F62 9.9626 Tf 0 -18.999 Td [(Access)-232(to)-233(the)-232(facilities)-232(pr)18(ovided)-233(by)-232(the)-232(EXT)-233(library)-232(is)-233(mainly)-232(achieved)-232(thr)18(ough)]TJ 0 -11.955 Td [(the)-384(data)-385(types)-384(that)-384(ar)18(e)-385(pr)18(ovi)1(ded)-385(within.)-713(The)-384(data)-384(classes)-385(ar)18(e)-384(derived)-384(fr)18(om)]TJ 0 -11.955 Td [(the)-247(base)-247(classes)-248(in)-247(PSBLAS,)-247(thr)18(ough)-247(the)-247(Fortran)-247(2003)-248(mechanism)-247(of)]TJ/F60 9.9626 Tf 299.187 0 Td [(type)-247(exten-)]TJ -299.187 -11.956 Td [(sion)]TJ/F62 9.9626 Tf 19.098 0 Td [([)]TJ +1 0 0 rg 1 0 0 RG + [(17)]TJ 0 g 0 G - 139.477 -475.183 Td [(140)]TJ + [(].)]TJ -4.154 -11.973 Td [(The)-255(data)-255(classes)-255(ar)18(e)-254(divided)-255(between)-255(the)-255(general)-255(purpose)-254(CPU)-255(extensions,)]TJ -14.944 -11.955 Td [(the)-232(GPU)-232(interfaces)-232(and)-232(the)-232(RSB)-232(interfaces.)-304(In)-232(the)-232(description)-232(we)-232(will)-232(make)-232(use)]TJ 0 -11.955 Td [(of)-250(the)-250(notation)-250(intr)18(oduced)-250(in)-250(T)92(able)]TJ +0 0 1 rg 0 0 1 RG + [-250(21)]TJ +0 g 0 G + [(.)]TJ/F59 11.9552 Tf 0 -29.238 Td [(12.3)-1000(CPU-class)-250(extensions)]TJ/F59 9.9626 Tf 0 -19 Td [(ELLP)74(ACK)]TJ/F62 9.9626 Tf 0 -18.999 Td [(The)-190(ELLP)92(ACK/ITP)92(ACK)-190(format)-190(\050shown)-190(in)-190(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-190(6)]TJ +0 g 0 G + [(\051)-190(comprises)-190(two)-190(2-dimensional)]TJ 0 -11.956 Td [(arrays)]TJ/F67 9.9626 Tf 30.302 0 Td [(AS)]TJ/F62 9.9626 Tf 13.165 0 Td [(and)]TJ/F67 9.9626 Tf 19.571 0 Td [(JA)]TJ/F62 9.9626 Tf 13.166 0 Td [(with)]TJ/F67 9.9626 Tf 22.958 0 Td [(M)]TJ/F62 9.9626 Tf 7.935 0 Td [(r)18(ows)-272(and)]TJ/F67 9.9626 Tf 44.005 0 Td [(MAXNZR)]TJ/F62 9.9626 Tf 34.087 0 Td [(columns,)-277(wher)18(e)]TJ/F67 9.9626 Tf 72.949 0 Td [(MAXNZR)]TJ/F62 9.9626 Tf 34.087 0 Td [(is)-272(th)1(e)-272(maxi-)]TJ -292.225 -11.955 Td [(mum)-211(number)-211(of)-212(nonzer)18(os)-211(in)-211(any)-211(r)18(ow)-211([)]TJ/F59 9.9626 Tf 167.954 0 Td [(?)]TJ/F62 9.9626 Tf 4.424 0 Td [(].)-297(Each)-211(r)18(ow)-211(of)-212(the)-211(arrays)]TJ/F67 9.9626 Tf 108.255 0 Td [(AS)]TJ/F62 9.9626 Tf 12.564 0 Td [(and)]TJ/F67 9.9626 Tf 18.971 0 Td [(JA)]TJ/F62 9.9626 Tf 12.565 0 Td [(con-)]TJ -324.733 -11.955 Td [(tains)-218(the)-217(coef)18(\002cients)-218(and)-217(column)-218(indices;)-228(r)18(ows)-218(shorter)-217(than)]TJ/F67 9.9626 Tf 260.483 0 Td [(MAXNZR)]TJ/F62 9.9626 Tf 33.549 0 Td [(ar)18(e)-218(padded)]TJ -294.032 -11.955 Td [(with)-315(zer)18(o)-315(coef)18(\002cients)-315(and)-315(appr)18(opriate)-315(column)-315(indices,)-331(e.g.)-505(the)-315(last)-315(valid)-315(one)]TJ 0 -11.955 Td [(found)-250(in)-250(the)-250(same)-250(r)18(ow)92(.)]TJ +0 g 0 G + 164.384 -29.888 Td [(163)]TJ 0 g 0 G ET endstream endobj -1799 0 obj +2063 0 obj << -/Length 2016 +/Length 4574 >> stream 0 g 0 G 0 g 0 G +0 g 0 G +0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(8.4)-1000(psb)]TJ +/F62 9.9626 Tf 189.471 698.871 Td [(T)92(able)-250(21:)-310(Notation)-250(for)-250(parameters)-250(describing)-250(a)-250(sparse)-250(matrix)]TJ +0 g 0 G +0 g 0 G +0 g 0 G ET q -1 0 0 1 147.429 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 222.652 684.904 cm +[]0 d 0 J 0.398 w 0 0 m 199.817 0 l S Q BT -/F51 11.9552 Tf 151.016 706.129 Td [(set)]TJ +/F62 7.9701 Tf 228.629 678.079 Td [(Name)-3364(Description)]TJ ET q -1 0 0 1 166.999 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 222.652 675.041 cm +[]0 d 0 J 0.398 w 0 0 m 199.817 0 l S Q BT -/F51 11.9552 Tf 170.586 706.129 Td [(erraction)-223(\227)-223(Set)-222(the)-223(type)-223(of)-223(action)-223(to)-222(be)-223(taken)-223(upon)]TJ -43.792 -13.948 Td [(error)-250(condition)]TJ/F54 9.9626 Tf -25.157 -24.941 Td [(c)-175(a)-175(l)-174(l)-926(p)-151(s)-151(b)]TJ +/F62 7.9701 Tf 228.629 668.216 Td [(M)-5111(Number)-250(of)-250(r)18(ows)-250(in)-250(matrix)]TJ 0 -9.464 Td [(N)-5226(Number)-250(of)-250(columns)-250(in)-250(matrix)]TJ 0 -9.465 Td [(NZ)-4559(Number)-250(of)-250(nonzer)18(os)-250(in)-250(matrix)]TJ 0 -9.464 Td [(A)111(VGNZR)-1739(A)92(verage)-250(number)-250(of)-250(nonzer)18(os)-250(per)-250(r)18(ow)]TJ 0 -9.465 Td [(MAXNZR)-1500(Maximum)-250(number)-250(of)-250(nonzer)18(os)-250(per)-250(r)18(ow)]TJ 0 -9.464 Td [(NDIAG)-2574(Numer)18(o)-250(of)-250(nonzer)18(o)-250(diagonals)]TJ 0 -9.465 Td [(AS)-4754(Coef)18(\002cients)-250(array)]TJ 0 -9.464 Td [(IA)-4942(Row)-250(indices)-250(array)]TJ 0 -9.465 Td [(JA)-4946(Column)-250(indices)-250(array)]TJ 0 -9.464 Td [(IRP)-4448(Row)-250(start)-250(pointers)-250(array)]TJ 0 -9.465 Td [(JCP)-4411(Column)-250(start)-250(pointers)-250(array)]TJ 0 -9.464 Td [(NZR)-3891(Number)-250(of)-250(nonzer)18(os)-250(per)-250(r)18(ow)-250(array)]TJ 0 -9.465 Td [(OFFSET)-2410(Of)18(fset)-250(for)-250(diagonals)]TJ ET q -1 0 0 1 152.113 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 222.652 551.604 cm +[]0 d 0 J 0.398 w 0 0 m 199.817 0 l S Q -BT -/F54 9.9626 Tf 156.605 667.24 Td [(s)-151(e)-151(t)]TJ -ET +0 g 0 G +0 g 0 G +1 0 0 1 247.614 396.819 cm q -1 0 0 1 173.955 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 178.447 667.24 Td [(e)-151(r)-151(r)-150(a)-151(c)-151(t)-151(i)-151(o)-151(n)-284(\050)-296(e)-163(r)-162(r)]TJ -ET +.33653 0 0 .33653 0 0 cm q -1 0 0 1 257.102 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 0 0 cm +/Im6 Do +Q Q -BT -/F54 9.9626 Tf 261.712 667.24 Td [(a)-163(c)-162(t)-296(\051)]TJ 0 g 0 G +1 0 0 1 -247.614 -396.819 cm +BT +/F62 9.9626 Tf 245.769 374.901 Td [(Figur)18(e)-250(5:)-310(Example)-250(of)-250(sparse)-250(matrix)]TJ 0 g 0 G 0 g 0 G -/F51 9.9626 Tf -161.817 -27.896 Td [(T)90(ype:)]TJ + -80.12 -32.171 Td [(The)-289(matrix-vector)-289(pr)18(oduct)]TJ/F60 9.9626 Tf 120.156 0 Td [(y)]TJ/F93 10.3811 Tf 8.719 0 Td [(=)]TJ/F60 9.9626 Tf 12.305 0 Td [(A)-42(x)]TJ/F62 9.9626 Tf 15.697 0 Td [(can)-289(be)-289(computed)-289(with)-289(the)-289(code)-290(shown)]TJ -171.821 -11.956 Td [(in)-365(Alg.)]TJ +0 0 1 rg 0 0 1 RG + [-365(1)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [(;)-423(it)-365(costs)-365(one)-365(memory)-366(write)-365(per)-365(outer)-365(iteration,)-394(plus)-365(thr)18(ee)-365(memory)]TJ 0 -11.955 Td [(r)18(eads)-250(and)-250(two)-250(\003oating-point)-250(operations)-250(per)-250(inner)-250(iteration.)]TJ 14.944 -11.955 Td [(Unless)-251(all)-252(r)18(ows)-251(have)-251(exactly)-251(the)-252(same)-251(number)-251(of)-252(nonzer)18(os,)-251(some)-252(of)-251(the)-251(co-)]TJ -14.944 -11.955 Td [(ef)18(\002cients)-225(in)-226(the)]TJ/F67 9.9626 Tf 68.551 0 Td [(AS)]TJ/F62 9.9626 Tf 12.705 0 Td [(array)-225(will)-226(be)-225(zer)18(os;)-233(ther)18(efor)18(e)-226(this)-225(data)-225(str)8(uctur)18(e)-225(will)-226(have)-225(an)]TJ -81.256 -11.955 Td [(over)18(head)-261(both)-261(in)-260(terms)-261(of)-261(memory)-261(space)-261(and)-261(r)18(edundant)-260(operations)-261(\050multipli-)]TJ 0 -11.956 Td [(cations)-250(by)-250(zer)18(o\051.)-310(The)-250(over)18(head)-250(can)-250(be)-250(acceptable)-250(if:)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + 12.453 -19.399 Td [(1.)]TJ 0 g 0 G + [-500(The)-289(maximum)-289(number)-289(of)-289(nonzer)18(os)-289(per)-290(r)18(ow)-289(is)-289(not)-289(much)-289(lar)18(ger)-289(than)-289(the)]TJ 12.453 -11.956 Td [(average;)]TJ 0 g 0 G - 0 -19.925 Td [(err)]TJ -ET -q -1 0 0 1 113.225 599.693 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 116.214 599.494 Td [(act)]TJ + -12.453 -19.662 Td [(2.)]TJ 0 g 0 G -/F54 9.9626 Tf 17.703 0 Td [(the)-250(type)-250(of)-250(action.)]TJ -9.115 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)74(.)-310(Possible)-250(values:)]TJ/F59 9.9626 Tf 179.116 0 Td [(psb_act_ret)]TJ/F54 9.9626 Tf 57.534 0 Td [(,)]TJ/F59 9.9626 Tf 4.981 0 Td [(psb_act_abort)]TJ/F54 9.9626 Tf 67.995 0 Td [(.)]TJ + [-500(The)-273(r)18(egularity)-274(of)-273(the)-274(data)-273(str)8(uctur)18(e)-274(allows)-273(for)-274(faster)-273(code,)-279(e.g.)-381(by)-273(allow-)]TJ 12.453 -11.955 Td [(ing)-247(vectorization,)-247(ther)18(eby)-247(of)18(fsetting)-247(the)-247(additional)-246(storage)-247(r)18(equir)18(ements.)]TJ -24.906 -19.4 Td [(In)-372(the)-372(extr)18(eme)-372(case)-372(wher)18(e)-371(the)-372(input)-372(matrix)-372(has)-372(one)-372(full)-372(r)18(ow)92(,)-402(the)-372(ELLP)92(ACK)]TJ 0 -11.955 Td [(str)8(uctur)18(e)-273(would)-273(r)18(equir)18(e)-273(mor)18(e)-273(memory)-273(than)-273(the)-273(normal)-273(2D)-273(array)-273(storage.)-379(The)]TJ 0 -11.956 Td [(ELLP)92(ACK)-305(storage)-305(format)-305(was)-305(very)-305(popular)-305(in)-305(the)-305(vector)-305(computing)-305(days;)-332(in)]TJ 0 -11.955 Td [(modern)-304(CPUs)-305(it)-304(is)-305(not)-304(quite)-305(as)-304(popular)74(,)-318(but)-305(it)-304(is)-305(the)-304(basis)-305(for)-304(many)-305(GPU)-304(for)18(-)]TJ 0 -11.955 Td [(mats.)]TJ 14.944 -11.955 Td [(The)-250(r)18(elevant)-250(data)-250(type)-250(is)]TJ/F67 9.9626 Tf 110.952 0 Td [(psb_T_ell_sparse_mat)]TJ/F62 9.9626 Tf 104.607 0 Td [(:)]TJ 0 g 0 G - -170.149 -461.235 Td [(141)]TJ + -66.12 -38.412 Td [(164)]TJ 0 g 0 G ET endstream endobj -1805 0 obj +2046 0 obj << -/Length 507 +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (./figures/mat.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 2068 0 R +/BBox [0 0 438 395] +/Resources << +/ProcSet [ /PDF /ImageC ] +/ExtGState << +/R7 2069 0 R +>>/XObject << +/R8 2070 0 R +>>>> +/Length 3551 +/Filter /FlateDecode >> stream -0 g 0 G -0 g 0 G -BT -/F51 14.3462 Tf 150.705 705.784 Td [(9)-1000(Utilities)]TJ/F54 9.9626 Tf 0 -22.702 Td [(W)92(e)-323(have)-322(some)-323(utilities)-322(available)-323(for)-322(input)-323(and)-323(output)-322(of)-323(sparse)-322(matrices;)-359(the)]TJ 0 -11.955 Td [(interfaces)-250(to)-250(these)-250(r)18(outines)-250(ar)18(e)-250(available)-250(in)-250(the)-250(module)]TJ/F59 9.9626 Tf 242.009 0 Td [(psb_util_mod)]TJ/F54 9.9626 Tf 62.764 0 Td [(.)]TJ -0 g 0 G - -140.39 -580.689 Td [(142)]TJ -0 g 0 G -ET - +xœí\K®,· ÷*j®èÿ™ðØö ¸pìÁívÙ~HŠ‡¬R 'žƒ¤ø·É–ÈsDEIÕþég<ÿ§?Þ¯?|ÛþöúéŽ~Ž0ê{tùãÕz:spMËálÍ @TÔŠžÏÑë^c=kq*¼ ‚jô²°“øxýùÃñ÷ HH¡Ò¨œa†0úä1 +uÖ~|ûõSÉÿøúîÇ8K!-yKdïýŠ9žü©3·œÑÎœµh‘G%Çõ¨sž-f²’â{œÇÌ4j:Ê9ú˜ñˆyfêéòlõœ<ФZÈy>`!‡0Ï1ÔCÎ5œ‰ÛAn“Q@˜}æä°LÌ0~|¾"yŠ“\ô˜Îž;ó=r-áé˜Ô±„väT‡ƒ†³žmÎt¤YÉÛ<|ÊôMª4òÉDbÏ.ƒ¦¤~†|ÒˆM ‘᲋'M•XÎLãB0)"•8SC¡žfƤÑÌl¼" $óçéú =ª÷ñy¿öñ»oÙ"0.@Œö^bLãë9ÐãŠ@•)Rg9öIäèyH.äíJþ>8e ÿmôyäp`tM¾Ž¿h4@{üþ9·9ÐÉÕ>Gn³h\çÕšhŸ/LÄ}¢îy%Ó,£,ÚtéS¤(I˜M_?ÿh=‹öDŽÖ®"ògóùº|\=: +q””_R ÇW4¥Z¯b›gHy­ãøã_y‚ý¦Ö+_¿¾jý+¥Nš¼G!äü´ÿü=%™o~}}1ÿOFÿKɈ—. Jó¼­0n +‚VÒ§ZɼxÍKØJ®í$ã¶LË]÷¨©èA[ +Ä Ý-ª j€ˆ¨) d‹-š“3bFOÆ]±zPá!õ<,î>’Í +-Ë›£O×JXü°j™‰™›a• \«NÔÀæsI•Š žZ<™eU„¢uHª Ír~6d S«RHA¦½ 4cUŸ/ë£wÒ%h›SE²™y¿vWd¸.T K/TÌ Ç +v]¡Žµ?pm.}¯•(_œ¼5´¹fÅ9MÿbqM“ãS/q6†ƒà£j̦Fh÷Ê[‰ÿ$¢ÍÎûµû¢Ç!ðjàšxìD{žTIŠî¶¹\@:-‘T!çØí¹y bšFU£¸Ç,V!†®^a@Ÿ4Ð_Þ¦$2>Ó§Ù:xšé7oר-^brW–_Äû{X•ÕòÎëíŠ$Ä°¤9$Àjâ/býò6ñ n_¼_[×ÏF·å{ÇË\ø’Våê´í¼Žƒ)V¯Ô¦"°šZ‘ÂÌ"Õ¹ÎТV¹¿0 &ïitVÀ¦|ø´ +k¡ aàNB’Ô#yÉB'•„”ŸÂÔÂÞ5eÕå™Ï&ø”f®ºRº@æ’Š¾c²T¸ÏáLž¨ì¡‰ÊA-ä8Vi¯> SEa-€6µçr»F.õ*'‡ÎŽæÁ‹Ø:9%e\ +.Óh¹#ßÔZ55ÌóRoÍ9Vyµ“þðfG»C´Z +-–Ð{•Rf¥¼ë×À¦wôò€=Œ‘fÃÆ÷NЀS‰|ÔdœrËz +ˆGZîÏ/¬ùvª{Í-À…î +ö•úׯ½7üšÆINAJÜX™FQÑÚze•ôšßY¥Ôo¬¿ºà¬ š[(—u_°Í>XÁ¿~ tè½á—ÇqZ‘ +rµçõ“iP·f¾È*^×ò2¦xå˯¹„ä-Rì‚ {åk­|a•/|Xýd(ÐÂp«…ÇŠØS¹IäÂ\ñtvÐ[¨2=„5_¼ð嫼|)g”ù†ºWE/{¡ÐšÝœ™"°€¬Ä6 +´'ÒÒÛY>|¼T¼PhÅøì2YAËê½xšSß[_Š¹ÎFÑÛD”5PhÝc½¥.Ên^e¾¥Uÿh¡ðPìÞá¯ãYŒ8J•b-/göQù•½÷E“ ϦÂ+ù4~ߌ+—©­Rú@‘97àòrA ¹óÛ 00úàùdTþxÕF5°³àh=•Ù-j¥QníWn%È5§ÙÍM²³yVÙ¹©Â«——‹ ·åÕV¨•²ut*_¸©Æ0ªÅ-jcÆQÃïhœ›k4üÐñ[ç†_ó@£?öá +fݸ90jA5Q-ì$8h%¦;eÙ?;5×,d”uåM^§¼,ç*NmRZnò³Ê&L„_(˜uWàæÀ¨´Dµ°“Ð ó‰­}sËÒË©™F©•Ä»qc–»ìÖmo9„öô×ûC„[(€ Ýœ™"@„ÂÚ:?–äŤْ9+§h¸¢éGšnC-¼µèY²Êœ=0U´¬ }õ7Qpö€†&X­ú§²^—„•){‚jó2°s´øH^ˆmÐFŽgmY‰ñÏ0‘¿²Á뙞ޞÅ-¿½£"Óš‹ïRjÚq ;dØW^¬IÞ W3ù:€¨´Š)€W{oð«'1ÒµKöe5´³7=M¡ý\®|îxcWb>iåi&;?Èœ€CZEÖÒLznùa€ ”ã¡©üþo9`1£sÝ~Aò2kÔ0°±ÐÅë‘ÌtßøžØ1M“ÖdŠaŒ57i®%>ÊÎò.(d‚:Ö{ÜÐÌBé£N³`rÇ|&¥5¡`!5šoâsù€Ì¸PX E ;ÝA?”›Þ„ñÛÓ=+[çe{Ždžæ ¼gû4ùµ{o0æªqW-»f%kô×\n,ÛÕâÕ# —`ã´ÞÊR‡xº‡ ;çÂ… òyNS*Oø9eYZ­Eœ\*Yÿ%zB†B6º#ŸÃ'|E` 6 +녜ҺÆËó=4H×jÙ~=ß™µXÀ­ÿ=C¡¹Ý‘ËáÀ“½"°€ …k¼EKâÕ£ß ýrR“÷J¬EJ·“—±O7îäÍ‚îô͇ +´JXØxèóC¹ù[— S¬ë¹Ïæ^zÝ°.¼¯÷ëFÜ$ä 5`2. L£× 0 · æÁî# Z(Dô¿Sð÷žÅjý²¥¬³*'fÖåÃÝÖ;?buÞîR­rœ†þ—* Ýœ—"@„ÂúaËiý(ÿ« o^ÿ/o*o endstream endobj -1810 0 obj +2070 0 obj << -/Length 4553 +/Subtype /Image +/ColorSpace /DeviceGray +/Width 454 +/Height 425 +/BitsPerComponent 1 +/Interpolate true +/Filter /CCITTFaxDecode +/DecodeParms << +/K -1 +/Columns 454 +>> +/Length 164 +>> +stream +& ÙÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿøÿÀ@ +endstream +endobj +2074 0 obj +<< +/Length 7231 >> stream 0 g 0 G 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(9.1)-1206(hb)]TJ -ET +0 g 0 G +1 0 0 1 154.285 609.491 cm q -1 0 0 1 144.589 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 148.175 706.129 Td [(read)-206(\227)-207(Read)-206(a)-207(sparse)-206(matrix)-207(fr)1(om)-207(a)-206(\002le)-207(in)-206(the)-207(Harwell\226)]TJ -21.381 -13.948 Td [(Boeing)-250(format)]TJ/F54 9.9626 Tf -25.157 -24.941 Td [(c)-175(a)-175(l)-174(l)-865(h)-90(b)]TJ -ET +.52 0 0 .52 0 0 cm q -1 0 0 1 144.379 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 0 0 cm +/Im7 Do +Q Q -BT -/F54 9.9626 Tf 148.265 667.24 Td [(r)-90(e)-90(a)-90(d)-223(\050)-167(a)-242(,)-927(i)-151(r)-152(e)-151(t)-478(,)-905(i)-129(u)-130(n)-129(i)-130(t)-434(,)-871(f)-97(i)-96(l)-96(e)-96(n)-96(a)-97(m)-96(e)-367(,)-791(b)-206(,)-919(m)-143(t)-144(i)-143(t)-143(l)-144(e)-277(\051)]TJ 0 g 0 G +1 0 0 1 -154.285 -609.491 cm +BT +/F62 9.9626 Tf 152.938 587.573 Td [(Figur)18(e)-250(6:)-310(ELLP)92(ACK)-250(compr)18(ession)-250(of)-250(matrix)-250(in)-250(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-250(5)]TJ 0 g 0 G 0 g 0 G -/F51 9.9626 Tf -48.37 -27.896 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ 0 g 0 G +/F59 8.9664 Tf -16.48 -31.498 Td [(d)-11(o)]TJ/F62 8.9664 Tf 17.426 0 Td [(i)-243(=)-89(1)-178(,)-98(n)]TJ -5.537 -10.959 Td [(t)-168(=)-32(0)]TJ/F59 8.9664 Tf -1.13 -10.958 Td [(d)-11(o)]TJ/F62 8.9664 Tf 17.682 0 Td [(j)-272(=)-89(1)-177(,)-121(m)-32(a)-32(x)-32(n)-32(z)-32(r)]TJ -5.792 -10.959 Td [(t)-734(=)-734(t)-734(+)-1289(a)-92(s)-226(\050)-236(i)-381(,)-358(j)-342(\051)]TJ 85.313 -2.332 Td [(*)]TJ 5.293 2.332 Td [(x)-176(\050)-288(j)-156(a)-289(\050)-236(i)-381(,)-358(j)-361(\051)-178(\051)]TJ/F59 8.9664 Tf -102.419 -10.959 Td [(e)-19(n)-20(d)-630(d)-11(o)]TJ/F62 8.9664 Tf 0.022 -10.959 Td [(y)-156(\050)-288(i)-288(\051)-730(=)-734(t)]TJ/F59 8.9664 Tf -10.782 -10.959 Td [(e)-19(n)-20(d)-630(d)-12(o)]TJ 0 g 0 G - 0 -19.925 Td [(\002lename)]TJ 0 g 0 G -/F54 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(r)18(ead.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F59 9.9626 Tf 298.534 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(,)-387(in)]TJ -303.764 -11.955 Td [(which)-254(case)-253(the)-254(default)-254(input)-253(unit)-254(5)-254(\050i.e.)-321(standar)18(d)-253(input)-254(in)-254(Unix)-253(jar)18(gon\051)-254(is)]TJ 0 -11.955 Td [(used.)-310(Default:)]TJ/F59 9.9626 Tf 65.185 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -95.322 -19.925 Td [(iunit)]TJ 0 g 0 G -/F54 9.9626 Tf 27.109 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.202 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F59 9.9626 Tf 287.757 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -317.895 -21.918 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf 16.498 -17.519 Td [(Algorithm)-250(1:)]TJ/F62 9.9626 Tf 60.055 0 Td [(Matrix-V)111(ector)-250(pr)18(oduct)-250(in)-250(ELL)-250(format)]TJ 0 g 0 G 0 g 0 G - 0 -19.926 Td [(a)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)-250(r)18(ead)-250(fr)18(om)-250(\002le.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 442.283 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 442.084 Td [(Tspmat)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 344.406 442.283 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 99.895 316.473 cm +0 0 343.711 126.526 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F59 9.9626 Tf 347.544 442.084 Td [(type)]TJ +/F102 8.9664 Tf 112.299 432.339 Td [(type)]TJ +0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(extends)]TJ +0 g 0 G + [(\050psb_d_base_sparse_mat\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F51 9.9626 Tf -268.571 -19.925 Td [(b)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 11.069 0 Td [(Rigth)-250(hand)-250(side\050s\051.)]TJ 13.838 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(An)-235(array)-234(of)-235(type)-234(r)18(eal)-235(or)-234(complex,)-238(rank)-234(2)-235(and)-234(having)-235(the)-234(ALLOCA)74(T)74(ABLE)]TJ 0 -11.956 Td [(attribute;)-361(will)-324(be)-324(al)1(located)-324(and)-324(\002lled)-324(in)-324(if)-324(the)-324(input)-324(\002le)-323(contains)-324(a)-324(right)]TJ 0 -11.955 Td [(hand)-250(side,)-250(otherwise)-250(will)-250(be)-250(left)-250(in)-250(the)-250(UNALLOCA)74(TED)-250(state.)]TJ + [-525(psb_d_ell_sparse_mat)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(mtitle)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 9.414 -10.959 Td [(!)]TJ 0 g 0 G -/F54 9.9626 Tf 32.09 0 Td [(Matrix)-250(title.)]TJ -7.183 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(Optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(A)-337(charachter)-338(variable)-337(of)-338(l)1(ength)-338(72)-337(holding)-338(a)-337(copy)-338(of)-337(the)-337(matrix)-338(title)-337(as)]TJ 0 -11.956 Td [(speci\002ed)-250(by)-250(the)-250(Harwell-Boeing)-250(format)-250(and)-250(contained)-250(in)-250(the)-250(input)-250(\002le.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(iret)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.959 Td [(!)-525(ITPACK/ELL)-525(format,)-525(extended.)]TJ 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 139.477 -184.274 Td [(143)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.959 Td [(!)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F102 8.9664 Tf 0 -21.918 Td [(integer)]TJ +0 g 0 G + [(\050psb_ipk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(irn\050:\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(ja\050:,:\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(idiag\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(real)]TJ +0 g 0 G + [(\050psb_dpk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(val\050:,:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.414 -21.918 Td [(contains)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + 9.414 -10.959 Td [(....)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.414 -10.958 Td [(end)-525(type)]TJ +0 g 0 G + [-525(psb_d_ell_sparse_mat)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +/F59 9.9626 Tf -12.404 -39.81 Td [(Hacked)-250(ELLP)74(ACK)]TJ/F62 9.9626 Tf 0 -19.057 Td [(The)]TJ/F60 9.9626 Tf 20.491 0 Td [(hacked)-383(ELLP)129(ACK)]TJ/F62 9.9626 Tf 76.975 0 Td [(\050)]TJ/F59 9.9626 Tf 3.318 0 Td [(HLL)]TJ/F62 9.9626 Tf 20.473 0 Td [(\051)-383(format)-382(alleviates)-383(the)-383(main)-383(pr)18(oblem)-382(of)-383(the)-383(ELL-)]TJ -121.257 -11.955 Td [(P)92(ACK)-303(format,)-316(that)-303(is,)-316(the)-302(amount)-303(of)-303(memory)-303(r)18(equir)18(ed)-303(by)-303(padding)-302(for)-303(sparse)]TJ 0 -11.955 Td [(matrices)-250(in)-250(which)-250(the)-250(maximum)-250(r)18(ow)-250(length)-250(is)-250(lar)18(ger)-250(than)-250(the)-250(average.)]TJ 14.944 -12.003 Td [(The)-190(number)-190(of)-190(elements)-190(allocated)-190(to)-190(padding)-190(is)]TJ/F93 10.3811 Tf 207.724 0 Td [([)-24(\050)]TJ/F60 9.9626 Tf 7.402 0 Td [(m)]TJ/F91 10.3811 Tf 9.384 0 Td [(\003)]TJ/F60 9.9626 Tf 6.823 0 Td [(m)-40(a)-42(x)-70(N)-76(R)]TJ/F93 10.3811 Tf 34.072 0 Td [(\051)]TJ/F91 10.3811 Tf 5.658 0 Td [(\000)]TJ/F93 10.3811 Tf 9.702 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(m)]TJ/F91 10.3811 Tf 9.384 0 Td [(\003)]TJ/F60 9.9626 Tf 6.972 0 Td [(a)-25(v)-47(g)-60(N)-76(R)]TJ/F93 10.3811 Tf 31.103 0 Td [(\051)-289(=)]TJ/F60 9.9626 Tf -347.317 -11.955 Td [(m)]TJ/F91 10.3811 Tf 9.436 0 Td [(\003)]TJ/F93 10.3811 Tf 6.876 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(m)-40(a)-42(x)-70(N)-76(R)]TJ/F91 10.3811 Tf 35.508 0 Td [(\000)]TJ/F60 9.9626 Tf 9.904 0 Td [(a)-25(v)-47(g)-60(N)-76(R)]TJ/F93 10.3811 Tf 31.103 0 Td [(\051)-23(])]TJ/F62 9.9626 Tf 9.227 0 Td [(for)-196(both)]TJ/F67 9.9626 Tf 36.586 0 Td [(AS)]TJ/F62 9.9626 Tf 12.41 0 Td [(and)]TJ/F67 9.9626 Tf 18.816 0 Td [(JA)]TJ/F62 9.9626 Tf 12.41 0 Td [(arrays,)-207(wher)18(e)]TJ/F60 9.9626 Tf 61.626 0 Td [(m)]TJ/F62 9.9626 Tf 9.825 0 Td [(is)-196(equal)-195(to)-196(the)-196(num-)]TJ -258.126 -11.955 Td [(ber)-197(of)-198(r)18(ows)-197(of)-197(the)-198(matrix,)]TJ/F60 9.9626 Tf 110.796 0 Td [(m)-40(a)-42(x)-70(N)-76(R)]TJ/F62 9.9626 Tf 35.914 0 Td [(is)-197(the)-198(maximum)-197(number)-197(of)-198(nonzer)18(o)-197(elements)]TJ -146.71 -11.956 Td [(in)-220(every)-220(r)18(ow)-221(and)]TJ/F60 9.9626 Tf 76.764 0 Td [(a)-25(v)-47(g)-60(N)-76(R)]TJ/F62 9.9626 Tf 33.173 0 Td [(is)-220(the)-220(average)-220(number)-221(of)-220(nonzer)18(os.)-300(Ther)18(efor)18(e)-220(a)-220(single)]TJ -109.937 -11.955 Td [(densely)-250(populated)-250(r)18(ow)-250(can)-250(seriously)-250(af)18(fect)-250(the)-250(total)-250(size)-250(of)-250(the)-250(allocation.)]TJ 14.944 -12.003 Td [(T)92(o)-385(limit)-384(this)-385(ef)18(fect,)-418(in)-384(the)-385(HLL)-384(format)-385(we)-384(br)18(eak)-385(the)-384(original)-385(matrix)-384(into)]TJ -14.944 -11.955 Td [(equally)-283(sized)-284(gr)18(oups)-283(of)-283(r)18(ows)-284(\050cal)1(led)]TJ/F60 9.9626 Tf 163.395 0 Td [(hacks)]TJ/F62 9.9626 Tf 21.758 0 Td [(\051,)-292(and)-283(then)-283(stor)18(e)-283(these)-284(gr)18(oups)-283(as)-283(in-)]TJ -185.153 -11.955 Td [(dependent)-304(matrices)-305(in)-304(ELLP)92(ACK)-304(format.)-473(The)-304(gr)18(oups)-304(can)-305(be)-304(arranged)-304(select-)]TJ 0 -11.955 Td [(ing)-253(r)18(ows)-252(in)-253(an)-253(arbitrarily)-253(manner;)-254(indeed,)-253(if)-253(the)-252(r)18(ows)-253(ar)18(e)-253(sorted)-252(by)-253(decr)18(easing)]TJ 0 -11.955 Td [(number)-256(of)-255(nonzer)18(os)-256(we)-255(obtain)-256(essentially)-255(the)-256(JAgged)-255(Diagonals)-256(format.)-327(If)-255(the)]TJ +0 g 0 G + 164.384 -29.888 Td [(165)]TJ 0 g 0 G ET endstream endobj -1817 0 obj +2048 0 obj << -/Length 4948 +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (./figures/ell.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 2085 0 R +/BBox [0 0 447 205] +/Resources << +/ProcSet [ /PDF /ImageC /Text ] +/ExtGState << +/R7 2086 0 R +>>/XObject << +/R8 2087 0 R +>>/Font << /R9 2088 0 R/R11 2089 0 R>> +>> +/Length 2281 +/Filter /FlateDecode +>> +stream +xœÝYËŠ%¹ÝçWäÎU‹’õ~,ÛØoº§À˜Á«kOý6=^ø÷!Å )E—™M3Pôâv…â)ŒÌúrZãNËÿä÷ö8~ÿ©œŸÿs|9Zñ&”vúêM +î|-7¬;}¡ßâΖ‹)…Äd²_Dg|¨çí³©]]¶ç`l©§:HÍ4Çúd(f²)‰Ö›ñŽÅd)´Lš"é×bZ^š3ÅÍý-’ßÕ~«Æ¦%€ÖLŽuyf ˆ¤ ¨\Hx‡ÄPiGZb¤_NIˆÈIK–P@``kÃMã8¯Ø–Æ8N¼6tÂ%Ê3L1V“óì‹‹Ñø®-»£5Iý¹@ñøº´Å…h\ñڼɶ!%ˆ³/ª 5QRµÅ…ÔUBªk€*#HÛ‘?H…f[µÞùÙ—cÇ‘ƒˆ³-ª€"ÀÀÖ„ÙNª¸õ¾PÚÞYíC°&²i½»]ï‹3¹»D_(Ô®—QveíŒ3©¶ª¡j¥ÂJVggTenº@in‚Ô;¤Y¢iŠ­‚ø@fs¸ùa¹3Ñg]FgsTAê ¶VÜŽ¿žÿ¢òÚÓ“+nßãpÖNñ>Ä.Pˆ÷®;¥Ÿú~R·‘À@»c,S¼‘õ ócß—3‘õίޓÞKH…NùK®ÄR1 Ò­çÿ}|¤_Ži½,¼K@4y²ÉtҔÁ[Š³©a>¸3†˜-Ý0'‰ÑÄI¬$W²³Poˆ%Rè*NÕ¿Ú§†¼ø¯‘ÎÓŸÈK@£XÐ ÊŠð¦F(¼ª9(ój–ScA-\úpCc&à”¼¤2ˆkƒÙÔ@ç=ØWZDD ND bEN“z‘õÔU™zÙV£®ˆ`T}ÆY@3”íZa_YQÞÔ…W5e^MrjŒ"¨…K´-“wQU0ªFš·œ¥ˆ°šöEX>•5(!N bEZ“z‘øÔ¨…Q¹Å…”Vc¡1âÍ,2ófº‹ÉŠˆ¼‰Á«Èb2/ò\4z¦…K/ñ¸2oˆmeÞ.N®íÊ;õzç‹©îW0´¯4Lè¯eð®úÿI¼û GÌ,Nûÿ{PeZ®D‡>ÝùR2³áƒ…ûÿïÇ÷$]×.‹¹¬‹,-‹­­‹,-‹.¸uµ‹ër¾„ÔÅu¹]¢êâ²ìRÎðŒNUšJ¬bÏ’Ö%‘Æ’³e]ƒ(‹Ñ^E”Åâ/‹"ö "Y¡–EÛ(:ß{œ/ȪUâ®dÕr63¸h¹h…¶k rÑ¢[»i ÒµøÄøäø8ÑÜ i ®Ù®Z‚¬Z9úb ÈE+ò#÷¢%ÈE«Ô-. «V¤Ú\m¹hE¿ÅdÕ¢7Rëyl´Æ6ky`d(µTÎ_>Ó”äÆe”ŸÛãüÃ+]@zšÇóõÇc¼ÎòÀC׸ËÆl’¥¥Çñäž_&û‰ž_6Ó4å™äõ²xnôë_Ž'ÿü»ãeÑõüÔº¡ë:9¶Fæ=Q/xؾ±_ýÜã ¼•†Ý¾Pyá…êN†ŸhÇž(~Jò7õ+n$Ö­ÑLÜxxÑ„8CÛÍyS¨ÚׄÆÒ ]¨¾f;XûÓkL'/ƒ<íùË?O¦G7øîíž}"´Ö„ÄÃ5ž^ B‡Üò|IgÊñ8D7†*ãm‘7Ó!ßGó&?-8ÇkšX˜òðA;€doië©r¦¹®L"ßBR,ìyÜèÁÔôùÈ.~‹²ô-ü &gjg*±ûªŽêR㙼'ƒS¾|÷È$j­¡ìöI]žVè*ÉîFÇ„Ÿ#0ùv Xöoñߎß_JÜ«F>,¿kV@V¥w‚è½FÍüjý’U¡Q‹/Bñ ö‹8Ý°°Q‹ý™¢!‹=îÕ»L‹ûå|L†»O/ÉsbŠøj p4°žbÍ&z½p‰î®ŠLÖf.(Åí$}6Os¸XÈ…_­\@¤ ƒ*HŒj`Ë‚[ö^3[oÙÌM §¡Ëq™¹á<‘ã†ýÕ­ˆ Û·ØŸ‰! öÖ[ö®Òâ~ UμHX`ZD .žy Y*Ç~ˆêTD%»4ÌÏ´‰û·ø—§×ûI‰û„ùcf¥ˆ„… QcÆYHC¦ É*cJRÒ€G ;É¿õ©˜Ï@3~ýZL²kÄÅŽÿ8EÜóú8~xúðìÓ÷ç‡OŸž_([zR6 +Íy-?ýíùï¯ßñGz·5®„ùËéOß}àMžså¾Atû·ÔÇÿ|THF +endstream +endobj +2087 0 obj +<< +/Subtype /Image +/ColorSpace /DeviceGray +/Width 510 +/Height 227 +/BitsPerComponent 1 +/Interpolate true +/Filter /CCITTFaxDecode +/DecodeParms << +/K -1 +/Columns 510 +>> +/Length 48 +>> +stream +ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ;Y¯ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿà +endstream +endobj +2091 0 obj +<< +/Filter /FlateDecode +/Length 171 +>> +stream +xœ]Mƒ F÷œ‚àOÛĸ±mš¶Àa0,‚¸èí; vÑ„ß›|ˆ~¸ÖD.ÁÁ #×ƪ€‹[ q2–•WâNy‡Yz&ú›ôïGNÔßåŒâYžóM¹9à.^i'dmQt­ÖC«þžªMõž¬)YŸ:2èDØ^26Y?‚iRªt4à°†€6æÞ¹Wêc,þ¾æO§Å¾=+V +endstream +endobj +2093 0 obj +<< +/Filter /FlateDecode +/Length 191 +>> +stream +xœ]= …{NÁ Ñ™ahbc¡ã¨ °d( !…·w!‰…ÅÛáƒ}ûC3\/Wï2m)èdj7 –°& t„ÉyÂ[jœÎ;Õ¨gI3ÜT|"PL»ñ]ÍÐ> +stream +xœÍzy|TU²pÕ9÷ö’îNwÖî$$Ý›ÎÖ Y€Ä–ܬ, $,B‚b:@@0$(&¸¡ Oq‚‹C§#ØŸDŸ:ƒ:Ž3nÏ‘8â¸Ìd†qÐGHuo”ï7¿÷{ïû}¼{RUçTÕÙêÔ©sN ˜ 8ÔÕÎÏÉõË<.xvC +œÇIð + Ã,Ø¥P;a:œ†#ðM@‚ +8.´ƒ*°¢Ãp¬ƒÏ`Ò¡>ÆHj§Ú!Š‚_®†{ƒÇI+ Êág0„«q>äP~ËB7õÜ+¤|ŸJOÁg˜€”ûD@tÿA$¬‚7‚ B3ìÇMø%8Á Û„¡7x\Gá·XM¹Ù°A|_VS­gЊÃÁ³ÁÏá%¡…Zºî¥ûa˜Mäåb?8 ®…9ÐDÒÛàŒÂI\¦Ë‚w?|ÍÜìu®¥q¸a&4Âý°‡¬ñ.œƒoЀ…ø¤ô6þY|ŸÆV ]°‘üê)²Þ~8ÇqNbVf%kY!®#Yì¥þá VcãË|¯˜;VŒÆ?!êi„»áeêãæ’õÀ“y§$tŠy—¶Ð —Á“pÞ¦q|Lvÿþ™”>a·³îà¢ààg4Øa*Ì…ÅÐëáxšVõxþŠß3=iž^7Šçƒ’mS¡ŒÆ^KÚó©ím´J~Pz—fšÅTœƒópöá#Àð¦aN¶–}Å}üMþ‘0YƒÅÔR,$Q¿,‚VZÛÉÚÒ|Àkp +c0³iFïRýoÙ5¬‚Ò3ì4û˜ßÍû„‹â=c#cû>Ø Zò²éd‡.xŽ¬ðŒ¥1dà*ìÀOiä;Øó<œ[¸Ä y)_Àø½|'ÿÿ•°N8(|(Λăڦ±›ÇÞVï"[ hh\i0…üg9yÓM4¾vJë`l^x€üåA臃4ï“p +~ ¿ƒ?Ñ +:iÌ+©÷5äuwã”ÇCø2¾†§ðüVI,™R:›ÌJX9«b+ØÝ”v²3ì]öŸÀ—ònÞCi?Æ?@„ ˜Gi†¸MܯyS›®¡mÖ½uqôR楆KÁXüØõcŒ½<öypapßÙ0‘Fº•Fù8ùà^JÏ‘'ƒ×á-xOë×ÈP$·¡DÞE«V‚Óq&¥Ù8—Òu”ábJMØŒ­”º±ïÀ;ñ.¼VÓc4·½ø,£ôQú-žÅ?àWø5#'fœ¼ÙÅÒX+¢™–³é¬–Í£´‚µQjgëØzZ¡ýlgïò(îâÙ¼‰¯åóŸñWøoøw²„Á#,Vw +§…·…÷…ïE»X)¶Š»ÄW4 šÍušUšÇ4G4_h.j5Ú:m³v“ö7Ú ÎEÑêç4ï£ðã/Gs;ÄháVv–ö…·‹[ñ:²˜†-à«ùü×âr<Ïø!öò•ü¦à3¼Šýƒ·áBv“¹],æËa;ñ û„]`Ÿ 1¸€}‰éÂ¿á ¬—3Ò‰øŽ#Ü)~ÀÞƒb¶‡ÙküN~gðß¡XÜ…gÅ]ìmp#, +ÎÒ®ÞÊ¥J¿b+Ù6¨ +Äïa%ÙýYñV²÷4v/fòß»à3.±¿áy|„¢Æ/q–ÂndEx"î%L‚Q\ íø0Èx‡@<À÷c 3Òjù˜ §Ð1öKîÄßð0hPƈ©,ëØyvQs†"R”ø5lDŽ¹ä;—¿1¸™vÀN–F1­’¢É;˜6x”âý…±•ˆ-¾/n#?Ûó`äÂö&ÓÞøŒR=Üy0D>x/ä²Ç`S°—QÜŸMñ“AWA(ZZilÝt^IJdŠ…Ôë?(þ¿AQ¿ÿ · ƒvÖ0¤ Šd»PI‘ÉKñw¥e°„JOƒš£â;P‹VÁ1¶‹¼ü#¸‘ÎœO©ÿxðÐøÃ!‹Fí È¼–j<96dJ÷À›È`3yíó:aEÞG‚«h†+錪¡3ñ¬ > +å´vó‚w·AcpOðXóƒ(þ®úa2lØBÑ-PŒ=…¯ÒyôŸ¸âö øâ‘ mð¥ŸÑˆ¦‰' WxbgIp{ð·CöH& 5Ó)zÖÀŸÉn3ø0äÍaÁ*ÞN'ÔY˜Ü´c´WSä}öjEŠ==$î•e¹dÚµžkŠ‹¦N™\XŸ7)7gbv–;3#=-Õ•"%;ö¤Ä ñq6kltTd„Ån2Âô:­F8CȪ”ª¼_ª×'¤J3fd+e©‰M?bx}bU]­ãsxU5ÇÕš2i.ÿ¿4å¦|E-x²³•’Ã÷Ë +ÉÀÅsë)…ÔàðªùÙj~‡š7QÞé¤ +ŽJ[k…Ç^G¥¯j}ko¥·‚š0„•Kå-aÙY0f ¬r>«Ô>€Öi¨f˜µ²x€ÎDƒòÅK•¾8©B»*›–ùêæÖWV$8 ÙY>,_*5û@*ó™Ýª +”«Ýø4å>­Úc¥2ØæÈîÝ°@³×m\&-kº¡ÞÇ›”>"ÜÔo…Ϻñœí‡"5Y^¿õÇÒÞ[i[éPŠ½½[¾þ¹õ?–:ÜÐ@mø˜«ÊÛ[Eo'VÏwP_ìî†zÞM:”y(s +Í®EªT8ÞUŸ^*“Z{Wyiaâ{}0oƒÓ/Ž@|¥£wA½äô•$H M¢¡wÞ†Á8Ùwµ$;kÀ2ë@¸y·Û—™©8ˆ¶œV”Æ8M-fg­0ŸÔnq!óAÙ¶©¡8‡Œït*Ë»- C3|=sëCe4'øAÎq7ø˜W‘ _–Ä\§Hz.K®T÷JäÇσò²ˆñéR¯ü™-±Q•­Å>Œý/Ä-!yõ|©zîâzGe¯wÜ¶Õ ®*…äS¯ÈÆsÁ}‚‹,5S"×›·¸^aП誒*WzgÐV£1ú¢ÊëykåXW›"ÿ½áJËJ¡Þ¨´%¸4ªÿ/ huäÀ*U>‹wF7„9ÿÍJày¥–J~¨6>'_±ûêò5W•¯ž±—Ó€…TV½`qooØU²* +V½½U’£ª×ÛÛö4K‹Ô{œ×óúÞöJïå凶%øª¶7Ð$Z±8[¹#‚Ö9V ‹,ð}çØC0Bpž@€6¾º úŽ`&l'È!¨U8ü ?HãÜKõÍ„sÚúXÀŸ#þM +æø*:í|;=Lcˆnã©ô§Dã‰>M|z>ð=TVèîñòDùOÆùS9–ècãôQâ'}D}ðÚùÃãåõ¼K­×9Nûy‡?Én)M"¹ƒ —€Sn'åv’évR #]ìV«= Í#º&DÉ\›ýNI]£ÍƒÖ¸¼~2éf2ýf²Üf²ÜfH´é²Î¦N6ßD:›Hgél"«äòꯃ [œìÞAvWø>ÂÃgTþ]„wô+%~ Ù1ƒFu_åO·““­,’óJNÐ=©Ùåƒq‰y}?”ôaŠ# §fE·E•¶ ê +·e0>1DIë¦Òp¾n#`M8… € ‚@àKý)9ö!>Öè@·w³nÞ-t‹BnFžäyP§rÉHž RÈ°7zpŠWß®ïÑs‹Þ¡ÏÕËú:½ØFO¢>Îí<‡—ðZÞÈÅ@pد-Î'"O×çï0ô|†aÃèÓ kÎhF4ç5¢C“«‘5u¯¦]Ó£Ù¡é×èwhvh™×Ðnè1p‹ÁaÈ5Ȇ:ƒh×béݼYÙ´„-í;²q#ñüF‚FZF2ÅÄÂ@% ÁÊ©d&=3陉k&®™¸@X‘Ôx ÚÇ¥š+’ËuýóŠ„ ¤áÄ 'ÛŽ>¯äfQÉD%•L¤u†]¤Z;ê¸Ê! ¯!|Y–;.÷hTùyUç²LV겋rSÚpú2°?wd ì))Í““ EFF6J®ÆôƽB›ÔæjKoÛ+ÔJµ®ÚôÚ½B‰Tâ*I/Ù+äH9®œôœ½‚]²»ìéö½B_Í‘š“5§k„Æš¶šî>…–nÐïÎÍSi²K¡GýqñyS̥ײ#4F» Îp0¶ä”´ˆìˆÊ=LÜÃÄ= µ"Õ:¬„Âöq™Âß­Ê”œ"gWÉ9Mþ¿8¿¶´†Ân#ÁnNm"ù!U;”;¢ò}„GT~í¸~¿ÊW´ì—ë)Ap±îÓ6\ %í"œæ‹à,µNØNÐNp„@à‹)-â‹ØaJ‡Ø!ž%›&ÅØ!6–N–È¥ÔÂŒä &< âÇT|ŸŠKTœ"‡Ï2};ËôÒ,Ó=³Li”aét™p§Š²¡Ôô|©©¶Ô”Qj¢Ö¬à‹Q±FÁøGÏQq–í4}ç4ýÍiú«Óô”Ó´ÖiºÖ©Ô›@{ØÄ¢UlP0=RÐ@%»7€° žTaÝ <îŽbÎÝ÷'(tÓÝ÷74`µox)T7;|ßΧy„Ñ%U”Êl»¾ÄV9-¢¨ªâ_ ï8vÿðÙÜ?þl‰¾Gªç×ûžKlðå)™`bCµoºò,<ÎÖ²¶ÊŠã¬]! õÇq#[[9OáãÆŠ†+jÌÚI < +QÔ!YQƒdTÕjT5rÓäÊŠääÒ+8SQ"÷yEUZj+…º ¶êBj, RÔ¶RX’¢FþjÌüãÆŒ€fµ1³ÔÆ&(J.©d¹•).RpMQÅK®ÐpÀ¥öãµÄtÒC:äã:LG:îÿŸ_KÙÿ@›>Z¶Tyœ{¥Ê¯oÛúV›¯§ÙáXöÑø«=ÕÛ¼´U¡M-¾¤– +ß2©Â1дô_ˆ—*â&©b–V.¨X*·Tø›ä¦J©©¢ap_wyõU}Ýw¥¯òîÑX·ÒX¹Ò×¾ê!®VÄû”¾ª•¾ª•¾öÉûÔ¾ªç•au]ý€Êèu§ÒAf£ýàMp6”ÅZÚ§©›ã§íö„!èØ2ÐËØ(•ùLŠ(»4»TÑîTDáÊÏ/ã"Ûí×8†ðÀ¸ÈBì© Ü`«\Yq寣££S®.7áÎ.›Êë¤Mëœ_í«R‹Ÿ§Ò'{+PYŽ®ñ¯¼^¶œôœö°6O·§Ï³ÛsÄ#vu5;òdòéd֘ܖÜÜ—¼;ùH²FÜPLöìNþK2ï"oÂNú*+Ô>»ˆÒŸRììêP> :Bݹ»Üåõ¥É°”n½H7ôlˆ"ò æˆð„ß!ø”àoÜIø!‚gÏæÙôt¯Pzlp+AÇÆós ó¦ˆ6-Ñù‹C´rNˆzJólDý%ùa¥fº€# ~ƒàC‚¯þI ò<ž§6ÞòÚ†èp# ¨Ð© w'º)ƒŠ¹;;ÜnP@qpZRuãÕ~ØÑd +Z"¤¤r;”j] +½¢Ç”‡³¨Þ™A ³ž`/ÑeUËNúAì¥ç9„i•ÌQ„8F|Qá{pÃjð?O]jÄVz&>*GË6¯­ß6bÀ&ÛØz¸Xxi®¤›ŠûéiÊÕ¼ŽòUþ½®WB,q¿–é 63=CQ¯32CøwRŸ)G†‡›åˆÂ\s·y‡¹ß,˜ã¬C,Ïшi°KÜžÙ–ÑseÄ%žˆÈ"Œ(‚oF/â7n÷¤\X‚k—D¹ò#¢cc­1ÎÂi¬0¢ -5UJÖžÇYÎ(Ï cÌ;56LëŠw• ?ßóýÖuS“˜ËÅ'mdíÌt$Ù•9fÑÒ“°J¾Ck3Ym®-°É„âdNŠÍÐz´3µÏj5²ãza±îzëbÛMºÎˆÎÈ' O…?qÈp(ü”xÊú ÛÖl#Žï„ï¬1tâÄ„˜¸Ø8k¢M«·l†Ä‚¸éq÷YûZ[cÖø8cœÆÄ㘨±Ycc¢µQ‚)€­²^/GKzô¨ð|ÙhãûâpwÜ‘87ÄóÉp÷"3&ð~Ùšß×F5FµEuG QÔÊQ2M*²£ÇÁ½Ž~sÄÀïÈwL(ËѬu³>v’fgÙ_˜ŽÅÙ‡ðºH…L>ûœgtŽeÉÚo—̾°dÔ2J¦½´d­§äÒZ÷¹ˆHk‘º ‘EEÌîÙj7¿þê¤\\»n ­‡â­näÎB€ÂZVš<9?O™šF˴μɓ§ðƒG° »n^¶;Õwú‰½¿Ëµï»iؼzQU<Šcß»° {v˾®µÇ_ÿÍŽ+ž>:v~ªeR¶ê“óƒ_ð…´^yXs‚#~c‘^yõzŒE¥úÊ°*Cu²pZS3äoÁé‚‘‚¿‡i¡KõÝÒƉϥOšxjâYé¬ë?'~•ü¥Ë8S—Àíƒéé°sƒgr17À ŽrÑ‹±Ü}4Qvç$ÒõsÐbÊH?­ zö)=”ÉÆl‡jcZ©AŸÜAüìžl¶#»?›eÿh£¶›f`ŸÉarö °à´䨓Q,*.“ð‹+  ZtÉÚ +:G{JFÝ£ëJF—ŒFåŒNÊ-ß Ož˜“”f4ÉNÉ™ât9è +OM s4cŽÝŒIfÊ9 iͦŸ¨ÉmF»)±™¢‘Å3‡2·Ð§î¡u°ÖíŽRJuÂXu¹œÉ©…*+Ö›Ÿ7¹¶–²¹$eŸ)k«m-¸ë™EeC›{Úûã}Ksœqñ·Z]™Ë•âíîGæ8jwÏØâ}¢U˜ußëjïÜ5éØm¾-*Ò³tb‰Æ°kumõÔÄôÒ¤°ïª]ѽOù•Ã&Zß^Zß8H…|Ü(5 êóíù™imù“{ =Æžøž„;\=©½ùÏÚöÆïw Ÿ!õDÚka¯Þ3Åj! 5&¯O‹5Yã]&Wx5nÇ;Mw‡? á×@1Òýg¦7âõi7䯂U¸’­H]•ÖšnJ[Ÿµ)¿Oè{´=º;"îˆì‹î‹}LxD·3â‘È'b÷¥N;œŽé¾4|eü2üË´/ó2´&}Z1áÔ<±BÆø4AE«ƒÞ~˜­(Sb©ž®yz”UÈ¥¼—Ë(” ™\è-ì/) +¥IÀqd¢Y˵ÊÖVn+Â?ã——=dô¨â#£ç.Ðö-U¶%Z‹TÇÈsç$%GÄ +º—S”šÁ®MlƬèÌf˜I^,[$é¹c³›!'‚¨·0Õ)Ÿ ú[‡©! wÐhc×ÐHÉi +Ï5yrh{ÇDÇZ£4 +÷¼oÏ’·žýé/VôÕ|8ðòê…pÒ­òúåË{ +'Mž_wÿšÕw¤Ngïê_x×Iÿºš]7Ý;gùÚ¾774u,xwõæÚ•·¬¯-hÍû¼j¯wËÍ(Z¥üCŽqœ|"Œ^ÆïÉé±&2R¥I6sÙŒ™FŒÑ"Ó ×‹ŒF“ 1š(–N#µºh­V§ã‚VcÔÝ„¦ø$žÜ-›DÔèuNŒFáÝé9_Ëeƒ^o減áŒðï² KÔ jF/R#fnÖÈZÔÆ…ÿ(r®õ¨ûÖCa“²°(çlIQŽÅCiÔri'¢(Bœ['º…Í–W•¬Ùl¦slÝ\²vÆHR„³ó‰ ?~lï¥WX×Í{ÇRðÂc?Áå=üŽ‹ÛÙžK¡_ÓçÏñd+¤¡Qο-íñ½äÒ„Vaƒ¸Y·Q‹ñVÓ†¨[ÛtwF…éu}ì˜fs¦ÙDžä@+ÑãÛ†òóiu4Šn²>ÇÕ梒— ‘ÂâóV+˜lCä‘ñh~"-‘ŽHÀ92䌞 .gx3ú3F2„ "Ïv’šv2Œ…Å¥_×FCíÒ9:R,äµË–Œ®³\ ÷-R=X9UTÎLHÑES-® ©RªÝäl†Ds|3¦è(ç0$5cB¡d½+ä»—ïU!×]b-œ<9r +ù*¹ê”ñ Æ´ § y­ÑVß1òvÆSÝ}o-¿íõý·<øñë{^bù‘ef7ÜÓPÚ8ñö .Ö…)GZ~÷‚Û³½¿ÿý؆-«Øñ;æ4}rkÿ®wnY˜¥®Ã,ŠW/Ð:X †üM:u׋bŒBL¦øíãH}<¤Ê©LNõ¦ö§Ž¤ +© +;¼Ú ú Ÿîvq.ÅXWNáÐ<;´¿U£Ô`Š”’œÂ4 9¹»Ö5!!1!)k¢RÍ.Cª-ÎÇ4N!‚¶¼†ŒN¹X£b6¤Ýž lùHKL3Ä…Å4ÿ°ã3UÈÌÜU2VD4S÷ù‹j¥)“#ÔÈO_Ãfmï\ì}rÓ÷¾ÓüÊ–5¯V­Ü™417¥(£¸¢pFÛõÖÎ+ÝýÚØ‘?{ø³—ÿ>öÅÀÃMëaÑOtä:¯?ö¤z†×ïîkÀ‡ƒŒJ˜&ª¿HP&N‡"ÏÔ—lòšúMoà)ö>¾ÏFL†x@‚I6q& +B’ã9‹æœ Ü$ÊÓ Åߣ†ˆæ÷Êm6€ë7 !Î(±/€³Ïe#Aê„~A^dã¸É-tÞžSí~A¹ï¸-£îÏVq¢{køæË·›ud¯(g Jè$ShÓ~ÅÞó´ãÃcÛÖæ.ÈOkRÿù’ðZÂD¯!ôï]øÞÃÖó!Lø•Ø*~xåßÀ€®\ý'ñøî8½tÂÿCÝÿ•Ÿ*ûœ/ + ç*XùIŠ°Vý9Š+O Pþož¡™#D*ÿ­ŠrˆX°rMKÇœ–[æµ­iº¹nþìêX¸c¼ÞçÓ]]<çƒW1ÆÿUSS„EœóaOsaÔ„Öõ¿ø”úìí? øŽ 5š=ßèâB>ýiâ+ +=zí |ßyi»¥XÊ¿–Ž÷÷ +“žÑ +endstream +endobj +2095 0 obj +<< +/Filter /FlateDecode +/Length1 3512 +/Length 2264 +>> +stream +xœÍW{l[Wÿν~Û±ïÃ×vìÚ¹×7±Û\;îüHâÒG–¦éÒ”.khewi×4MÓŽf ]ºiÝØXe ¬PU“@BC“àƹN¶!Ĩ!e â! ¡b­T›–„ï\»i7!þã^ùœóûÎwÎ÷ø}÷žk Ðg‡±ûÇsy°.ßulöOÍNÎ5±W ¯MÍŸV½²ïü/››™ ,澉s'¸é™“OkêÛÏàïåãÓ“GyÏW.¡³(ì=ŽçŸà¯"î<>{úLS¿m›u'šlÙ°f'ÏÌ‘o WPÿ_ˆÕG&g§[þ½MlîÑÇN·Öfós§¦çj“Ñ=¶útÍËÜ%ÈHà»6ód{`Þˆfàx ʆ$ÄàØ 2A…mp/l€O 8GAÚá0<9‚N8ypÀN8_€sPƒ ð9˜ƒÇá‡OcfŸ‚çàyð‡B†‚xêÝŒÅÎ6mÍçÌ^gƒ†s&8õ0xŒ«nƒ–rfÁÖ ´?GóK‹½"P»WÀGÛXŒZ¨M2íhµ£‚™%†™7<é¬ËXt 0 Ò‚D9lP^0ûpí=ÖZ³ŒROV”h¸LÝâø;»;q˜–ꮀX.—iP¤Î2˜áNQ2yw¹¼ñ’)A?q:œzq«½¯·TLéI‡D©CO{å|s”*{‰¨‰vMÔˆ¶ûd2Ù•ˆ¼Ë°…¸®mÙ®M©°´ñàîÚ %¤kù:ùËÊRç^Ý7Ü9šËÕ¤”öæûvæ‡S÷FQ +?5¾ü-ŽÇC•ûü?ãKÝË +VsϤËÜ7 +qØusKIaQrBS| ŸÏ×CœBõ2ñ»¼;U3Œ´¸´àÄÁÌ´‚bÝ_ψtKÔÇ,öðHL!*äû +a½‡4É 'H“5„N=]HB¾—<9xèààÌp÷OÔ¡„Ú#S›{6 Ý?U™ßŸŸñyJ„»Žlïz17RìÛ_ÞýP2ù›Ê'ŠÖß—)Ílýø‰¤ì¿úàþm-~.àé¬B7|êa–a1bñc¶)È~•`éºÖ`±ÃšY´0 [‚Ù2r4¶dJþÕòu)Æ2&…ÜF=&±a,Œ@GžJr†i7Ì öV®€•Kí" `j„¦¦½L5‘ºY¥S»UÔ N ::æ!¬è¼“¥DÑX-ˆÅTº$³¢°êœèÆžñ<å¹+JWü•ýꦕwûãñþï=ñ©ê¡„Â]òë»J/hd¹=ÈɤÈý¸WµÉ²MßJÌR‡C^ù!·y|x{@Áºð`N0'ìË¢îeoŸ4u7K•BaQi0‰èXÀ„våh`ÉtøuG€íð`ü¼nÃLaÄŽ€UÌ`*¾VUÅOd-¤-$ÝÃ¥Kš‚Äwë N—¸…t¿q±sûÊ•!í³ùb2J";Å‹_ò'ÒÜ%™Ï¾s¹ÜÁÉ2¯ÝûÚ[#Û‚D–íírûŸÞ û%?V9;L#©uÉi1êGE§Å¨“E´gŒ2#òÚq aD}ÈW!ßäFÃèñÖ+çcJÄå÷¥Ô-¤cKGå£?~2ªFÉ/û:}+çͬWƒ‰æÙHa…œ%g9wŠû9b®uz±YPQü9à®7…µ3ùÖf6Îêìwßûÿâ²ÁE«µ±HnVWW±UY‹ØÿEîz}ô«åvªnûÀÝì­UJ9KÛ 0|jzz|úÔ‰cÖ~ä¥ÿ!¿®Ûpsõ?ùÇU Éý +œˆÿ0ÞGüU<äY|žï–æÓ×Ê7Lú7 +lþ'÷uþoL|E÷¾r«Ço¸ß9®ñߺӣú¾Á( +endstream +endobj +2102 0 obj +<< +/Length 7891 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(9.2)-1000(hb)]TJ +/F62 9.9626 Tf 150.705 706.129 Td [(r)18(ows)-336(ar)18(e)-337(not)-336(in)-336(the)-337(original)-336(or)18(der)74(,)-358(then)-336(an)-337(additional)-336(vector)]TJ/F60 9.9626 Tf 272.611 0 Td [(rIdx)]TJ/F62 9.9626 Tf 20.507 0 Td [(is)-336(r)18(equir)18(ed,)]TJ -293.118 -11.955 Td [(storing)-250(the)-250(actual)-250(r)18(ow)-250(index)-250(for)-250(each)-250(r)18(ow)-250(in)-250(the)-250(data)-250(str)8(uctur)18(e.)]TJ 14.944 -12.021 Td [(The)-391(multiple)-390(ELLP)92(ACK-like)-391(buf)18(fers)-390(ar)18(e)-391(stacked)-390(together)-391(inside)-391(a)-390(single,)]TJ -14.944 -11.955 Td [(one)-395(dimensional)-394(array;)-467(an)-395(additional)-395(vector)]TJ/F60 9.9626 Tf 204.327 0 Td [(hackOffsets)]TJ/F62 9.9626 Tf 50.049 0 Td [(is)-395(pr)18(ovided)-394(to)-395(keep)]TJ -254.376 -11.956 Td [(track)-287(of)-288(the)-288(indiv)1(idual)-288(submatrices.)-423(All)-287(hacks)-288(have)-287(the)-288(same)-287(number)-288(of)-287(r)18(ows)]TJ/F60 9.9626 Tf 0 -11.955 Td [(hackSize)]TJ/F62 9.9626 Tf 34.49 0 Td [(;)-237(hence,)-235(the)]TJ/F60 9.9626 Tf 51.365 0 Td [(hackOffsets)]TJ/F62 9.9626 Tf 48.416 0 Td [(vector)-231(is)-231(an)-231(array)-230(of)]TJ/F93 10.3811 Tf 89.104 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(m)]TJ/F62 9.9626 Tf 8 0 Td [(/)]TJ/F60 9.9626 Tf 6.336 0 Td [(h)-40(a)-25(c)-25(k)-30(S)-18(i)-32(z)-25(e)]TJ/F93 10.3811 Tf 36.682 0 Td [(\051)-192(+)]TJ/F62 9.9626 Tf 15.99 0 Td [(1)-231(elements,)]TJ -294.657 -11.955 Td [(each)-338(one)-338(pointing)-338(to)-338(the)-338(\002rst)-338(index)-338(of)-337(a)-338(submatrix)-338(inside)-338(the)-338(stacked)]TJ/F60 9.9626 Tf 314.252 0 Td [(cM)]TJ/F62 9.9626 Tf 13.459 0 Td [(/)]TJ/F60 9.9626 Tf 6.037 0 Td [(rP)]TJ/F62 9.9626 Tf -333.748 -11.955 Td [(buf)18(fers,)-449(plus)-409(an)-409(additional)-409(element)-408(pointing)-409(past)-409(the)-409(end)-409(of)-409(the)-409(last)-409(block,)]TJ 0 -11.955 Td [(wher)18(e)-261(the)-261(next)-261(one)-261(would)-261(begin.)-344(W)92(e)-261(thus)-261(have)-261(the)-261(pr)18(operty)-261(that)-261(the)-261(elements)]TJ 0 -11.955 Td [(of)-353(the)]TJ/F60 9.9626 Tf 29.729 0 Td [(k)]TJ/F62 9.9626 Tf 4.598 0 Td [(-th)]TJ/F60 9.9626 Tf 15.878 0 Td [(hack)]TJ/F62 9.9626 Tf 21.448 0 Td [(ar)18(e)-353(stor)18(ed)-353(between)]TJ/F67 9.9626 Tf 88.761 0 Td [(hackOffsets[k])]TJ/F62 9.9626 Tf 76.739 0 Td [(and)]TJ/F67 9.9626 Tf 20.382 0 Td [(hackOffsets[k+1])]TJ/F62 9.9626 Tf 83.685 0 Td [(,)]TJ -341.22 -11.956 Td [(similarly)-250(to)-250(what)-250(happens)-250(in)-250(the)-250(CSR)-250(format.)]TJ +0 g 0 G ET +1 0 0 1 197.579 464.41 cm q -1 0 0 1 192.93 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +.50096 0 0 .50096 0 0 cm +q +1 0 0 1 0 0 cm +/Im8 Do Q +Q +0 g 0 G +1 0 0 1 -197.579 -464.41 cm BT -/F51 11.9552 Tf 196.517 706.129 Td [(write)-298(\227)-297(W)74(rite)-298(a)-298(sparse)-298(matrix)-297(to)-298(a)-298(\002le)-298(in)-297(the)-298(Harwell\226)]TJ -18.913 -13.948 Td [(Boeing)-250(format)]TJ/F54 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-884(h)-109(b)]TJ +/F62 9.9626 Tf 185.456 442.492 Td [(Figur)18(e)-250(7:)-310(Hacked)-250(ELLP)92(ACK)-250(compr)18(ession)-250(of)-250(matrix)-250(in)-250(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-250(5)]TJ +0 g 0 G +0 g 0 G +0 g 0 G + -19.807 -24.042 Td [(W)55(ith)-207(this)-206(data)-207(str)8(uctur)18(e)-206(a)-207(very)-206(long)-207(r)18(ow)-207(only)-206(af)18(fects)-207(one)-206(hack,)-216(and)-206(ther)18(efor)18(e)]TJ -14.944 -11.955 Td [(the)-250(additional)-250(memory)-250(is)-250(limited)-250(to)-250(the)-250(hack)-250(in)-250(which)-250(the)-250(r)18(ow)-250(appears.)]TJ 14.944 -12.021 Td [(The)-250(r)18(elevant)-250(data)-250(type)-250(is)]TJ/F67 9.9626 Tf 110.952 0 Td [(psb_T_hll_sparse_mat)]TJ/F62 9.9626 Tf 104.607 0 Td [(:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 195.753 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 150.705 244.903 cm +0 0 343.711 137.484 re f Q +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F54 9.9626 Tf 199.827 667.24 Td [(w)-109(r)-109(i)-109(t)-109(e)-242(\050)-167(a)-241(,)-927(i)-152(r)-151(e)-152(t)-478(,)-904(i)-130(u)-129(n)-130(i)-129(t)-435(,)-871(f)-96(i)-96(l)-96(e)-97(n)-96(a)-96(m)-96(e)-368(,)-816(k)-42(e)-42(y)-259(,)-855(r)-79(h)-80(s)-335(,)-918(m)-144(t)-143(i)-144(t)-143(l)-143(e)-277(\051)]TJ +/F102 8.9664 Tf 163.108 371.728 Td [(type)]TJ 0 g 0 G + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(extends)]TJ 0 g 0 G -/F51 9.9626 Tf -49.122 -27.896 Td [(T)90(ype:)]TJ + [(\050psb_d_base_sparse_mat\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(psb_d_hll_sparse_mat)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(a)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 9.415 -10.959 Td [(!)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)-250(to)-250(be)-250(written.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(required)]TJ/F54 9.9626 Tf 39.293 0 Td [(.)]TJ -63.292 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.137 0 Td [(psb)]TJ -ET -q -1 0 0 1 360.068 575.783 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 363.206 575.584 Td [(Tspmat)]TJ -ET -q -1 0 0 1 395.216 575.783 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 398.354 575.584 Td [(type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.959 Td [(!)-525(HLL)-525(format.)-525(\050Hacked)-525(ELL\051)]TJ 0 g 0 G -/F51 9.9626 Tf -268.57 -19.926 Td [(b)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(Rigth)-250(hand)-250(side.)]TJ 13.838 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(Optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(An)-235(array)-234(of)-235(type)-234(r)18(eal)-235(or)-234(complex,)-238(rank)-234(1)-235(and)-234(having)-235(the)-234(ALLOCA)74(T)74(ABLE)]TJ 0 -11.955 Td [(attribute;)-361(will)-324(be)-324(allocated)-323(and)-324(\002lled)-324(in)-324(if)-324(the)-324(input)-324(\002le)-323(contains)-324(a)-324(right)]TJ 0 -11.955 Td [(hand)-250(side.)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.959 Td [(!)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.926 Td [(\002lename)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(written)-250(to.)]TJ -19.368 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.187 -11.955 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F59 9.9626 Tf 298.534 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(,)-387(in)]TJ -303.765 -11.955 Td [(which)-234(case)-234(the)-233(default)-234(output)-234(unit)-234(6)-234(\050i)1(.e.)-305(standar)18(d)-234(output)-234(in)-233(Unix)-234(jar)18(gon\051)]TJ 0 -11.955 Td [(is)-250(used.)-310(Default:)]TJ/F59 9.9626 Tf 74.799 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F102 8.9664 Tf 0 -10.959 Td [(integer)]TJ 0 g 0 G -/F51 9.9626 Tf -104.935 -19.926 Td [(iunit)]TJ + [(\050psb_ipk_\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 27.108 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.202 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.187 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F59 9.9626 Tf 287.758 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F51 9.9626 Tf -317.894 -19.925 Td [(key)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 21.589 0 Td [(Matrix)-250(key)111(.)]TJ 3.317 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(Optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(A)-291(charachter)-291(variable)-291(of)-291(length)-291(8)-291(holding)-291(the)-290(matrix)-291(key)-291(as)-291(speci\002ed)-291(by)]TJ 0 -11.955 Td [(the)-250(Harwell-Boeing)-250(format)-250(and)-250(to)-250(be)-250(written)-250(to)-250(\002le.)]TJ + [-525(hksz)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.925 Td [(mtitle)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 32.089 0 Td [(Matrix)-250(title.)]TJ -7.183 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(Optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(A)-239(charachter)-239(variable)-239(of)-239(length)-240(72)-239(holding)-239(the)-239(matrix)-239(title)-239(as)-239(speci\002ed)-239(by)]TJ 0 -11.956 Td [(the)-250(Harwell-Boeing)-250(format)-250(and)-250(to)-250(be)-250(written)-250(to)-250(\002le.)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(integer)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -21.917 Td [(On)-250(Return)]TJ + [(\050psb_ipk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ 0 g 0 G - 0 -19.926 Td [(iret)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.433 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G - 139.477 -128.483 Td [(144)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1824 0 obj -<< -/Length 3542 ->> -stream + [-525(irn\050:\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(ja\050:\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(9.3)-1000(mm)]TJ -ET -q -1 0 0 1 148.768 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 152.354 706.129 Td [(mat)]TJ -ET -q -1 0 0 1 173.658 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 177.245 706.129 Td [(read)-202(\227)-203(Read)-202(a)-203(sparse)-202(matrix)-203(from)-202(a)-203(\002le)-202(in)-203(the)-202(Ma-)]TJ -50.45 -13.948 Td [(trixMarket)-250(format)]TJ/F54 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-810(m)-35(m)]TJ -ET -q -1 0 0 1 149.022 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 152.359 667.24 Td [(m)-35(a)-35(t)]TJ -ET -q -1 0 0 1 171.029 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 174.367 667.24 Td [(r)-35(e)-35(a)-35(d)-169(\050)-166(a)-242(,)-927(i)-151(r)-152(e)-151(t)-478(,)-905(i)-129(u)-130(n)-129(i)-130(t)-434(,)-882(f)-107(i)-107(l)-107(e)-107(n)-107(a)-106(m)-107(e)-241(\051)]TJ + [-525(idiag\050:\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + 18.829 -10.959 Td [(hkoffs\050:\051)]TJ/F69 5.1905 Tf -15.277 0 Td [(,)]TJ/F91 5.1905 Tf 0.61 0 Td [(!)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -74.472 -27.896 Td [(T)90(ype:)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F102 8.9664 Tf -4.162 -10.959 Td [(real)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [(\050psb_dpk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(\002lename)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F54 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(r)18(ead.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F59 9.9626 Tf 298.534 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(,)-387(in)]TJ -303.764 -11.955 Td [(which)-254(case)-253(the)-254(default)-254(input)-253(unit)-254(5)-254(\050i.e.)-321(standar)18(d)-253(input)-254(in)-254(Unix)-253(jar)18(gon\051)-254(is)]TJ 0 -11.955 Td [(used.)-310(Default:)]TJ/F59 9.9626 Tf 65.185 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -95.322 -19.925 Td [(iunit)]TJ + [-525(val\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 27.109 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.202 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F59 9.9626 Tf 287.757 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.415 -21.918 Td [(contains)]TJ 0 g 0 G -/F51 9.9626 Tf -317.895 -21.918 Td [(On)-250(Return)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + 4.708 -10.958 Td [(....)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.926 Td [(a)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -4.708 -10.959 Td [(end)-525(type)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)-250(r)18(ead)-250(fr)18(om)-250(\002le.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 442.283 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 442.084 Td [(Tspmat)]TJ -ET -q -1 0 0 1 344.406 442.283 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 347.544 442.084 Td [(type)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F59 9.9626 Tf -12.403 -39.966 Td [(Diagonal)-250(storage)]TJ/F62 9.9626 Tf 0 -19.092 Td [(The)-399(DIAgonal)-399(\050DIA\051)-399(format)-399(\050shown)-399(in)-400(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-399(8)]TJ 0 g 0 G -/F51 9.9626 Tf -268.571 -19.925 Td [(iret)]TJ + [(\051)-399(has)-399(a)-399(2-dimensional)-399(array)]TJ/F67 9.9626 Tf 0 -11.955 Td [(AS)]TJ/F62 9.9626 Tf 13.716 0 Td [(containing)-327(in)-327(each)-327(column)-326(the)-327(coef)18(\002cients)-327(along)-327(a)-327(diagonal)-327(of)-327(the)-326(matrix,)]TJ -13.716 -11.955 Td [(and)-302(an)-302(integer)-302(array)]TJ/F67 9.9626 Tf 94.018 0 Td [(OFFSET)]TJ/F62 9.9626 Tf 34.39 0 Td [(that)-302(determines)-302(wher)18(e)-302(each)-302(diagonal)-302(starts.)-466(The)]TJ -128.408 -11.955 Td [(diagonals)-250(in)]TJ/F67 9.9626 Tf 56.527 0 Td [(AS)]TJ/F62 9.9626 Tf 12.952 0 Td [(ar)18(e)-250(padded)-250(with)-250(zer)18(os)-250(as)-250(necessary)111(.)]TJ -54.535 -12.021 Td [(The)-194(code)-193(to)-194(compute)-193(the)-194(matrix-vector)-194(pr)18(oduct)]TJ/F60 9.9626 Tf 206.92 0 Td [(y)]TJ/F93 10.3811 Tf 7.997 0 Td [(=)]TJ/F60 9.9626 Tf 11.584 0 Td [(A)-42(x)]TJ/F62 9.9626 Tf 14.746 0 Td [(is)-194(shown)-193(in)-194(Alg.)]TJ +0 0 1 rg 0 0 1 RG + [-193(2)]TJ 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + [(;)-213(it)]TJ -256.191 -11.955 Td [(costs)-205(one)-205(memory)-206(r)18(ead)-205(per)-205(outer)-205(iteration,)-214(plus)-205(thr)18(ee)-206(memory)-205(r)18(eads,)-214(one)-205(mem-)]TJ 0 -11.955 Td [(ory)-322(write)-321(and)-322(two)-321(\003oating-point)-322(operations)-322(per)-321(inner)-322(iteration.)-525(The)-321(accesses)]TJ 0 g 0 G - 139.477 -307.811 Td [(145)]TJ + 164.383 -29.888 Td [(166)]TJ 0 g 0 G ET endstream endobj -1832 0 obj +2096 0 obj << -/Length 4155 +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (../figures/hll.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 2106 0 R +/BBox [0 0 494 214] +/Resources << +/ProcSet [ /PDF /Text ] +/ExtGState << +/R7 2107 0 R +>>/Font << /R8 2108 0 R/R10 2109 0 R>> +>> +/Length 2880 +/Filter /FlateDecode +>> +stream +xœÝZËŽ%· Ý×WÔ²{Ñe½ËN`;pAz0 #«›x‚ o€qùýIQÂŒ‘Ezfq›,Š<<,Q*U}:ÝåO‡ÿù÷v?¾y©çÇŸw†«ô™zÁGwþú·óÇóŸpéã‘rM0*”zùÎûTÄ+—x‚®–AvWª$»+Âõ\¯Óyƒ0¶VÐä+9“K‚ˆ 'r ²¿\VGÀàTŒü›ñýÕÒ#õ+ƒ"Á¥jPŠ\5kn éT5^ݧ£–+å8P µ]Àש(›»Bó3–)ó‘é´L¨æÊÄ`6Å [A.Ÿ¿H9|Š—ó¦>>ö+… õñ±\1Θ>Æ«öhê㣿€q­Ü—sIóyÖgZ f¦‡Á1Ø(û%˜ƬOå*mÖÇÃO¥ +sŒ [?-Ïúø\.Ÿò̃åYc1˜PÌ•‰Ál* +f_@.Å0õ ¨ñÕÔ÷}Ög/Ï^¤6öY¤~È#+–MuÔ‚yQR½8[m!ÏNÁÌÌ:–9wˆ”Yÿˆ´…lj‘X_´6‰7µyÖÆXÓà ÊÆ๣(x²È¥X›Pøè4Ÿ +ø¸MºÍL¸A=µ•žHoÐwõ¡ØW¨35è3 ƒe£ÎU<Ê¡^QEª·î*Öo—ëKŸ¯œ ŸF«ˆ"$hÄÐT¨x'ŸÃCŘÐv°ïª¢BÍ Z2Jh\5'MƒEÊ{ä©̃:D™ƒH…À<+ĵ¶:8›ÜM³:Ø5ZÏRl*-% êJtÙTûR-A«ƒ} W NKÄYc@¼Lƒ9bp;1 î'D–9‰YŸFWy,šöüY÷H¤{„QBÛ27™ˆ³:Ó€yPƒ(a©„y¸Vb™;¤Zªƒ™§Y$†–Í™KÕV¹õuV¹UÓbÑTg ^Ô3gB0·Š¹WˆÜ•{/Ðx¿?wý67m¢ð™v¥oLJ£æÍFÆÆ׊0G;nBÀH41á¨@VÁ;º:­D³X¥€û9kŚŠ+—‚%¢h¬UIJ±µ¬ÐY¤sÙç~h|Éؤ}jÆJ5Ö +±.F¬°65€÷bDc¬b4·ÌÐk_Ö&Ç͆Ö&ø͆ƒØÚc&­4¸™l?§b1Êq3bÅbÜfÄ +‚TœNéÞ§ÆÏx±G!\¬Dc¬nÿ­‘(¬MÎB¸‰ÆÆË™I¨\–³äšŠÅ(§Íˆ‹Qð›+è.€MI…éŒáÇõæ¤ ;ì_Tü’Uõ¨ŠÆJ4Ö*¶ÝJ4»@,~›Œ¢Y&PÛdµJãôÈX‰f±Êe›Œ¢Y¬Zƒ-KµV¬Y¦?“LÃ~®òRÇ-_[È«IžXYòÄj’'V–Õ”7YàZ( í@¦ç•o‡o}XŠEËãW<ˆÌ1p„h +í£ÔCs…j(1X†‚‚5÷ð°çq;þŽÜGðS`­«™XïS­.â“[tÖœ³¶äqÑQöáú‚§YµÀÅ|©‘9ŽšøAŠ=t˜À-!2Œ`jÁ(ÕÖ-ï67ª\){v¢l%¬Ùᾧ›]ökv©®Ù‰<³cbcŠc˜ì…Z0Jõ°åA•{·¹Qå ý 6§BÜ»jRçÓ¯ÉoÊòCkYdˆÜaS‹Ú»z`YbàÓ2k`+™ÈçðPð!Gp ‘a£P Á-¶<¨rï6·1çðp5ÛìX£Ø +lþ0[ÁŽ¡Úì2,ë}$XÇK¦á€e“œhÚp È9‚É1¨… [cƽÏÌxç'›kùëø%ÔkrwUðþQ¸‘ý¥x™;ÐÏrü¼"Zü$e¿r?öÑo‡°¡»báOvÅŸ7s_üoýŽ¹×ËUñ™´v¼ËïcyÁ—e•¾8U.°UpØ=pT±$žÔâb(`FÁ&ûœ<çJ÷€C\"…ÑÝ<žÊœÕÐÍð-™È¶Û—<¯Ð1;Â#|¹&D(šÂçýnÚû± Äæö529¸´²,ämI3é©î¢áãxðiWÐÓŠKÇÞ3Ï3TÃg o‡ ‘ÃÁ‡[LFBnæ¡Æ}j8ÔnùÆw1Ö •Eõ« +,ÃØRúi¨¶ ã”9Ó—„¯¦/«,}Ù·8κt ˜š1Æ7 !ÌudÊX6?ØÑÖXoSÃ+Ÿ¢K:–•O#óu !÷zŸÀn|YpUÃ$dO[ +% ÉuÁ.Œ¢é>Cx²„o1i»ôƒï×F/Guk#Š¹žË Ÿ@ÁÛÑ™½„ʺÛ …Æ3öÀÈÖ˜È'<¹Äÿüs»Ÿ¿{=¾yÁ£s˜…¯Ð³Çåó _ŽWXTàŽ…?á÷õ~üüð‡çßÿñüÓwß}x|‚ýc¯5<|ûzÎ?<ŸÏ//Ï¿•*þá§sû÷üOؽ{ÿð-‡›Üüé½”‡ŸÿòúÃñíëñgøÿ[‡¬¿~übB †dó‰;ö&X&a· +Ù<øÇ×O?Ó‚•2.šg9_ÿ*×Ps ŸOøYT¨•®U¼v²rƈºŸÂãSóøbzh”8†Ñ½ÂXZÊü°ó`—ú矆@8èwàô)aÿ…¢ªküœËEvëñ»›šK°|dG˜s%´D YšVòÔ@( ázé‹ñCüÿæ׬á¿8Ç7•xVÇ‹ éÑi3kè4½ðw(Ölú¸V*|ik¬Ô¦âÓ*økµøåÀ—žÑÑ÷K•_á•Box:ÀÀ¯¿Æë‘HÏ+x´“êˆÆi[‡;¬<ž¿Á²ÐùÅ+i œ°ùŸø`8¼=ÿ³·‡ +endstream +endobj +2111 0 obj +<< +/Filter /FlateDecode +/Length 214 +>> +stream +xœ]Á‚0 †ï{Š½Á@²ì¢£¾ÀÙ±L<øö¶E> +stream +xœ]Mƒ F÷œbn€?MLãÆn\Ø4m/€0qÑÛP»èƒïM>x?Ü2øÃ[ùÂÚò¸ÚÍK„ gC¬¬@Ê»\„c¼…{B  Þù.äϲÉ7åîH«puB¢4#k‹¢kµî’ú{ªvaÒG²ŽÉúÒE#ž"6¯›¬ŸÁ4)U:€Ü¼G +¹wî•úÂßלuÉ‚¸Ø<~V +endstream +endobj +2114 0 obj +<< +/Filter /FlateDecode +/Length1 5268 +/Length 3426 +>> +stream +xœÍX}l[×u?÷=~“z|||)’ïñ‰’%’¢ý(‘¢"ÉŠ,ËVì9Ž;dª8‘dEqj·ž›zIÐdmƒ[“¸ Ò! Ú¥hÑ­²ûhKܬuÝu¶Åm‡Lè†.²Dk6¬A$íÜGJv²Æþ ½wϹ_çüÎïžs) Їo?Z´Àù(€ãKgζeéÈ‹KçÒƒJèS¨ø þ=}ÿÙ•3áVñØw€[^9ýÈýíñî‡BûX^8ɾø,@d•åPá]â‡ø×Pî}àÌC·ÇG\ø=ýÑ¥…Î~Wð‘:³ððYÒ¯áøÿBYÿÈ™åŽ}Oã#qö£{¨3œõŸ=·|¶±?àŠ¡M÷°r•{òÀRøíÃoÆaîwt‡à~çQZÀÇ  +d wÀAP`tØ ·Â|NÁ"œ Tè†ûà( ô °ÀÜ ûàIø-¸ ¸¿gáãð)8 +¿†È~~> !PÈSPÐÁÃ5zÛù:swŒzrµ‰º£ûd]e(V $¯ÿ å +”Ë8RÛkÖåó§b::\3èT½@]y6Õ0GkŸxµžÀqµõĿצAݹ=_w:êu\Ïïš¿»@=y;C.àîú…ùù\Æ›·{ÕԶʗ—%½Z,P^ÿ$ÛäOqòÙ9S§®¾Û(®5– :kŒ& £žh8Ò‘¶Ä6 ´­¢+óú_;î„òz‘zsó5]ßgÎ.<¨×ô“‹í%ظ.¶3n­7ô}Ù³¡7Lg;“-N§p$úÇtj™ 8GpvšXFB_m 8i­9Ö±Íp†…ó¦¾ÚÙÜÔkŽ& Jêµ:4g6L½1×0Ø„öö*P‘…AF»%ækÈp Á^æƒ÷Ýè ›ªäщÆ ¶ÛNš /Õ×ÆW°'’¿SdjzšxI„%pžlð±{©™‹h½9À1§ù©#µ&2qÏÒt“è_T_¢ÝË=[{©yŠZćôxb>»9 +'=?~ïâºêy“iÞ÷@þ¦<û‘áÊDÙTßoc>p¿D6?GÉçáõ®Ù„7CúÞ¦k…Ë9‚_ß{ŸÞ„ŽìÃNµ#xY§ïz§;½™ŽàFÁó‹ŽdkŽv„ +A«#t¡êi „ +"¨Ê\ÎægÏ£¨ 1·E™í«8ûv4»·EE)¼-†Ùx‘˜×Ç«L#ܸB”iT.z]£á"ѹm1†¢–ßë4’cpAfóe®Ì½ &æ’Tàehȵz»@qå¨U´ËÞ5ªmð®55ä.¦ûst¤h—\k„Ž©µÚ*KÆÑev®w¤f<ÃFÇ“8:.Ú’³ûµµKþ‚/×ò‹p'ô‹võ|dò¢]Á¹»œ¹vµ‚$S­JýÒ%z{±Ù/7}a©Z­ÒˆD½U°µ^I¶yµºs±¢jD ^מtWÊ#Ã}fÆ£FPë13ÃeÅj·úF†ËD2$·!Ä8x:“ÉZa‰÷¹Â®(—Ý]ÈŽõiòÎ{Fõt,šJ‘ìúÏÈ?m"MîkÇf{ Sw+zFî=ÇöY³}1ÿNIÖ>qtý;FTK&£1ûÍ÷þ’\W‘Èa¬I—1ûçàëÐL"²T,µR^È @a9aYV³G­§ÔʶµnßÔ¶@Köuål` ç‹ÔXµc«3Ö©XÂÚo5“ rÖ¢1ÑVIŽË kÍÊúB"öy­¦b’*¡ä¶X8½"œ®ÚY@ý½ EÅ@ÜúG U ª‰jª12I&ˆ¡nµG¤íþWI15F¢c)}tãßÆR?j¿Û:²rµœv+Š;]Þ˜ѱåÊìþÞµ­±·z9§«ÜW!Iƒ¦Ä"¥–ÜÆø0C*ŠHµ )ŠÀ0©¢fŽDèHÍϪ&³q¸H /ƾ䈥¤`K‹YBaÕckTí 9 Â]ÈpŸ¾*1¶ß‹¤FSmŸ(É—ø.5t¶~‡á`UÔ‚¨Å˜–ïB­‡Åb¸\²¢%«RÒÌ6—SJš@Å敧¦Îú ¿ê×*¿{æñÂÁ;†ÎŸ’‡ž<öÌKdïóß3Œ7.õÂ]Ÿ¾5¹,OL_vò1Ãø +r±@Ó`‡J­Lá fòmܶpÏ£Ö9åÙU» X´š=YF¤–rV3Ûä¬Ò€ÅÎ-ؤS|§s(¦„‰–"Œ>Ö¤éU$Þ_B§$qÎl†4ˆg¸´ëÛk3VÿÆx9øÝsÕãúØÆ¿Þ’ÖÇHÏDÚ>¤h÷äR÷®WÞËŒ áx·×%ß7zJýV%íÂ>s’Ø#k¥+¯¾°?îâk,ž6Æ3LCXD“G¹h{üȪL‘òxÜ0–xžB4lÍ6ñŠ¡cà©bÄä$;2<óÑí¤”’“^”á!ÞÌ|;T“9ùÄå—žzú_Þד«$w*¤‚‘ü]ãõBDøÎs_ºúÝç¿ômr|ÿÙ™¼8¸ûøí¥ÅÅññÅÅíxe1^½ð"4ƒ,+‡œ°P¿ÕŒ` ì„Íi´”v¸‹„æϵÒî`OIÚµÖt§™ÖMü¹fÚÍšiÓŸ#4[¤áUÛƒ1MYMO˜ux8&ìaÍpªXÔ#bÝÍQR›Ð ÚÒ;*Âa¤t‹·;„„íªÚ +;…2;…æÉ‘ÆXs´O’”¬r¥DÞÐü;&.coUSz…Søî®ò± ‘ÐÅñ¿öæ„ùÉ^þ†>µ§¯K}Ýoß-R›?'ÿ€1‚Ç¡ÙǸ¬—ì<1R´ýx:„ÈωHÌÙb‘zV1_Úš¼ÖêáCè‰&Ú€AM +köN|k}h½Y¥ ýïÄõ<+8IùRH”Øa;ß‘šžäV„ü2 1w1è#7œÕ!Ò.Cˆ aBV“8Ì'nt¥ôX‘ºW[.îÄ¡.‡ö¶€eUÐdDJXSSp† Ú~tÞç ·»q¬ËíäqÛDz<^V0¡;D¨tüÜ.¯#ï•ï¬Ê17ì‰IÕcåÌäÆO&uîôБo~óhaƒTtvŒ)æKy}‰ã₩ÊbîÆðºY¥7 I!~;üxMilÕö¢=2Ú.{™Á²€½2kzñœÛ=ØëÅÜFUV‹ÜS\±X»¡ÞK[ק[Ô˜Oõé$=‘®Ý‘MžŽëñiòƒvÖMW6ž<±²C¤®×ö‹h³ƒðhjì´ +¥VÌ1ÒîR±î C?z­´ÓÓ "Ëɹ"M¬2¨a5å„ã2¸™·RM[Tñ‡$B¢­Ùy|÷ʘ¤E¼R·DÃè›Èœì®RC¢~v›¤nçâ˜âÔˆ×c:EÊä?Xœv¡pê/1s‡ŽZ”箩Ùä obAúéh29ú­G~µ~"¥rÏ +æm#Ÿ3Èzw„SÈ0÷ý²¾]…Òeã»ÜøÑÙ=av/ &—œÜ~âävvÍ¡Pj©íóJb¦S„·ö/ÏÖ,Ûž°s[ unDÄ©¿êõ«\ÿÇ≠*Ý©¸#Ü¥þÑÜ3½{6®ÍŸ±†3qÛ'=ó”êçžUøÂôW«iNQxãÖ¯ÌíŽ`­uw+ݯ¿¡ ²€7$ö¡d†|ü'÷(~¯ñóüë®s®»-÷+ž^h³ ýë/‚f <Ž¸éÇ}³®ç÷ââAöß\1ä 0}×Í×þñqÁ3ÎÓÅðy§¾¹‰O=QfÝ€›~8üc˜ûÁë¬ +xïquúÚÛãÛÈsæzp&Ìž[^>º|îÔýÎ +äéÿƒ]¾÷‹ïÀ;›ïStþ+ÀÕ Ãý„¹áß—Áä¾yã]Ô¿†íaÔIâñ}Âäqp¿ƒí?A»ù›ìÏì}¾ú¶Mÿðò½áñÿà¾Â¿ÅÔ×Ìà [ï÷.®ÿ­çMþ÷n´ö¿j¬O +endstream +endobj +2115 0 obj +<< +/Filter /FlateDecode +/Length1 11124 +/Length 7803 +>> +stream +xœÍzy|TU²pÕ9÷ö’N§;kw’îÎMw–NHÈ$¶äfeI aÓ ‚IH‚‚`‚ÐðÜF… ¨àÐé6Á'ѧÎà2à8ãö‰#ŽËLfq„ôW÷v@ù~ó{¿÷¾ß÷Ç»‡ª:§ªÎVUgëFèuµ sò@ý2Z¼|]S{¨œ1ç-ßÐeÿÛÜ·ã#í++ÛW­›Ö} @7 v®Z»qeH?‰êžkmiZñú…±*·‡˜S[‰¾ "†Ê©­ëºné»BqkÛ–7Mô—J(f]ÓÍíšµfKõö›ÖµLŒïnBR{[g×DýIŠ¼}}Kû¯WºÏ“~=€É*ÞG£ªÁ$¾ ‚Ÿœ%øb|Nð‚xHãk‚£<šj?;Nxö@*œÃ)ð2ŒÀØ¥P»`&œ‚Ãñ @‚ +x +œhU`Aà:XŸÁ(¤C5|ŒQÔN%´C¿$\ w‘V”ÃÏa×âBÈ¡ü,–…nê¹?8H¾|ŸJÃg˜„Y”ûDBôÀ¿A¬×ƒ B3ÀÍø%8À Û…¡/x\Gà·XM¹¹°Q|_ÖR­'Ñ‚#Á3ÁÏáE¡…Zº î¦ûa„MæåâØÁWÃ&»ÿÀLJŸ°[YOpIð©àg4Ø`:̇¥Ðà&x‚¼ú2¼Å4O ¯Š›ÄsÁûɶ.(£±×’öBj{;yÉJïÒ,#ÑN³˜Žóp®Â~|ø~À4ÌÁ:ØWÜÇßà SE1XL-ÅA2õ+Áh%ÜJÖ¾Ÿæû¼ +'1]˜M3z—êË®b”žd§ØÇüNÞ/\ïÿãø÷Á>ÐR”Í$;tÃ3d…¿`!×`'~J#ßÉžãÜÌ%^ÈKù"ÞÀïæ»ø/ù¯„õÂAáCq¶Ø$Ô6ß8þv°:xÙACãJƒ,(€i?+)šn ñµSZ›a+ôÁ}/÷äyŸ€“ð[øü‰<è 1¯¦Þ×QÔ݉÷QzáKø*žÄOð[%±Jél*+a嬊­bwRÚÅN³wÙ|_Î{x/¥Ýü(ÿ@A‚b¥Yâvñ€æ mºv–¶Y÷æ…±‹™.~<ã ã׎?8þÒøçÁÅÁ4~'dÃdé6å#ƒû(=C‘x^ƒ7á=u¬_#C‘"ÞŠECy­gâlJsq>¥k(-Á¥”š°[)õ`/Þ†·ãx/> ¦‡inûði «x;Pg`~ð@ІaÐ\K;ï °O+ÒÞÓ Éâ>Y–Kf\í¹ª¸hú´©…ùySrs&gg¹33ÒÓ\ÎT)Åa·%'MJLˆ·Zâb¢£"ͦc¸!L¯ÓjD3„¬J©Êk÷¹¼>Á%Íš•­”¥&b4ýˆáõÙ‰Uu¥ŽÏîUÕìWjʤ¹òÿÒ”CšòeM4Û=àÉβWJvß[’=€Kç×SþÞ +©ÁîSósÕüN5o¤¼ÃAì•ÖÖ +»½öJ_Õ†Ö¾Jo57h+—Ê[²³`0Ì@Yå|©}-3PÍ0Keñ ‘åK**}ñR…2wV6­ðÕͯ¯¬Ht8²³|X¾\jöTæ3¹U(W»ñiÊ}Zµûje6°Ý>˜5Ò·#`†f¯;|…´¢éºzojPúˆtS¿>˦³ÖŠÔxTyý¶Ky_¥uµ])öõm³ûæ×ÿXêPpCµácÎ*o_u¼ƒLX½ÐN}±;ê}x'uhWæ¡Ì)4»©Ráx×Ø}z©Ljí[ã%Ç$ôù`ÁF‡?!A>…„J{ߢzÉá+I”š*& Æ@ß‚Cñ²=þJIvÖ 92dÖÁÓD&ÜøãLËe™šSÕ•\õ‚ËvEeDÒl +Ÿ}¹FR/Ñœ¦+¨e:ô-ŸNjô5 Õò­ ¬öé˽}æb⛕ú>Ñi–ì}ßù_ûÓ•œ¦ ŽÆiþ”¬%—ä—ò>·Û—™©ˆ¶œæU$#—$±×(’ÞK’ËÕ½Åñs ¼,b}:×å&s\tek±ãþ qKH^½Pªž¿´Þ^Ùç°mõ¢+J!ùô˲‰†dpŸà$KÍ–(ô,­WôOtVI•«½³h©Ñ}Ñåõ<‘5„r,‘«MQü^w¹e¥P®´%85jü¯huÀ*íU>³wV7„9ÿÍJà9¥–J~¨61'_±ûÊòUW”¯^x§ .V½hi__ز*Ú¬úúª${UŸ·¯)ìm–ìf©ï¯çõ}í•ÞK'úªv4Ð$Z±8[¹#‚Ö1^ KÌð}×øs±ê¬Låèák:}Ú@¤²™Þ1¥t7xËd¥·$;០UVæÉ¢îÉ*õ§g䩤¼矰CtµãŒ?.Q•|ì/+›ÈLÊ efç) ãÃ_ÿ˜Ÿ¡CE­5”>9ï\©‘Èoñ6à¿™8”êÊÛs‚¿Iò×ùI:O•j'ýÆÈ=ø^*+tÏDùQ¢Šü§üG¨Gôá úñ‰>¨>xmü‰òÞ­Öëš ¼ÓŸl3—&“ÜNKÀ)·‹r»Èt»¨„‘.vkÕž‰æ]¢d®-~‡¤úhË%>o€Lº…L¿…,·…,·m¾¤³9¤“Í7“ÎfÒÙL:›É*¹¼“úë$‡a3€“Ý;Éî +ßGx„à´Ê¿ƒðN‚¥Äo";fШîáküé6 +²UCEr^ÉqºÇ"5»r(>)¯ÿ‡’>L D¢Ԥ趨Җ!}¸ÂmJH +QÒº¡4‚/‡[ÄN%( ¨ ørjŽm˜Ïƒu:#l=¬‡÷=¢[Q'xÔé€B2Šgƒ‡2lœæÕ·ë{õܬ·ësõ²¾N/¶Ñ“¨ŸsÏá%¼–7r1ñk‹ó‰È35Åù; ŸaÄpÚ ú4#šÓšQÍ9h×äjdMÆ«i×ôjvj4úšZæ5´z Ül°r ²¡Î Ú´8Pz'oV-a3A;ÁNlÜH|;¿ž ‘¼ÑH¦¸žø@¨d&8MùQ¢"•L¤g"=qMÄ5+’:/Aû„TsYr©Ž¢N‘¤‘4‚¸dÛQÂç”Á*©d¤’‘´N³ 4B3a;AWy£5„/Ér'ä^*?§ê\’ÉJ]vAnJÉ@_dàÎ ”=%¥yr +¡¨¨¨F©ÑÙ˜Þ¸Oh“Úœmémû„Z©ÖY›^»O(‘Jœ%é%û„)Ç™“ž³O°I6§-ݶOè¯9\s¢æTÐXÓVÓSç‘ë†üîÜ<•¦8zÄŸ7ÍTz5;LÓi$¼‡à aAA AÈ«Üg‰û,qŸ…Z‚F‘j=«l1„m2…¿G•)9Eήsšü!q~mi m»{8µ}ˆä‡TíPî°Ê÷Uùµú*_Ѳ\ª§l‚KÕín)-Ã¥PBÐHÐN Â)¾ÎPë„mí‡ ¾”Ò¾„=Ké;ijdã”XÄÅÑÉ©3—šY8Å‚ŸRñÃ*¾GÅ%*N•#æ¿c|qŽñ®9Æ4Ê°t:ƒŒ¸KÅÙPj|®ÔX[jÌ(5Rkp€‘ŪX£`ü£Šç©8KŽq¿sÿæ0þÕa|Üaìp¯v(õ&Ñ6²LTÏQ±K6ØŒ¯ÙŒKlÆi6c©w#õe*NVq¢‚ñëçL&Ðǯ¡‚ZB¿'Ã`  ú=¥DÆýž™D.ú=»‰üÓïù‰íüÕ£ ¿õ§žµ•Æâyœ-(å¿Mпâlz.Ùèñ<›n6Ütý™ß³UÑ’êÿ”ÊO@ŠNÑßKA…îÁÙ*ÿñ‰zù³š©×GýY©×ŸB–ÚëCþ¬³Äý‰?ë"÷û³Öé÷;•®ñ{2m¥‘¸ +R™¢»œLIÍD³¨åµDg†*Wú³”ZJ,÷KSˆ¤)£|%¨S»³ù%u’I ©MLIt"8U&uðFHQ©Î/m¥V4Ï9ÏÚþî9®L¾A“·íÓh~‹©ø{œí?h{û˜b.¿íTVGm¿’ŽÛ^M àb¿m$+ #Á‰¬Ã#¶A2²tµÎZe{VR¥û$’’«÷x²mJKm8©ì·mÍzA¬£/&qCÖ [ç ­Ê@ËêL³KëmEÄžÀÙCmSRÊPr©ƒGm™Ô£KR‡rÍ´aVZì–³´]Úfíbí|íUÚ|m¶Ö®MÒNÒÆè¢tf]„.\¦Óé4:AÇt ‹ Ge·ro‹Ñ˜ÕŸø jÞÌÌÔŸþ€¡ŽÑÚñEójV½° }QÕP½¨Ì7Í]Ðø¦»«}ººkëïk ’Ý@XTOª°îLTwÇ1çÎ{ºùÎ{°Ú7²ª›í¾oÒ<Âè’*JeVˆÛPb-‰šYTUñ/w»ø¬îÖ$ßƒÕ ë}Ï$5øò”L0©¡Ú7Syc¬­²âkWHCý1ÜÄ:*(|ÜTÑpY RX;©G!ŠÚ¤(j‚CªZªFašRY1˜’Rzg+J>/«J«Bm¥RÔVBH%CªÚV*KVÔ(B™~ÜX8 ImÌjc“¥A§“T²œŠÊà4') :§©âƒ?ˆ%gh8 àTûqbƒÚâ:é!Š‚ ¦#÷ÿϯ¥ì ŒCM­X®<νRe ×·}C«Õ×Ûl·®øhâÕîò6/oUhS‹ï#©¥Â·Bª°6-ÿâ劸Iª„å•‹ê—Ë-þ&¹©RjªhÚßS^}E_÷\ç_4Ö£4V®ôµ¿ú_ˆ«ñ~¥¯j¥¯j¥¯ýò~µ¯êeX]W?¨ƒ²zÝ©tˆÂh=x eqæöêâ¸Êa½5qX:¶ ô2—Ê|FE”]š]ªˆhu*¢åç— ‘õÖ«‰ÃøÔ„ÈLìH© Ü`­\]qù_ggg—ÝÝnÂ]ÝV•×E‹Ö±°ÚW¥<=>O¥OöV4 âŽî‰¯¼^6Ÿðœò°6O§ß³ÇsØ#vw7;êDÊ©Ö˜Ò–Ò“ÒŸ²'åpŠF\WTöìIùK +ï¦hÂ.ú*+Ô>»‰Ò?¥ØÕÝ©|@t„ºsw»ËëKS`9Ýz‘nèÙM ä,$á?¿Cð)Á߸ðOž$R8<›gÓÓ½Bé±Á­l:Vž7”[˜7=@´ieˆ.\¢•óBÔSšg%ê/É+5Ña˜ðë|EðO‘çñ<µñîPÔ6tB§iø@….uº»ÐMTÌÝÕévƒJ€“HÕWÆ=`g7)È!DHIåv*ÕºzY)gQ½3ƒæ2<Î^¤Ëª–ðƒ(Ø‹ÏqÓ*™#ñ:x‚ä 8f€oÀëÁê6ë¹è™g>ï™{Ñ%”7_ 4%×éˆt¢­.ØùÈY„ïÁ.Œ¨›ÿ9êR#¶Ò3ñ!9F¶z­ÖQ«VÙÊ6À]À"J£q5ÝTô8@OS®æu”—¨ò?èu½âˆøµLg°‰éŠz]8ã0Œ'õÙrTD„IŽ,Ì5õ˜všL‚)Þ2ÌRñ,˜»Ìí™k;kVF\≌*ÂÈ"øfì~ãvOÉ…eر,Ú™g‰uÎ`…‘i.—”¢=‡sÑžëÆ™wz\˜Ö™à,~±÷ûmë§'3§“%MÙÄ>Ú•iO¶)s¬ žåûÄ0À³Çèý9:8CTOGÊÄëPä™ú2^ã€ñu<ÉÞÇ÷Ù¨Ñh@0ÊFÎDA ËŽœÀY çLàFQžY(þ5D4¿W,ÀGŽÐ.³/è=ñ¹Ng­ u€ +/°?@8&ÓmFñÔ²±³Êä瞧i¹ÍcîÏ6q²{[Ä–W¦äbÇúe°žB)Ú‹:R´mÚ¯Ø{ãžv|`|{Gî¢ü$±ÆõÏ…W'{ ê6 ‹üxü˜ŒUòmZ«¡ÈbtuU&¯ Sr\\†Ö£­}Z«‘í× +Ku×Z–ZoÐuEvE=fx<â‘ÈC†C'Å“–_Z?°|`µ'|g‰¥;—/&ÆÆÇÅ[’¬Z½Å`5$ÄÏŒ¿ÇÒo×Zã³$ćÇkŒ<ž‰«%.6F-Ø*ëõrLxI¯õž/‡›Å„þxÜ8žÅó| +Ž{‡…'ð^Ùšß×F7F·E÷D ÑÔÊÑ2M*ì²½×νö;³ÇÇïh}Q–cYëaýì;Åΰ¿0‹· ã}ªyUËžõŒÍ3/ëø–l¼lÌ<¦Øùâ²OÉÅ÷ÙÈ(K‘jQEEÌáÙf·¼qÉòË”E näŽB€Â +6Vš:5?O™šFË´Ž¼©S§ñƒF± í»o\±ÇåŒ?õè¾ßåÎÙÿÝ l^»¤*ÅñïX†?½uwDZ×~³sÕª'ŽŒŸ›nž’ Ê*ßü‚÷‘¿âÁù¸In@ÔçÛò3ÓÚò7¥ôzÃ{zosöºúòŸ¶îK8à +.áy×ñ´WÃ^5¼gŒÓBjŒ,AŸg´$8ΈjÜ·ïŒx"®‚b¤;ÎNoÄkÓ®Ë_kp5[åZ“Öš nNÛµ9¿_è{µ½ºÛ"o‹êé{XxP·+òÁ¨Gãö»žM{6? Õ}iø*üˈ/Ó¾ÌËÐõiÅP„ÓóÄ +„'¤ *2[ìtÏökÄl…D“JõtÕÑ£¬B.å͸R6C¡\ÈäBoá@áh¡P(½@Ž3 MrX®E¶ì´pK|Á0þ¿¼ä±ócŠÇΞ'÷•Œ)nCKÑ”Üòrž;'9%2NÐÅ:¢Ô 6mR3fÅd6Ãä¨ÜfLì͘¬#äŽËn†œHB ÞDÔýv+}´³@‡²Î\®Â‚w5Ú8%~5RJšÂsNrlLœ%Z£üCO‘-†árþ-iˆï¥|&´ +Å-ºMú›Âo6nŒ¾É¾]w{t˜^ןÁ®Ò‰iVGšUäÉN´â0=¦­(?—V§E-yMÖç8ÛœtâA²²ü"D$w?g±€Ñ:L«+MÏC”9ÊÅ£Ø"GA†œÑ›Áå oÆ@Æh†Ã´J¤&‡cañéWøk,ä°‹giû4Ó +$->OK±H]ʪ:-31Uî2;'¹$—Íèh†$SB3¦ê(g7$7cb$¡½3´/Ý“BËp™¥pêÔ¨i´îÈ1Ó&œÅ´ •Zª§ÖÞ6úvÆã=ýo®¼åµ7Ýÿñk{_dùQeç6ÜÕPÚ8ùÖINÖ©‡[~÷¼ûÓ}¿ÿýøÆ­kرÛæ5}róÀîwnZœ¥úa­³çÉfH…a“NÝÁD1V!FcB€ö¤(}¸d“]^×€kÔ%¸"vD#´AôÃÝÕâÃzÈXêy37´W©F©ÁT)5%•ird­sRbRbr"×D»LNƒËo‰g‡IÛ—†ŒA¹¸pÅlHž¨l_QæØfˆ‹mþa÷ÊT!3sktATÈX‘1Lݳ¦™U+M›©F4²†ÍÙѵÔûØæGï~§ùå­ë^©,ê˜Ú•<97µ(£¸¢pVÛýÖ.(Ýóêøá?}à³—þ>þÅàMëaÑvæ:®^8þ˜j3;ZÇÈfa`„÷äô8#mà•FÙÄef†c¬–&ˆ\/jP7A7 +šp#Ý&ÉQZ]ŒV«ÓqA« ×͈ÆãøÝn ¸G6Š¨Ñë4(„‡ ÇéÍÍé~¹R6èõ&Ž{øaÎxÿ.[±D½˜ÐK·ÈQ7idZñ?:õ;> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(9.4)-1000(mm)]TJ +/F62 9.9626 Tf 99.895 706.129 Td [(to)]TJ/F67 9.9626 Tf 12.291 0 Td [(AS)]TJ/F62 9.9626 Tf 14.062 0 Td [(and)]TJ/F67 9.9626 Tf 20.47 0 Td [(x)]TJ/F62 9.9626 Tf 8.834 0 Td [(ar)18(e)-362(in)-361(strict)-362(sequential)-361(or)18(der)74(,)-390(ther)18(efor)18(e)-362(no)-361(indir)18(ect)-362(addr)18(essing)-362(is)]TJ -55.657 -11.955 Td [(r)18(equir)18(ed.)]TJ +0 g 0 G ET +1 0 0 1 146.769 574.688 cm q -1 0 0 1 199.577 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 203.164 706.129 Td [(array)]TJ -ET +.49594 0 0 .49594 0 0 cm q -1 0 0 1 231.784 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +1 0 0 1 0 0 cm +/Im9 Do Q -BT -/F51 11.9552 Tf 235.371 706.129 Td [(read)-222(\227)-223(Read)-222(a)-223(dense)-222(array)-223(from)-222(a)-223(\002le)-222(in)-223(the)-222(Ma-)]TJ -57.767 -13.948 Td [(trixMarket)-250(format)]TJ/F54 9.9626 Tf -25.158 -24.941 Td [(c)-175(a)-175(l)-174(l)-845(m)-71(m)]TJ -ET -q -1 0 0 1 200.884 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S Q +0 g 0 G +1 0 0 1 -146.769 -574.688 cm BT -/F54 9.9626 Tf 204.572 667.24 Td [(a)-70(r)-70(r)-71(a)-70(y)]TJ +/F62 9.9626 Tf 166.233 552.771 Td [(Figur)18(e)-250(8:)-310(DIA)-250(compr)18(ession)-250(of)-250(matrix)-250(in)-250(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-250(5)]TJ +0 g 0 G +0 g 0 G +0 g 0 G +0 g 0 G +0 g 0 G +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 232.04 667.439 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 114.839 401.402 cm +0 0 313.823 115.567 re f Q -BT -/F54 9.9626 Tf 235.728 667.24 Td [(r)-70(e)-70(a)-71(d)-203(\050)-150(b)-206(,)-926(i)-152(r)-151(e)-152(t)-478(,)-905(i)-129(u)-130(n)-129(i)-130(t)-434(,)-882(f)-107(i)-107(l)-107(e)-107(n)-106(a)-107(m)-107(e)-241(\051)]TJ -0 g 0 G -0 g 0 G +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F51 9.9626 Tf -85.023 -27.896 Td [(T)90(ype:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 136.657 506.308 Td [(do)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + [-525(j)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(1)]TJ 0 g 0 G - 0 -19.925 Td [(\002lename)]TJ + [(,ndiag)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(r)18(ead.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F59 9.9626 Tf 298.533 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(,)-387(in)]TJ -303.764 -11.955 Td [(which)-254(case)-253(the)-254(default)-254(input)-253(unit)-254(5)-254(\050i.e.)-321(standar)18(d)-253(input)-254(in)-254(Unix)-253(jar)18(gon\051)-254(is)]TJ 0 -11.955 Td [(used.)-310(Default:)]TJ/F59 9.9626 Tf 65.184 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 9.415 -10.958 Td [(if)]TJ 0 g 0 G -/F51 9.9626 Tf -95.322 -19.925 Td [(iunit)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 27.108 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.201 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F59 9.9626 Tf 287.757 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ + [-525(\050offset\050j\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -317.894 -21.918 Td [(On)-250(Return)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(>)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.926 Td [(b)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(0)]TJ 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(Rigth)-250(hand)-250(side\050s\051.)]TJ 13.839 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(An)-398(array)-398(of)-397(type)-398(r)18(eal)-398(or)-398(complex,)-434(rank)-398(1)-398(or)-398(2)-398(and)-398(h)1(a)-1(v)1(ing)-398(the)-398(ALLO-)]TJ 0 -11.955 Td [(CA)74(T)74(ABLE)-257(attribute,)-258(or)-257(an)-257(object)-257(of)-257(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 177.91 0 Td [(psb)]TJ -ET -q -1 0 0 1 369.841 430.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 372.979 430.129 Td [(T)]TJ -ET -q -1 0 0 1 378.837 430.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 381.975 430.129 Td [(vect)]TJ -ET -q -1 0 0 1 403.524 430.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 406.663 430.129 Td [(type)]TJ + [(\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(,)-259(of)-257(type)-256(r)18(eal)-257(or)]TJ -251.973 -11.955 Td [(complex.)]TJ 0 -11.955 Td [(W)55(ill)-275(be)-276(allocated)-275(and)-276(\002ll)1(ed)-276(in)-275(if)-276(the)-275(input)-275(\002le)-276(contains)-275(a)-275(right)-276(hand)-275(side,)]TJ 0 -11.956 Td [(otherwise)-250(will)-250(be)-250(left)-250(in)-250(the)-250(UNALLOCA)74(TED)-250(state.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(then)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -31.88 Td [(iret)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + 9.414 -10.959 Td [(ir1)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 139.476 -248.035 Td [(146)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -ET - -endstream -endobj -1839 0 obj -<< -/Length 6815 ->> -stream +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(1)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(9.5)-1000(mm)]TJ -ET -q -1 0 0 1 148.768 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 152.354 706.129 Td [(mat)]TJ -ET -q -1 0 0 1 173.658 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 177.245 706.129 Td [(write)-275(\227)-275(W)74(rite)-275(a)-275(sparse)-275(matrix)-275(to)-275(a)-275(\002le)-275(in)-275(the)-275(Ma-)]TJ -50.45 -13.948 Td [(trixMarket)-250(format)]TJ/F54 9.9626 Tf -25.158 -24.48 Td [(c)-175(a)-175(l)-174(l)-828(m)-52(m)]TJ -ET -q -1 0 0 1 149.539 667.901 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 153.049 667.701 Td [(m)-52(a)-53(t)]TJ -ET -q -1 0 0 1 172.236 667.901 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 175.746 667.701 Td [(w)-52(r)-53(i)-52(t)-52(e)-186(\050)-167(a)-242(,)-900(m)-126(t)-125(i)-126(t)-125(l)-126(e)-426(,)-926(i)-152(r)-151(e)-152(t)-478(,)-904(i)-130(u)-129(n)-130(i)-130(t)-434(,)-882(f)-107(i)-107(l)-106(e)-107(n)-107(a)-107(m)-107(e)-240(\051)]TJ + [(;)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(ir2)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -75.851 -26.279 Td [(T)90(ype:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + [-525(m)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -19.464 Td [(On)-250(Entry)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(-)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.464 Td [(a)]TJ + [-525(offset\050j\051;)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(sparse)-250(matrix)-250(to)-250(be)-250(written.)]TJ 14.944 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 578.783 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 578.584 Td [(Tspmat)]TJ -ET -q -1 0 0 1 344.406 578.783 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 347.544 578.584 Td [(type)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.414 -10.959 Td [(else)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG 0 g 0 G -/F51 9.9626 Tf -268.571 -19.464 Td [(mtitle)]TJ + 9.414 -10.959 Td [(ir1)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 32.09 0 Td [(Matrix)-250(title.)]TJ -7.183 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(A)-231(charachter)-230(variable)-231(holding)-231(a)-230(descriptive)-231(title)-231(for)-230(the)-231(matrix)-231(to)-230(be)-231(writ-)]TJ 0 -11.955 Td [(ten)-250(to)-250(\002le.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.464 Td [(\002lename)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(written)-250(to.)]TJ -19.367 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F59 9.9626 Tf 298.534 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(,)-387(in)]TJ -303.764 -11.955 Td [(which)-234(case)-234(the)-233(default)-234(output)-234(unit)-234(6)-233(\050i.e.)-305(standar)18(d)-234(output)-234(in)-233(Unix)-234(jar)18(gon\051)]TJ 0 -11.956 Td [(is)-250(used.)-310(Default:)]TJ/F59 9.9626 Tf 74.799 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(1)]TJ 0 g 0 G -/F51 9.9626 Tf -104.936 -19.463 Td [(iunit)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 27.109 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.202 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -62.186 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F59 9.9626 Tf 287.757 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(-)]TJ 0 g 0 G -/F51 9.9626 Tf -317.895 -20.764 Td [(On)-250(Return)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(offset\050j\051;)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.463 Td [(iret)]TJ + [-525(ir2)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -20.763 Td [(Notes)]TJ/F54 9.9626 Tf 14.944 -11.956 Td [(If)-283(this)-282(function)-283(is)-283(called)-282(on)-283(a)-282(matrix)-283(a)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-283(on)-282(a)-283(distributed)-283(communicator)-282(only)]TJ -14.944 -11.955 Td [(the)-316(local)-317(part)-316(is)-316(written)-317(in)-316(output.)-509(T)92(o)-316(get)-317(a)-316(single)-316(MatrixMarket)-317(\002le)-316(with)-316(the)]TJ 0 -11.955 Td [(whole)-225(matrix)-225(when)-225(appr)18(opriate,)-230(e.g.)-302(for)-225(debugging)-225(purposes,)-230(one)-225(could)]TJ/F52 9.9626 Tf 318.257 0 Td [(gather)]TJ/F54 9.9626 Tf -318.257 -11.955 Td [(the)-339(whole)-338(matrix)-339(on)-338(a)-339(single)-338(rank)-339(and)-338(then)-339(write)-338(it.)-576(Consider)-339(the)-338(following)]TJ 0 -11.955 Td [(example)-250(for)-250(a)]TJ/F52 9.9626 Tf 62.495 0 Td [(double)]TJ/F54 9.9626 Tf 28.692 0 Td [(pr)18(ecision)-250(matrix)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -ET -q -1 0 0 1 99.895 178.717 cm -0 0 343.711 82.69 re f -Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG + [-525(m;)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG -BT -/F94 8.9664 Tf 102.884 250.747 Td [(type)]TJ + -9.414 -10.959 Td [(end)-525(if)]TJ 0 g 0 G - [(\050psb_ldspmat_type\051)]TJ 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(::)]TJ + 0 -10.959 Td [(do)]TJ 0 g 0 G - [-525(aglobal)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -21.918 Td [(call)]TJ + [-525(i)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G - [-525(psb_gather\050aglobal,a,desc_a,info\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(if)]TJ + [(ir1,ir2)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(\050iam)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(==)]TJ + 9.414 -10.959 Td [(y\050i\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_root_\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 37.659 -10.959 Td [(call)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(mm_mat_write\050aglobal,mtitle,info,filename\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - -37.659 -10.959 Td [(end)-525(if)]TJ + [-525(y\050i\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(call)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(+)]TJ 0 g 0 G - [-525(psb_spfree\050aglobal,)-525(desc_a,)-525(info\051)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf -2.989 -23.747 Td [(T)92(o)-250(simplify)-250(this)-250(pr)18(ocedur)18(e)-250(in)]TJ/F59 9.9626 Tf 129.513 0 Td [(C)]TJ/F54 9.9626 Tf 5.23 0 Td [(,)-250(ther)18(e)-250(is)-250(a)-250(utility)-250(function)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -ET -q -1 0 0 1 99.895 137.797 cm -0 0 343.711 16.936 re f -Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG + [-525(alpha)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(*)]TJ 0 g 0 G -BT -/F94 8.9664 Tf 102.884 144.073 Td [(psb_i_t)-525(psb_c_)]TJ + [(as\050i,j\051)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(<)]TJ + [(*)]TJ 0 g 0 G - [(s,d,c,z)]TJ + [(x\050i)]TJ 0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(>)]TJ + [(+)]TJ +0 g 0 G + [(offset\050j\051\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.414 -10.959 Td [(end)-525(do)]TJ +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.415 -10.959 Td [(end)-525(do)]TJ 0 g 0 G - [(global_mat_write\050ah,cdh\051;)]TJ 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F54 9.9626 Tf -2.989 -23.747 Td [(that)-250(pr)18(oduces)-250(exactly)-250(this)-250(r)18(esult.)]TJ 0 g 0 G - 164.384 -29.888 Td [(147)]TJ 0 g 0 G -ET - -endstream -endobj -1846 0 obj -<< -/Length 6720 ->> -stream 0 g 0 G +/F59 9.9626 Tf 16.096 -32.463 Td [(Algorithm)-250(2:)]TJ/F62 9.9626 Tf 60.055 0 Td [(Matrix-V)111(ector)-250(pr)18(oduct)-250(in)-250(DIA)-250(format)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(9.6)-1000(mm)]TJ -ET -q -1 0 0 1 199.577 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 203.164 706.129 Td [(array)]TJ -ET -q -1 0 0 1 231.784 706.328 cm -[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S -Q -BT -/F51 11.9552 Tf 235.371 706.129 Td [(write)-374(\227)-375(W)74(rite)-374(a)-375(dense)-374(array)-374(from)-375(a)-374(\002le)-375(in)-374(the)]TJ -57.767 -13.948 Td [(MatrixMarket)-250(format)]TJ/F54 9.9626 Tf -25.158 -24.509 Td [(c)-175(a)-175(l)-174(l)-858(m)-83(m)]TJ -ET -q -1 0 0 1 201.262 667.872 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F54 9.9626 Tf 205.076 667.672 Td [(a)-83(r)-83(r)-83(a)-83(y)]TJ +0 g 0 G +0 g 0 G + -97.969 -26.976 Td [(The)-250(r)18(elevant)-250(data)-250(type)-250(is)]TJ/F67 9.9626 Tf 110.953 0 Td [(psb_T_dia_sparse_mat)]TJ/F62 9.9626 Tf 104.607 0 Td [(:)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 233.175 667.872 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 99.895 220.639 cm +0 0 343.711 115.567 re f Q -BT -/F54 9.9626 Tf 236.989 667.672 Td [(w)-83(r)-83(i)-83(t)-82(e)-217(\050)-149(b)-206(,)-941(v)-165(t)-165(i)-165(t)-166(l)-165(e)-505(,)-927(i)-151(r)-152(e)-151(t)-478(,)-905(i)-130(u)-129(n)-130(i)-129(t)-435(,)-881(f)-107(i)-107(l)-107(e)-107(n)-107(a)-107(m)-107(e)-240(\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 112.299 325.546 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -86.284 -26.38 Td [(T)90(ype:)]TJ + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(extends)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.493 Td [(On)-250(Entry)]TJ + [(\050psb_d_base_sparse_mat\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G - 0 -19.493 Td [(b)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(Rigth)-250(hand)-250(side\050s\051.)]TJ 13.839 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(An)-190(array)-190(of)-190(type)-190(r)18(eal)-190(or)-190(complex,)-202(rank)-190(1)-190(or)-190(2,)-202(or)-190(an)-190(object)-190(of)-190(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 277.745 0 Td [(psb)]TJ -ET -q -1 0 0 1 469.676 578.595 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 472.814 578.396 Td [(T)]TJ -ET -q -1 0 0 1 478.672 578.595 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 481.81 578.396 Td [(vect)]TJ -ET -q -1 0 0 1 503.359 578.595 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 506.497 578.396 Td [(type)]TJ + [-525(psb_d_dia_sparse_mat)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(,)]TJ -351.808 -11.955 Td [(of)-250(type)-250(r)18(eal)-250(or)-250(complex;)-250(its)-250(contents)-250(will)-250(be)-250(written)-250(to)-250(disk.)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 9.414 -10.959 Td [(!)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -31.448 Td [(\002lename)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 44.274 0 Td [(The)-250(name)-250(of)-250(the)-250(\002le)-250(to)-250(be)-250(written.)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.959 Td [(!)-525(DIA)-525(format,)-525(extended.)]TJ 0 g 0 G -/F51 9.9626 Tf -44.274 -31.448 Td [(vtitle)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 28.772 0 Td [(Matrix)-250(title.)]TJ -3.865 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(A)-244(charachter)-243(variable)-244(holding)-244(a)-243(descriptive)-244(title)-244(for)-243(the)-244(vector)-244(to)-243(be)-244(writ-)]TJ 0 -11.955 Td [(ten)-250(to)-250(\002le.)-310(T)90(ype:)]TJ/F51 9.9626 Tf 70.763 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -108.95 -11.955 Td [(Speci\002ed)-359(as:)-529(a)-359(character)-360(variable)-359(containing)-359(a)-360(valid)-359(\002le)-359(name,)-387(or)]TJ/F59 9.9626 Tf 298.533 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(,)-387(in)]TJ -303.764 -11.956 Td [(which)-254(case)-253(the)-254(default)-254(input)-253(unit)-254(5)-254(\050i.e.)-321(standar)18(d)-253(input)-254(in)-254(Unix)-253(jar)18(gon\051)-254(is)]TJ 0 -11.955 Td [(used.)-310(Default:)]TJ/F59 9.9626 Tf 65.184 0 Td [(-)]TJ/F54 9.9626 Tf 5.231 0 Td [(.)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.959 Td [(!)]TJ 0 g 0 G -/F51 9.9626 Tf -95.322 -19.492 Td [(iunit)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 27.108 0 Td [(The)-250(Fortran)-250(\002le)-250(unit)-250(number)74(.)]TJ -2.201 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 23.999 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -62.186 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)-310(Only)-250(meaningful)-250(if)-250(\002lename)-250(is)-250(not)]TJ/F59 9.9626 Tf 287.757 0 Td [(-)]TJ/F54 9.9626 Tf 5.23 0 Td [(.)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG +/F102 8.9664 Tf 0 -21.917 Td [(integer)]TJ 0 g 0 G -/F51 9.9626 Tf -317.894 -20.836 Td [(On)-250(Return)]TJ + [(\050psb_ipk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ 0 g 0 G - 0 -19.492 Td [(iret)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(Err)18(or)-250(code.)]TJ 4.434 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ/F51 11.9552 Tf -24.907 -20.836 Td [(Notes)]TJ/F54 9.9626 Tf 14.944 -11.955 Td [(If)-290(this)-289(function)-290(is)-290(call)1(ed)-290(on)-290(a)-289(vector)-290(v)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-290(on)-289(a)-290(distributed)-290(communicator)-289(only)]TJ -14.944 -11.955 Td [(the)-316(local)-317(part)-316(is)-316(written)-317(in)-316(output.)-509(T)92(o)-316(get)-317(a)-316(single)-316(MatrixMarket)-317(\002le)-316(with)-316(the)]TJ 0 -11.955 Td [(whole)-243(vect)1(or)-243(when)-243(appr)18(opriate,)-244(e.g.)-307(for)-243(debugging)-242(purposes,)-244(one)-243(could)]TJ/F52 9.9626 Tf 318.257 0 Td [(gather)]TJ/F54 9.9626 Tf -318.257 -11.955 Td [(the)-349(whole)-349(vector)-349(on)-349(a)-349(single)-349(rank)-349(and)-349(then)-349(writ)1(e)-349(it.)-607(Consider)-349(the)-349(following)]TJ 0 -11.956 Td [(example)-250(for)-250(a)]TJ/F52 9.9626 Tf 62.495 0 Td [(double)]TJ/F54 9.9626 Tf 28.692 0 Td [(pr)18(ecision)-250(vector)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG -ET -q -1 0 0 1 150.705 149.348 cm -0 0 343.711 82.69 re f -Q -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG + [-525(offset\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.56 0.13 0.00 rg 0.56 0.13 0.00 RG -BT -/F94 8.9664 Tf 153.694 221.378 Td [(real)]TJ + 0 -10.959 Td [(integer)]TJ 0 g 0 G - [(\050psb_dpk_\051,)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(allocatable)]TJ + [(\050psb_ipk_\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG [-525(::)]TJ 0 g 0 G - [-525(vglobal\050:\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -21.918 Td [(call)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(psb_gather\050vglobal,v,desc,info\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(if)]TJ + [-525(nzeros)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [-525(\050iam)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [-525(==)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(real)]TJ 0 g 0 G - [-525(psb_root_\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(then)]TJ + [(\050psb_dpk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(call)]TJ + [-525(allocatable)]TJ 0 g 0 G - [-525(mm_array_write\050vglobal,vtitle,info,filename\051)]TJ -0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.958 Td [(end)-525(if)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - 0 -10.959 Td [(call)-525(deallocate)]TJ + [-525(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(\050vglobal,)]TJ 0.00 0.44 0.13 rg 0.00 0.44 0.13 RG - [-525(stat)]TJ + [-525(data)]TJ 0 g 0 G -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(=)]TJ + [(\050:,:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -9.414 -21.918 Td [(end)-525(type)]TJ 0 g 0 G - [(info\051)]TJ 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F54 9.9626 Tf -2.989 -23.777 Td [(T)92(o)-250(simplify)-250(this)-250(pr)18(ocedur)18(e)-250(in)]TJ/F59 9.9626 Tf 129.513 0 Td [(C)]TJ/F54 9.9626 Tf 5.23 0 Td [(,)-250(ther)18(e)-250(is)-250(a)-250(utility)-250(function)]TJ +/F59 9.9626 Tf -12.404 -39.731 Td [(Hacked)-250(DIA)]TJ/F62 9.9626 Tf 0 -19.039 Td [(Storage)-362(by)-362(DIAgonals)-362(is)-362(an)-362(attractive)-362(option)-361(for)-362(matrices)-362(whose)-362(coef)18(\002cients)]TJ 0 -11.955 Td [(ar)18(e)-247(located)-246(on)-247(a)-246(small)-247(set)-247(of)-246(diagonals,)-248(since)-246(they)-247(do)-246(away)-247(with)-247(storing)-246(explic-)]TJ 0 -11.955 Td [(itly)-361(the)-361(indices)-361(and)-361(ther)18(efor)18(e)-362(r)18(educe)-361(signi\002cantly)-361(memory)-361(traf)18(\002c.)-643(However)74(,)]TJ 0 -11.955 Td [(having)-229(a)-229(few)-228(coef)18(\002cients)-229(outside)-229(of)-229(the)-228(main)-229(set)-229(of)-229(diagonals)-228(may)-229(signi\002cantly)]TJ 0 -11.955 Td [(incr)18(ease)-201(the)-201(amount)-202(of)-201(needed)-201(padding;)-217(mor)18(eover)74(,)-211(while)-201(the)-202(DIA)-201(code)-201(is)-201(easily)]TJ 0 g 0 G - 29.64 -41.41 Td [(148)]TJ + 164.384 -29.888 Td [(167)]TJ 0 g 0 G ET endstream endobj -1852 0 obj +2118 0 obj +<< +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (./figures/dia.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 2126 0 R +/BBox [0 0 499 211] +/Resources << +/ProcSet [ /PDF /Text ] +/ExtGState << +/R7 2127 0 R +>>/Font << /R8 2128 0 R/R10 2129 0 R>> +>> +/Length 2502 +/Filter /FlateDecode +>> +stream +xœÝZ¹neÇÍïWÜ xÕûJ€$À‰á†a(¢íQ@9Ðﻪ»NuÝÖÈv Áཪ®åœîêõñóé.:þ“Ï—·ã«õüô¯#¸óçÃ]¹÷âúùýá[¾Jð§ï1\ù|[ŠVI~=>Ùo&ªP“7U¨‰÷‰U&Ë$í&é&µl&P,“Îòχ!¹Ak€Ò°’Ö\m+K¶µ;ÛÊ’mõ!Øæ!ÞÚs¹µ³xkoíÖÎ"·>ÜY¯FÝ\ÎÔ»;ƒ§Ì?ýýüóùOjútÔ\®”úYc»Zån€¦P7¤’ÏêÓÕœ? u`mù,%^±7•_ŽâýÕÛÒõ+º2r¼ªéî +5žˆÐ"}æ¨9 ¿@â–;—㇟ê]p™Š÷rÔë­v.e—{®ç‡ïß1ÿ¹<\m—³ì 6ï"9.v*³#ÍoU´NÉM×@^IU#°$PsôÅ ¹¡Q´â¼£Ÿcú¾ÙQJ®_5&Ã)æv…¶8E_®â&'|WNP1œ!#ü┊¿|¬à”]¸jœð}q‚FÑŠóŽÞŽÒ{a4F)¶F“ÔrrŠ©^QQûÐóÕ +RºR7€yÃ_ä•W5@&ùLa¨MÚÌðß8Œ±z—¼Æˆ•Ö¯vc&EV}I yîåj–Z¦…"îÙÅ«j"jКDPè3…¡&Ô¨`c1Æì2£6ÇÞ§F%1x)hfñЙ'%Ž»äH‹@l£¼fQ¼Þæ/´«Î5Ð,²•çä×ÂnÙߌ&ö«-nr§îè7©ŒÀé‘oéÞT!~¯˜XJâñì_œ-†W |⑳4ã,±Â¨ˆ(8½,¸K3]pžAÄ=çD²Ç‘³W3¡R¤2&WÇH3‹FsH„=ç}ö54®Èµ_ùRùd û•/tâ¦3dsl~×ý +Š±yõ…(Áe·bEãc6+ߟfhù¾6+hÔé¼CÇ$ŠtÊ¥‹@íéªãŠÔÇäõ•æw$>Kï#<;8ê7îRYÕ%„h^W™J*Ì+šÎÕ¦Š!_‘9Ð)hl¥Œù4äpnØÖ¼º‡ÝqùKœÊ56ÎÓ,@ …/¾)ߎÝç•©3w§¼Í+Ï+(ÅÕRœ +­æm¥Ð°Kã©Ž½®8½†Ëŵ2@þ2Úß'.#æ»õÞòvìÞ¯ºöB£ý7ÓoÝùb+â7ŒÊˆ÷çÚÅ☦xQ¿¸4ÞBL›ƨ_¥ó+J§•Êw“˹4ŒÛM3ß\¦ÄÝ +kE7úÍ +k%¯+0Y¯+hŸ¯+Ú®¯+hŸ¯+ڎוÿå!8:äÐW_i›œj +mQ´YOë"¿áê:ÚÆ-õãGä—C5-t^_õÍÊBýNÛÜžU†x‹óvì¹(2àX¥&TÚÓX5®*$1ÜlK‰Zx±ù­Sbè·N ­Ý:EdÓ)©¦­SD£1S ·N‘¬<›þ_܆G¼ÌðH.3<@#Ý° +$†;€m)ˆ8¹‚’…ÂKÌ(½ÆqW@È/Újˆ rAAãý,$t!]+nHE~Y] ‰wc°ß%¯/.Þ|ÖâSB£Ž æ0_¶#­q¦]EmOÍú«¨íµßü!¢®tÖ_EmOÞú«ˆöÀ'+bç[w£ºâ­}>ºk;^ÝïÛ&>DÍßã-?DmÏu*Z›ím‰·vÉväÿÓÜ~út|æ̓ÿäãåíüæùøêC£Íð|¦þl<.|·+éÊ%žÏoÇ_žÂãSæêËOþñ©ÐZÀßÝãSãóJϤ­|ËèáŒ}udáêãÏ8¾}&|ÿõ—:®§ËSõíxcXš8{ƒ¾7»Y¨$§+•_ø÷ˆÆ¢eAÓ„b­*ÃC4´ µt®µëŽæ˜2{,€î<ÆûIëeqS ã·‰d·Yp†[åG\c1—ÐAeå !#Çâ°JD¸óÜz–(Êmi&²žh³ yO‚K¹õØæeÔ‡ã=T7Õ2 È5‡rS°JD¸ó˜Ü®&g_妚‰Œ¯„nÕ §ðÍŽÁ ÕzY8õu&—SË~â\•‰æ`“;jfRG˜n˜Í@C«|*yu†¯ÑGÕZe*¿9‰³‘%åÒLP+Â=ƒÑÊ-Šm:ïèÇØ$gÇ%­~å)³Yâ€bËætqÆ"ÐRi梊(6(¤Õ]ú_è¤5¼ö8#”2õÕSEGÛx:IvãÑ¡ºÌÏ.¡P•r‹LÛwI“„ZLA܇àó4äïÄ…îbË‹ÈôhâN™Í‘@†ø`2 ƒ2QQ  ¿ºp²™¡1Ì¢iæÏfÜ!O2* +u´3º’Ñäh8¸ì“LË‹I3=Êï`Ù H­À©Lht‹7tíµý ¢ð 8à œˆ¾˜ ?,šUE=8xÚ‘k9#_ÎSçû^¹ ¡¡‹©¿J;#ô8úMytFžçù©ñü0•è| h r.òídæ€5:*™j!¯qãÿKq­Øq‹\Oq­L|Ó"›qS 7 òÊ PèÈ Êsë|Ž¥ó¡¤&;ÝGh-„‚O¶•ÿÛ%Ïb€ì;:H5i”ðnö92@&‡J—Óê—ÝdúøG@^)D£ $‚‚ÜXXf­£"ÀMÅ1JAåT45Ír’üÅÕ† 3µÜ@d“€ +rca™ñ8ÏzD—‹bH…°d'¥¢šÃ\LÀ/ó¹¬1ÙŒ,0F c¨)T#  7Ì,¥j7ß%Î¥šäÛæ›RÙ6ßq4›/ÉÙ¸g]åû\¦—×\ÃW\¬ò+3,êmó½Lr´›ïJŽ·Í7ñÅÓÐÈþ¶ù&þ?9³ù.y’QQШ» Ñ•Œ&Gs¼m¾7ì“LÏ‹I7=šûmóM¹m›/]”o›/ÉvóUQxT»ù._àlÛæ»òâ›QUÔÌá×.Ü$Rg{çæñÊØcé‚Lçúqçþú1º‡ç×><>¾bûJ:ªŠÞËÃ_ƽ:dº’·p>yºovª†ç¿‘ã¿ûîã·ÏÓ‘¯ì-u„縑óß¿Ë&& +endstream +endobj +2131 0 obj +<< +/Filter /FlateDecode +/Length 177 +>> +stream +xœ]1à EwNÁ ‰Ô,KºdhUµ½1!Co_I‡ßâë›Óur6Qöˆ^½ QcŽ°ù=* 3,Ö‘¦¥ÚªtP©j•°ñ&Ãû€â˜Êw¹{6—ÒiªGy [ +¢t s1#8ý÷ÕUÃlŽÉV‹"α’¡Cc×fÄb/Šû²íôåÅ9ሪ=Fp©œQbæxÖÁïÒàCvQùðY¢ +endstream +endobj +2133 0 obj +<< +/Filter /FlateDecode +/Length 197 +>> +stream +xœ]Mƒ …÷œ‚ˆFmL»qѦi{ÄÁ° ꢷï0j]¼ Ìß#k»kçÝʳGœÍ Vn",ó ðFçY^ðÁ™õ ŠfÒeíM‡÷'Ç°;ßõÙ3¿ÐM¾×˜y€%hQû˜BIk?ü=Õ{AoÌ3“„ÀÈd™+’+Äš°Jh ›ËªP²*â ±Q$ĆFŸCÒÉι=7[ŒàWòLž’çá÷-a©Š£ØsÇbã +endstream +endobj +2134 0 obj +<< +/Filter /FlateDecode +/Length1 9528 +/Length 6467 +>> +stream +xœÍY{xTÕµ_{ïsf&™Lfòž$™ÉÉ„À$&„wÉä1á‘á!$TÌ$!¼HB«L¨E4 p->¨ +Q/`ËÉDh@¯D¯¶­«µêµ+VÚ[ÚÔWkÌýí“€r¿~÷»ý¾ûÇ=;k­½×Z{¯µ×^gŸ½'ĈÈF$¨zÞÂüB2ž¬6 Åëê[†Û€U7nls6ç½-h¼Od~yEËÊuSÚæYº‰ÔÖ•ko[1¬?ýÔÎUMõË_½t¡‚È+™“W‘06f/Ql¢4²j]ÛwFìý (~mscýp;s(v]ýwZL«‡QŠ¶ûÖúuM#úÕÒ©–æÖ¶‘ö€”·lhjùÅ +ßçЯ!²;ÕûàE¹£ÄnJ'Š|88?4;rI½…´¡5‘‘€ÞÏŒ€töAÚGY4ÈÆÓKÔO³é•P5í¦tšŽP,ÝÆ^#A)§§ÈË\Ä©‚R˜J{è]º‰6ÐÇ4@9TI°xŒ¤J¦i‘ßWÒÝ‘ãЊ¦2ú`kÙBÊG}&Ïe>XÞé§ʉ¼y­Çèc–é¡™¨ý–âh uÐ?S<­¡W#—ä´©²Mìwä¡mW&*]‘[èz:J¿d•¨Í¡ÛÔw¢ŽÒZôz’¥°þÈÙÈ'ô‚¨ #}—î†Çaêç׉2µ›Ü”M7Ð\ª‡ôŸè]–ÀÆ‹@dL¤4²܃ô)÷ñŸ3üðÑ,ª£{éqDãm:G_0+›Äc‡PÞ`Tßo•ÔN·#¯CôÒa:ÎƳñ<…§ Z)4–n„l'í‡ý^:Ã*Y-ëg/ŠýjÁPq$1’ù$¡qT÷Ñ‹°ñ9+€,ˆLѦd(mjáå-˜árz”ÎÐðãÄý ú+‡ò!¿ƒwD–DžŠ| _,䢩4Ÿ–R3m¤oÓXÕ—èeú3»È£ yZyE½]ŒÜØfS)|Ÿí…{;V)L}(oc–qÌYLesÙ¶’íd²>ö.{—›¸‡¯ç¿ºxM¼¯LVÕHFJ¦ ØÕh ­Â +Ühßù>E¯Ð)–IJYfô6úɯçå(OòÓü±UìT.©w ýçÐÅH™‘e3‡vzQøK†cÙÖÊ>‚ç»ø³"V8„&&‰±HÔŠ»ÅnñïâçÊåòž:K­W™ë‡nz#Rùž|É¿ÆP.M¤)ÈŸȦ[à_ ÊÚD[¨‹îC¾ÜOÝtó>I§è—ôkúV€˜>¯†õuȺ­ì>”=ì0{‘½ÂN±Ù—²ðL”>™ó2^ÁWò­(»ùþ6?/F‰FÑ!:QöŠcâ]…E‰¨…(3ÕíêAÓkæóLsƒåg—.\w¹öòC4”6ô­¡‡^ú$²8rü÷R]O·ÁË=ÈÁý(O#ÑOègô+Ã×Og*2ÞÉ4dC.V­˜Í`³Pæ°ù(7¢,aKQêY[…ÒÁ:ÙwÙì{ì^ö€QÆÜö³²c(?f'P~Éβ߲߳O9’˜ d³—áù|fZÆgðy|ÊJÞŒÒÂ7ðX¡ƒ¼—ço‹áy¢^¬{ÄÄKâ-ñ•Â•\%_ñ+‹••ÊÊiå åå¢êRƒê*u¯ú’)Ý4Ñt£iéaÓÓyÓ%³É\mn0o2¿eŽX¼Ø­~Šy¥o>ù¦Ó¬UMT¾ÃÏâ½pŠu»3ñEb­¸OüB]Á…›½ÇºÄjqKäIQÁÿ*šÙb~’e +—Z$VЊ°CüCþ9ÿDIb‹øïXŽòÏìǼY”q“4¢¾©$)wªç‰ø¯¨ˆofýüq§¸3ò¯T¤îegÕ½ü r+<Îâ­ÞÆB§ŸóÕ|;Õ(Õ‹´qÿ¡úÄ{:¿›o){éc¡ñÏØ {»Æël¶’ÅoæÓØ!츗Y]`ë©…=@öû5ë#ÆžYÁjéÜƦà3öºð°·D4ÕJY6ObÕ|ß(ž7“Ã.ñ º V€Ü¹ò Ñ­xvó1ØÓ‚ØMÞd…䤇°ß>ô¼Ü±ÕwÔíȳÇE.- ZÆ_£"¼£ÔÐ]TH'ƒwS˜6E:Ùrìûs°rêck(ŸY±[¦À·|/’y&öÂ:Xý+öÿW±ëW²?Ò·™oV?å(R²C bg +aÿÝŽ²œ–¡õ(Ýo:ª¾IóX +‘âÚ‹,ŸnÆ7ç#ØO#?ü[J+¹ðÚy=z<:4“(wÑkŒÓfø<ïyµ2;5˜áj|£ªðM¶t~ ê÷–kµný‚QŸcÔwuê:¸ƒÎUån…ÜA½b㪮`¨ÃõX£Ë´²¦è¼\ꉶ¢jEMOÑZzXÊtfTxJ°¨‡“ŧô4­<¨§jåÒ]xƒõËõêù5Áòt§6/WgeZƒNZ©n÷*Tf˜ÑMeºÙ0ã^-gCÛÝ=¹ý];úÔòÅ,×–×ßT£‹úZi#λåzÊíçœ_71x|YͶoJÓEWйÚ-›]]ÛÜz÷üšoJ=×Öb {+B]0¼!¬\è†-¾µ¶Fg[aÐ-ç!ç4<»&-(9¡5n=J+ÕVu­ aaÒºtZp›'œ–8 ´ »kQæÑ‹ÓµÚúòQ=‰Ôµà¶ÞÔ€;õZI^n#n8¬=±ö‘JŒí›•¦«2£f¨ËZå‚«qeÒ#mÒAw7ºáI†9M•¨i*u5N…žZ†^úr¬Çj=ª,Ôå(ß!ûëª×¡¹»¾ ¬¿vá×rêG8&¯ã ’U™%W ò+uÝçÓÇ“ b.ÊÂÇéF{R^îÆ>®k-7ÂGÕˆm}mQ>‚ïñÈåÝÞ 4ôÎù5Ãm75¤‡)ï«ÕyHJú¯H’n”’Î+’«ÝCòøY’7‹$Ý’}õÏîHN®*ÒYòÿ n–W.Ô*ç/­q»B#±­\tMkX>õªl¤Æ†¸®x©YRoÁÒÉÀŸê­Ð‚«C3ñªÁG=¡¬F¤óÚáOÆPÈß›®Ž,51r,Åk2òyŸÙ‚68Ì]¡;B3‡qm´Çó¿ìÔ”½ òu·‘9éE¾kÛ×_Ӿƽ˜.‡•l^¹hiWWô5² +lV]]š»¢+ÔUßélÐܭ븨5]-ÁЕå؞®Wì¨Å$V±¢Âà˜èS|}šIÅ÷Î{L ίÛòã‘~ñao0Xèõ]gÐpÎØBCNUø¯âC~QgÃÉé†äƒpiéHeòÔáJ︼³%Ñâú€‹ÄY|TŒ^½9×–ØÀ`â²ãï¢nñkÒœâ½Þ¬ìÂ}'ÅÏ UœÂ÷Tv;¶ÅbÀŸŠã”îÂ9ôèˆähol\!•´Š{1½~à3€À @¡fq:;G +Ù]€|À<ɇÄ!ø¹ýíÀù€fÀN€B‹ÄÓàß"±xJ¬Á‡Ñ%vàbšº]|ß ÿšúø¸>ˆÇÑ–tßHûP)ÿÁÚÉ ЇÀO}иðºÄ#í¢Ýè×6B»Ek8Ãå(É€Ü (Ôv£¶¡Û3ìÖ–z@ A× S„ksØ£k´¹7%µ°!ÝŒÐoFä6#r›IhÓMÃ:ybt6Agt6!*¢öZ±`ì¸qoEÜ%_îœ1øßÞè–-ñmÄq,¼ºG¬ ç¸d+{§ +‹ŸÃ9–aؽ©£ w~ÝŠŠ–‰;BíR·É6õFÅHnSoÚèa +­[JbE#ý€S"p`"  ˆÆpV¾ë„˜Kë,ˆuuðÑ¡t¨JA9‹?) +©ÚBHÉx‘G~(ŒuÕùÙ”PTKTg”pD¹£ +¢QÕQj3®D;…p‰|Q,æ‰:¡öEúÃæ¢ ¦¢ »¬ÝVÝÚo=cUuS¿éŒiÀ4hRݦSÀTm +™ZL¦]¦nSÔ.Ó.3Y[¬Vá°º­Ö€µÚªºÌ¬»d«h/-°ÐØPã:ðÝâf@V£¡¸|&´€3¨€ªhÙ¡g‡ž\;¸vp XJª!@ˈÔtUr¥Ô”ÀHcÁEl€e 0-Z6´lÐ:Ã/ÁC°P o€¬¾"+‘‡&C>hè\‘d_~)P?¦,ÓDzî±l×Xð—2âããë´:o]NÝ~¥Ykö6ç4ïWæió¼óræíWŠµboqNñ~%_Ë÷æçäïW\šËëÊqíWvV©:YuºJ©«j®ê¨S°t½a_A¡A3½’ §¦N±—ÜÀ`:uÀûg‚ìÀ.@> Ð Pùƒû ¸Ï€û ÍÔTôzFn1À®™äï3d²&åü¹À䇋&Ì+©Â¶[Øû0ä‡ íáÚƒ¯üy#úÝ_j¹WúÉMp©±Ý-Åk¸”Šu€€J§Å: ÀèÀ.@ à@KQ–ˆ%ü”Ãü°È ØÆ'¹(9Yþgq”8x rÁÆž2ðþÇÀÅÎ +Äζ}9ÛöÂlÛ]³mcPá9øÙØn{ÖÛ³%¶y%¶±%6Œ–B²ñ$›$fÿià¹Î $zl_ylŸylöØóØÖ{l7xd¿Qx‡m<ÑÀV‰qI•x¶³V—í'.Û—mŠËVbc{¬S©3 œ.1ûôY{¹¢žcŸR9FbaÿXW'ƒ°HØ_2öϹöïù[Øÿ}×óì+f|ÚØ—á¬s®’$ö9›¥Èög#ôÏl®K.\žgáâæbÈϼ ÿöo‘úO¢ÿÐ~‚2-Rÿq\%ÝÇfüÇFú=Îm€ÕG¹·Áê(×°úP8÷¸ßçÞr8w-ÈΰW:¸&ìç*‰c+)‹KÝFòréIՈřy-èŒáÎÁp®ìU. ô±²°6dŒôòy¦QµaÎÖŒIŽ&Íbi†Óéä5h,³ÎÛ(Ó –°¶£˜žõžsýÅÿœœ8}Áìá½®žÇü£ù6+|ÈõÆq®°ëtnósý\{ÎõJV[võçöY 8™ÛÇÙQW‚¬C—³c®#¹+]Ïh†t¿)–zŸ?Ïõˆ¶ÔµÇ‹vص%÷yé­ÃŒC\›;ÝUå?äªðö1ˆ~ D»Š´ ®i`Oíc³z¹ÆgõIW +0Æ¡c®q°˜­®Ü8åŸDfÖÈ5·™Ì‹ÍóÍ×›'˜óÌnóhó(s¢%Þâ°ÄZb,Ñ‹ÅdQ,ÜB–ľÈ@À'Ïm‰&‡ñŸ"±bÔ\bnüôGœY8Þ=ATòÊ…¥L¯¤ÊE¥ú_eŸ9²@Ÿê«Ô-Õߪéaì¾Z´t~w£E5HPÉÚš./wlj±ü­÷¦Kºië½µµ¬Rïo¤Ê·þåBÌ#‡TU+uRòÆbgqüô¸iå…F°ïëÇéûæã­?X¹°Fzt­^(+‘ѵ•ú y-<Î×óæ`ùqÞ"ImÍqv;_\ ùìöòÚ«j”É[ F~I¤Z/eJ5Êd½†Z•¡†4Í –÷df+½ÄfI%¤ÏK†ÒÊá±²`cUK5žAYÆXY[VÜŒc´RÝ¢¼’¼)ÂÛ)E±òç—‘óŽë=é'ØS#"ØqZ)ùÈ\]~õ¯µµµMB{»¸­ÝiðÚðÒzVêò²è×ýA=*¯er9ÚGž²š€ã¤ÿ´Ÿ7û;ü;ýûüGüj{{-Øñ'3OgòºÌæÌŽÌ™û2dš¤à¦šcÿ¾Ì?eŠvdkÃ,7l¶ƒâO6ÛÚ[åC0Ð +6çk÷•Õ”dR#N½ 'ô’ +U»6¶B’Ám•ÝÚ%½ªÇåÅY5ÎÌd¦9=œ=Ç_ÀaÕÌO†IUúø Ï +Š6ËÊQF©“zrN‚¥(v »™œ>Ç—þËþ¹ŽÏýs.û©uÇ% ñž8Oœ[?]r‹þK•.’[é76ÿ…¬“×à$¨8àæjçèå“;TÆŒ nìVÍBlëfg˜‰õ±‰G©SY´TZ»¼ÌïðSþàñl™/Á“äYÈÕËyÊCÆ픋éRWá¼Sø®Ùi–âuÃDg(U"{FròX³ß<ËüC³)àþ–²Ôò­”¥Î[,mqmñZ‹ÝwØz8ö”z*åß寮ëp¥|•’„s€’ª¦'¥&§¦Œvš£R¬Në艩3RïIÙé6;S9OIKI5ÙD*WMΔä¤Ds‚bëc«QQĘâÎ(Õ'&bjÚÎT¶/õH*O=!& ž÷ö2“ÑÇî ØÈô›y u Í  JB3˜T¹îN·¹»ÝÜúû +±³±@ ±Ž7󾓟ä§ùYþ'ná©®ì>Óéú,›sÎa®cÙú/—Íù|ÙÇ,Ò…ËËÖû‹/¯÷‹‹O™?¦qG¬›CÝürìˈìú Ëh™L,ò1á™D4ib¶–i2k“'O(”S3™¹ÙS8yòq¨îÒ«gî½·.ß—íM=ýÈþ_Ì>ðÕtÖ°vIES‡.zY){ø‡[´¯?þ“·v­\ùÄÑ¡Á©ŽñyÆz b½LX¯dz(p†œÝΧB΀“o¤»ˆÇ–$°Õ8µF±nÊD¬dÝ‚º†Î%;[MÉàû4€ó˜Gq¦FYb¸ ì/PŸˆµâ&Ø;ì»ìÝvÅžšr‚g±s#òùç8.œsÈì-öÁ˜F_\¸Ä¾ðùÆ ë—%x'Ä%&'§$y&Mç“â&ŽÉF(̃l¶'ÁÓMMŽ6{Ó¼¥ÊO¿¸mÃÔ îõòÑãoçïïçÎpá¤C³#çÅ1Geщp½ÅSbXU“$±ÙÒú˜=•FÙlÈewgd+Ùq’[GÍÔA;©ïiª÷Ëøze‡×u–ôBñ…ñe·ªX––•™ÅMœ ÆMfï¨ôÑéé”m÷Z³©)©ÜäQâÈeJk`‰±¨%Ç –ÅÜ ,ÝïHj Ôh ã*Ñ8ÆÛ’01~ +–?%9.‘›´Ì1ÙS)É +'O™,ƒ2FFÅÄgïh[ztÓ#w¿ÙðÒ–u/§­ŸÜ–q]AÖ´±Eå“fNä{ϳy Jö½2täCÇøøÅ¿ ïy ~Ãa6íü#­ž=jäEUäœØ¯V‘•ž9NJd 7!}ºjœ.QIµ0UŒ‹*¥€-d붽ÊNñwØ;|ÀfM#fed ØW—…@šà‰BpEØÔÀŒIêo˜ Äô¹3õ±=Ǻ­Ìš£žàçqÿ$ƒ³ªPª•nEUžç¿¥˜‘;¾\váœ÷Ïå;äs\ðû·©×ù¶Ån¾òÆl@¼°#1y +ó˜Ÿó_ ù[ØCÛ×,š0Z­ÊþÛ Ê+é×…¬Ã¿]úi ^gù\Þo´ùȯ›‰ò'/! `¢ìÁŠàÆ?GÿÁžÿ…&X‘³œ‰çI,¯ÖÆåbøQ ~¼ü÷8É߇㈭^×Ô:·éÛ š×ÕßZ½pÎ"£'ÛõDÊrms#×0F~6Mc£øÓ´ÎåBi6U ¯éÿðȾü?ôèGNÔÙý_XR‡=ñÑè—$=zC/]l»¼ÃQd‰E3ꊭÿîi +endstream +endobj +2135 0 obj +<< +/Filter /FlateDecode +/Length1 4304 +/Length 2777 +>> +stream +xœÍW}l[Õ?÷=ûÙÏIì÷e?'®÷üòÑÄvÜÚIwiÒ6mÚ® M[l(…$MK¡…¬”°nƒµò6J7TCš˜¶ñ»Ïõ4`@µ¡IÛ*6uÑ61&±M­CiÒ@$Ù¹ÏNÊ×Ôí¿Ùò½çwî¹÷žó;çÞ÷ šàð0¾c"çø6{¦LÎÖ°? @ž>~ÌhP@Åkø{äÀìÁ#Jú»8v€›9xøž5{÷ÝÞ'o™™ÜÏû¾y@z•ý· Â3ͯà/"n»åȱ»köÒ86+ß1=YßOÃ&tdòîYB¥ hÏü1nŸ<2S÷홽ãÎcõùlcöèÌli²e;€+Œ>ÝÈFÈyî,$bøíÀo? ÂptÛa7~÷":·Ãâka¨Ð ¬ƒk  nƒC0ûA† 4ÃÍp¤a´Á>È€üp=l‚‡ákp +Jp¾³p<ðydö>ø<ࣤ n¥Ýãºåx‘‚µ.L…DamÑÑÝ_4.R¢ö„S”$?ÑÆDŠrÉ­; ­¢™¢|òPØ Ãã“SÔ•dSM˼·ðçÈoŠ´+ÌGþQŒX&u' +tôxÑ(q=w²iïõ)*$í89…»§öîPÀe'Á48-3Þ]`í΂5…Þ[#ìˆ5‚Ìï,”±×O”‰A°£Æ4mžY±´W0IQ‹¼`“rŠÕ‹°_øÃgæƒÂ%¦ùÈÇçh|äï0,lÆ +çsbS½ã}à~Ž,>DÉ×a+õŽlB.Ú£,£TÂbÕv¢p¢Eæ÷€ºŒ”Ol >ccÙuK8@46¾¾/,B{qPÌׇ z¯ +8è‰×ð^4°5ê AC¦š4®¨Býu4Ôúû¹„ÍG¨"Ô¬e¨°}UgߺFF¥yJåÀ2 0{‰XWìƒLãÿð +!¦ r¡+ -Ã0B=¹‹TK° ¸ ¾ø<×Ͻ Þ%YÈÁóPî&¨´5êJÐLÚî÷T©ž¶ÁS-ëàKœÝ´/mg]UBÒ43Wé—!€ÖýíD¥ÅAå–8³n‰¢u‹d§HÂîÔ«ç|)o¢"Jp'tJ¶†z^«R^²s8wµ3×Σ֗’ªç©(Ÿ[wŠJÙóù<ÕdêɃ­·ÉŠÍ‹ùüªÕ$ +j~âî\ç@âѶõ 6˜_ÎôÆ[Hx“üèi¬“;«ò©‘7Îç[9UåÍkž}ilFTÕݬ6¿þ†îWüXå +X‹ïr6Öy˜0eŒ+Ë\IÛË_=KTÍPAÂçl‚Z™ +Hpî ’­ã½’`Üåäl7Ë_SÞVÙí¤°Ã‹Ç5A>Vš¦îhãB”Ù\–¼¡‹+מ1×,\ÎÇŒ§òÍMý»Oig»DýÒZëÕµmüREš.õôúŽ¦àëÖ ,sS@nºÀ†²È2È#L¨4ÔÈPC-¨dÂÒÛ¢²àZð4Vb]¢Š Å¡®Óvµcô±.&Æ⌡î4õÏÙ€ E3e?{Ö û‘!ªd íHK> stream 0 g 0 G 0 g 0 G +BT +/F62 9.9626 Tf 150.705 706.129 Td [(vectorized,)-267(it)-264(does)-263(not)-264(necessarily)-263(make)-264(optimal)-264(use)-263(of)-264(the)-264(memory)-263(hierar)18(chy)111(.)]TJ 0 -11.955 Td [(While)-228(pr)18(ocessing)-229(each)-228(diagonal)-229(we)-228(ar)18(e)-229(updating)-228(entries)-228(in)-229(the)-228(output)-229(vector)]TJ/F67 9.9626 Tf 335.99 0 Td [(y)]TJ/F62 9.9626 Tf 5.23 0 Td [(,)]TJ -341.22 -11.955 Td [(which)-290(is)-291(then)-290(accessed)-291(multiple)-290(times;)-311(if)-290(the)-290(vector)]TJ/F67 9.9626 Tf 229.535 0 Td [(y)]TJ/F62 9.9626 Tf 8.124 0 Td [(is)-290(too)-291(lar)18(ge)-290(to)-291(r)18(emain)-290(in)]TJ -237.659 -11.956 Td [(the)-250(cache)-250(memory)111(,)-250(the)-250(associated)-250(cache)-250(miss)-250(penalty)-250(is)-250(paid)-250(multiple)-250(times.)]TJ 14.944 -12.068 Td [(The)]TJ/F60 9.9626 Tf 20.923 0 Td [(hacked)-426(DIA)]TJ/F62 9.9626 Tf 53.49 0 Td [(\050)]TJ/F59 9.9626 Tf 3.317 0 Td [(HDIA)]TJ/F62 9.9626 Tf 28.224 0 Td [(\051)-426(format)-426(was)-426(designed)-426(to)-427(contain)-426(the)-426(amount)-426(of)]TJ -120.898 -11.956 Td [(padding,)-416(by)-384(br)18(eaking)-383(the)-383(original)-383(matrix)-383(into)-383(equally)-384(sized)-383(gr)18(oups)-383(of)-383(r)18(ows)]TJ 0 -11.955 Td [(\050)]TJ/F60 9.9626 Tf 3.317 0 Td [(hacks)]TJ/F62 9.9626 Tf 21.758 0 Td [(\051,)-271(and)-267(then)-266(storing)-267(these)-266(gr)18(oups)-267(as)-267(independent)-266(matrices)-267(in)-267(DIA)-266(format.)]TJ -25.075 -11.955 Td [(This)-256(appr)18(oach)-256(is)-257(similar)-256(to)-256(that)-256(of)-257(HLL,)-256(and)-256(r)18(equir)18(es)-256(using)-256(an)-257(of)18(fset)-256(vector)-256(for)]TJ 0 -11.955 Td [(each)-283(submatrix.)-411(Again,)-292(similarly)-283(to)-284(HLL,)-283(the)-284(various)-283(submatrices)-284(ar)18(e)-283(stacked)]TJ 0 -11.955 Td [(inside)-313(a)-314(linear)-313(array)-314(to)-313(impr)18(ove)-314(memory)-313(management.)-500(The)-314(fact)-313(that)-314(the)-313(ma-)]TJ 0 -11.955 Td [(trix)-337(is)-336(accessed)-337(in)-336(slices)-337(helps)-337(in)-336(r)18(educing)-337(cache)-336(misses,)-359(especially)-336(r)18(egar)18(ding)]TJ 0 -11.956 Td [(accesses)-250(to)-250(the)-250(vector)]TJ/F67 9.9626 Tf 95.959 0 Td [(y)]TJ/F62 9.9626 Tf 5.23 0 Td [(.)]TJ -86.245 -12.068 Td [(An)-246(additional)-246(vector)]TJ/F60 9.9626 Tf 93.666 0 Td [(hackOffsets)]TJ/F62 9.9626 Tf 48.566 0 Td [(is)-246(pr)18(ovided)-246(to)-246(complete)-246(the)-246(matrix)-246(format;)]TJ -157.176 -11.956 Td [(given)-321(that)]TJ/F60 9.9626 Tf 48.303 0 Td [(hackSize)]TJ/F62 9.9626 Tf 37.686 0 Td [(is)-321(the)-320(number)-321(of)-321(r)18(ows)-321(of)-320(each)-321(hack,)-339(the)]TJ/F60 9.9626 Tf 180.964 0 Td [(hackOffsets)]TJ/F62 9.9626 Tf 49.311 0 Td [(vector)]TJ -316.264 -11.955 Td [(is)-321(made)-321(by)-322(an)-321(array)-321(of)]TJ/F93 10.3811 Tf 105.045 0 Td [(\050)]TJ/F60 9.9626 Tf 4.274 0 Td [(m)]TJ/F62 9.9626 Tf 8 0 Td [(/)]TJ/F60 9.9626 Tf 6.336 0 Td [(h)-40(a)-25(c)-25(k)-30(S)-18(i)-32(z)-25(e)]TJ/F93 10.3811 Tf 36.682 0 Td [(\051)-235(+)]TJ/F62 9.9626 Tf 16.868 0 Td [(1)-321(elements,)-339(pointing)-321(to)-322(the)-321(\002rst)-321(diag-)]TJ -177.205 -11.955 Td [(onal)-289(of)18(fset)-290(of)-289(a)-289(submatrix)-289(inside)-290(the)-289(stacked)]TJ/F60 9.9626 Tf 198.567 0 Td [(offsets)]TJ/F62 9.9626 Tf 27.788 0 Td [(buf)18(fers,)-299(plus)-289(an)-290(additional)]TJ -226.355 -11.955 Td [(element)-350(equal)-350(to)-350(the)-351(number)-350(of)-350(nonzer)18(o)-350(diagonals)-350(in)-350(the)-350(whole)-351(matrix.)-610(W)92(e)]TJ 0 -11.955 Td [(thus)-315(have)-314(the)-315(pr)18(operty)-315(that)-315(the)-314(number)-315(of)-315(diagonals)-315(of)-314(the)]TJ/F60 9.9626 Tf 267.65 0 Td [(k)]TJ/F62 9.9626 Tf 4.598 0 Td [(-th)]TJ/F60 9.9626 Tf 15.499 0 Td [(hack)]TJ/F62 9.9626 Tf 21.068 0 Td [(is)-315(given)]TJ -308.815 -11.955 Td [(by)]TJ/F60 9.9626 Tf 13.539 0 Td [(hackOffsets[k+1])-250(-)-250(hackOffsets[k])]TJ/F62 9.9626 Tf 133.667 0 Td [(.)]TJ +0 g 0 G +ET +1 0 0 1 197.579 381.801 cm +q +.4451 0 0 .4451 0 0 cm +q +1 0 0 1 0 0 cm +/Im10 Do +Q +Q +0 g 0 G +1 0 0 1 -197.579 -381.801 cm +BT +/F62 9.9626 Tf 198.751 359.883 Td [(Figur)18(e)-250(9:)-310(Hacked)-250(DIA)-250(compr)18(ession)-250(of)-250(matrix)-250(in)-250(Figur)18(e)]TJ +0 0 1 rg 0 0 1 RG + [-250(5)]TJ +0 g 0 G +0 g 0 G +0 g 0 G + -33.102 -24.137 Td [(The)-250(r)18(elevant)-250(data)-250(type)-250(is)]TJ/F67 9.9626 Tf 110.952 0 Td [(psb_T_hdia_sparse_mat)]TJ/F62 9.9626 Tf 109.837 0 Td [(:)]TJ 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +ET q -1 0 0 1 99.895 695.17 cm -0 0 343.711 16.936 re f +1 0 0 1 150.705 120.326 cm +0 0 343.711 203.238 re f Q 0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F94 8.9664 Tf 102.884 701.446 Td [(psb_i_t)-525(psb_c_)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(<)]TJ +/F102 8.9664 Tf 163.108 312.904 Td [(type)]TJ 0 g 0 G - [(s,d,c,z)]TJ -0.40 0.40 0.40 rg 0.40 0.40 0.40 RG - [(>)]TJ + [-525(pm)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - [(global_vec_write\050vh,cdh\051;)]TJ -0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 14.122 -10.959 Td [(real)]TJ +0 g 0 G + [(\050psb_dpk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-1050(::)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(data)]TJ 0 g 0 G -/F54 9.9626 Tf -2.989 -24.209 Td [(that)-250(pr)18(oduces)-250(exactly)-250(this)-250(r)18(esult.)]TJ + [(\050:,:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 164.384 -586.799 Td [(149)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -14.122 -10.959 Td [(end)-525(type)]TJ 0 g 0 G -ET - -endstream -endobj -1856 0 obj -<< -/Length 1109 ->> -stream + [-525(pm)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -21.918 Td [(type)]TJ 0 g 0 G -BT -/F51 14.3462 Tf 150.705 705.784 Td [(10)-1000(Preconditioner)-250(routines)]TJ/F54 9.9626 Tf 0 -22.702 Td [(The)-228(base)-227(PSBLAS)-228(library)-227(contains)-228(the)-227(implementation)-228(of)-227(two)-228(simple)-227(pr)18(econdi-)]TJ 0 -11.955 Td [(tioning)-250(techniques:)]TJ + [-525(po)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 13.888 -19.925 Td [(\225)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 14.122 -10.959 Td [(integer)]TJ 0 g 0 G - [-500(Diagonal)-250(Scaling)]TJ + [(\050psb_ipk_\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.926 Td [(\225)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ 0 g 0 G - [-500(Block)-250(Jacobi)-250(with)-250(ILU\0500\051)-250(factorization)]TJ -13.888 -19.925 Td [(The)-356(supporting)-356(data)-356(type)-356(and)-356(subr)18(outine)-356(interfaces)-356(ar)18(e)-356(de\002ned)-356(in)-356(the)-356(mod-)]TJ 0 -11.955 Td [(ule)]TJ/F59 9.9626 Tf 16.301 0 Td [(psb_prec_mod)]TJ/F54 9.9626 Tf 62.764 0 Td [(.)-350(The)-263(old)-263(interfaces)]TJ/F59 9.9626 Tf 87.314 0 Td [(psb_precinit)]TJ/F54 9.9626 Tf 65.386 0 Td [(and)]TJ/F59 9.9626 Tf 19.489 0 Td [(psb_precbld)]TJ/F54 9.9626 Tf 60.156 0 Td [(ar)18(e)-263(still)]TJ -311.41 -11.955 Td [(supported)-250(for)-250(backwar)18(d)-250(compatibility)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 164.383 -497.003 Td [(150)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-1050(::)]TJ 0 g 0 G -ET - -endstream -endobj -1862 0 obj -<< -/Length 5016 ->> -stream +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(off\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(10.1)-1000(init)-250(\227)-250(Initialize)-250(a)-250(preconditioner)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -14.122 -10.959 Td [(end)-525(type)]TJ 0 g 0 G + [-525(po)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F59 9.9626 Tf 0 -18.964 Td [(call)-525(prec%init\050icontxt,ptype,)-525(info\051)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -21.918 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ + [(,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(extends)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ + [(\050psb_d_base_sparse_mat\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G - 0 -19.925 Td [(icontxt)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 35.965 0 Td [(the)-250(communication)-250(context.)]TJ -11.058 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 28.343 0 Td [(global)]TJ/F54 9.9626 Tf 28.782 0 Td [(.)]TJ -57.125 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 24 0 Td [(required)]TJ/F54 9.9626 Tf 39.292 0 Td [(.)]TJ -63.292 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(value.)]TJ + [-525(psb_d_hdia_sparse_mat)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(ptype)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG +/F120 8.9664 Tf 9.415 -10.959 Td [(!)]TJ 0 g 0 G -/F54 9.9626 Tf 30.994 0 Td [(the)-250(type)-250(of)-250(pr)18(econditioner)74(.)-310(Scope:)]TJ/F51 9.9626 Tf 151.121 0 Td [(global)]TJ/F54 9.9626 Tf -157.208 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(character)-250(string,)-250(see)-250(usage)-250(notes.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(On)-250(Exit)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.959 Td [(!)-525(HDIA)-525(format,)-525(extended.)]TJ 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -19.925 Td [(prec)]TJ +0.38 0.63 0.69 rg 0.38 0.63 0.69 RG + 0 -10.958 Td [(!)]TJ 0 g 0 G -/F54 9.9626 Tf 24.349 0 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -30.874 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 196.511 0 Td [(psb)]TJ -ET -q -1 0 0 1 337.631 446.268 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 340.77 446.069 Td [(prec)]TJ -ET -q -1 0 0 1 362.319 446.268 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 365.457 446.069 Td [(type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F102 8.9664 Tf 0 -21.918 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -286.483 -19.925 Td [(info)]TJ + [(\050pm\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -30.326 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(Err)18(or)-250(code:)-310(if)-250(no)-250(err)18(or)74(,)-250(0)-250(is)-250(r)18(eturned.)]TJ/F51 11.9552 Tf -24.907 -21.917 Td [(Notes)]TJ/F54 9.9626 Tf 34.311 0 Td [(Legal)-245(inputs)-244(to)-245(this)-245(subr)18(outine)-245(ar)18(e)-244(interpr)18(eted)-245(depending)-245(on)-244(the)]TJ/F52 9.9626 Tf 285.595 0 Td [(p)-25(t)-25(y)-80(p)-25(e)]TJ/F54 9.9626 Tf -319.906 -11.956 Td [(string)-250(as)-250(follows)]TJ -0 0 1 rg 0 0 1 RG -/F54 7.5716 Tf 72.358 3.617 Td [(4)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ 0 g 0 G -/F54 9.9626 Tf 4.284 -3.617 Td [(:)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -76.642 -19.925 Td [(NONE)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F54 9.9626 Tf 35.965 0 Td [(No)-250(pr)18(econditioning,)-250(i.e.)-310(the)-250(pr)18(econditioner)-250(is)-250(just)-250(a)-250(copy)-250(operator)74(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -35.965 -19.925 Td [(DIAG)]TJ + [-525(hdia\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 33.205 0 Td [(Diagonal)-371(scaling;)-432(each)-371(entry)-372(of)-371(the)-371(input)-371(vector)-372(is)-371(multiplied)-371(by)-371(the)]TJ -8.298 -11.955 Td [(r)18(ecipr)18(ocal)-266(of)-267(the)-266(sum)-267(of)-266(the)-266(absolute)-267(values)-266(of)-267(the)-266(coef)18(\002cients)-266(in)-267(the)-266(cor)18(-)]TJ 0 -11.955 Td [(r)18(esponding)-250(r)18(ow)-250(of)-250(matrix)]TJ/F52 9.9626 Tf 116.148 0 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(;)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + 0 -10.959 Td [(type)]TJ 0 g 0 G -/F51 9.9626 Tf -148.373 -19.926 Td [(BJAC)]TJ + [(\050po\051,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 30.446 0 Td [(Pr)18(econdition)-211(by)-212(a)-211(factorization)-212(of)-211(the)-212(block-diagonal)-211(of)-212(matrix)]TJ/F52 9.9626 Tf 273.867 0 Td [(A)]TJ/F54 9.9626 Tf 7.317 0 Td [(,)-219(wher)18(e)]TJ -286.723 -11.955 Td [(block)-347(boundaries)-348(ar)18(e)-347(determined)-347(by)-348(the)-347(data)-347(allocation)-348(boundaries)-347(for)]TJ 0 -11.955 Td [(each)-223(pr)18(ocess;)-232(r)18(equir)18(es)-222(no)-223(communication.)-301(Only)-223(the)-222(incomplete)-223(factoriza-)]TJ 0 -11.955 Td [(tion)]TJ/F52 9.9626 Tf 20.498 0 Td [(I)-96(L)-9(U)]TJ/F85 10.3811 Tf 18.202 0 Td [(\050)]TJ/F54 9.9626 Tf 4.149 0 Td [(0)]TJ/F85 10.3811 Tf 5.106 0 Td [(\051)]TJ/F54 9.9626 Tf 6.64 0 Td [(is)-250(curr)18(ently)-250(implemented.)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(allocatable)]TJ 0 g 0 G -ET -q -1 0 0 1 99.895 130.181 cm -[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S -Q -BT -/F54 5.9776 Tf 110.755 123.219 Td [(4)]TJ/F54 7.9701 Tf 3.487 -2.893 Td [(The)-250(string)-250(is)-250(case-insensitive)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F54 9.9626 Tf 150.037 -29.888 Td [(151)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1872 0 obj -<< -/Length 7572 ->> -stream + [-525(offset\050:\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(integer)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(10.2)-1000(build)-250(\227)-250(Builds)-250(a)-250(preconditioner)]TJ + [(\050psb_ipk_\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F59 9.9626 Tf 0 -20.364 Td [(call)-525(prec%build\050a,)-525(desc_a,)-525(info[,amold,vmold,imold]\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf 0 -24.086 Td [(T)90(ype:)]TJ + [-525(nblocks,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ + [-525(nzeros)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -29.828 -22.815 Td [(On)-250(Entry)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(integer)]TJ 0 g 0 G + [(\050psb_ipk_\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G - 0 -22.816 Td [(a)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(system)-250(sparse)-250(matrix.)-310(Scope:)]TJ/F51 9.9626 Tf 146.229 0 Td [(local)]TJ/F54 9.9626 Tf -131.285 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(,)-250(tar)18(get.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(sparse)-250(matrix)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 190.872 0 Td [(psb)]TJ -ET -q -1 0 0 1 382.802 580.382 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 385.94 580.183 Td [(Tspmat)]TJ -ET -q -1 0 0 1 417.95 580.382 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 421.088 580.183 Td [(type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ + [-525(hack)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -291.305 -22.815 Td [(prec)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F54 9.9626 Tf 24.348 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-190(as:)-280(an)-190(alr)18(eady)-190(initialized)-190(pr)18(econdtioner)-190(data)-190(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 277.288 0 Td [(psb)]TJ -ET -q -1 0 0 1 469.217 509.746 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 472.355 509.547 Td [(prec)]TJ -ET -q -1 0 0 1 493.904 509.746 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 497.043 509.547 Td [(type)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [-525(64)]TJ 0 g 0 G -/F51 9.9626 Tf -346.338 -34.771 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 474.975 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 474.776 Td [(a)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(pr)18(oblem)-250(communication)-250(descriptor)74(.)-310(Scope:)]TJ/F51 9.9626 Tf 208.625 0 Td [(local)]TJ/F54 9.9626 Tf -217.183 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(,)-250(tar)18(get.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(communication)-250(descriptor)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 247.649 0 Td [(psb)]TJ -ET -q -1 0 0 1 439.579 439.11 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 442.717 438.911 Td [(desc)]TJ -ET -q -1 0 0 1 464.266 439.11 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 467.404 438.911 Td [(type)]TJ +0.56 0.13 0.00 rg 0.56 0.13 0.00 RG + 0 -10.959 Td [(integer)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ + [(\050psb_long_int_k_\051)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -337.62 -22.816 Td [(amold)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(::)]TJ 0 g 0 G -/F54 9.9626 Tf 33.763 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(matrix)-250(storage.)]TJ -8.857 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(a)-250(class)-250(derived)-250(fr)18(om)]TJ/F59 9.9626 Tf 201.393 0 Td [(psb)]TJ -ET -q -1 0 0 1 393.323 368.474 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 396.461 368.275 Td [(T)]TJ -ET -q -1 0 0 1 402.319 368.474 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 405.457 368.275 Td [(base)]TJ -ET -q -1 0 0 1 427.006 368.474 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 430.144 368.275 Td [(sparse)]TJ -ET -q -1 0 0 1 462.154 368.474 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 465.292 368.275 Td [(mat)]TJ/F54 9.9626 Tf 15.691 0 Td [(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -330.278 -22.816 Td [(vmold)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + [-525(dim)]TJ 0 g 0 G -/F54 9.9626 Tf 34.321 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(vector)-250(storage.)]TJ -9.415 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(object)-250(of)-250(a)-250(class)-250(derived)-250(fr)18(om)]TJ/F59 9.9626 Tf 201.393 0 Td [(psb)]TJ -ET -q -1 0 0 1 393.323 297.838 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 396.461 297.638 Td [(T)]TJ -ET -q -1 0 0 1 402.319 297.838 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 405.457 297.638 Td [(base)]TJ -ET -q -1 0 0 1 427.006 297.838 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 430.144 297.638 Td [(vect)]TJ -ET -q -1 0 0 1 451.693 297.838 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 454.832 297.638 Td [(type)]TJ/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [(=)]TJ 0 g 0 G -/F51 9.9626 Tf -325.048 -22.815 Td [(imold)]TJ +0.25 0.63 0.44 rg 0.25 0.63 0.44 RG + [(0)]TJ +0 g 0 G +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F54 9.9626 Tf 32.099 0 Td [(The)-250(desir)18(ed)-250(dynamic)-250(type)-250(for)-250(the)-250(internal)-250(integer)-250(vector)-250(storage.)]TJ -7.193 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-190(as:)-280(an)-190(object)-190(of)-190(a)-190(class)-190(derived)-190(fr)18(om)-190(\050integer\051)]TJ/F59 9.9626 Tf 235.804 0 Td [(psb)]TJ -ET -q -1 0 0 1 427.733 227.202 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 430.872 227.002 Td [(T)]TJ -ET -q -1 0 0 1 436.73 227.202 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 439.868 227.002 Td [(base)]TJ -ET -q -1 0 0 1 461.417 227.202 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 464.555 227.002 Td [(vect)]TJ -ET -q -1 0 0 1 486.104 227.202 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 489.242 227.002 Td [(type)]TJ/F54 9.9626 Tf 20.922 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -359.459 -24.085 Td [(On)-250(Return)]TJ +/F62 9.9626 Tf 142.565 -36.164 Td [(168)]TJ 0 g 0 G +ET + +endstream +endobj +2136 0 obj +<< +/Type /XObject +/Subtype /Form +/FormType 1 +/PTEX.FileName (../figures/hdia.pdf) +/PTEX.PageNumber 1 +/PTEX.InfoDict 2143 0 R +/BBox [0 0 556 211] +/Resources << +/ProcSet [ /PDF /ImageC /Text ] +/ExtGState << +/R7 2144 0 R +>>/XObject << +/R8 2145 0 R +>>/Font << /R9 2146 0 R/R11 2147 0 R>> +>> +/Length 3027 +/Filter /FlateDecode +>> +stream +xœÝZK9Ý¿¢–E×øýX˜ 4IKE¬&êeXÌßç\û>\î$HhFBQé{¿ãû´OÙ®úx¸ÓŽþñÿÏ·oÞÔãý¿oÞ?ßÜñýÍ÷~ºpøÖÛéòñ¬ +߆âéöö–ý†Å‚ñîÌ6c‰á¬õð.1X4!“ÂtìyG±fE¥P7”h.¨Üvk.¨öÕ^¢Ø†Í•ýŽbÍŠªg«(ˆO¥Npög£³¢xÈnb}mˆ+(xw‰âJ~±âª;¨¾E·DqŸX±‚êYú¬DÓ¦ûÈsM4©GHµï(Ñ,¨àÆRÍŠJqG‰fEÕ¨ú… žÚ˜à¡œ™ÐaLùgÓø44<Áw”hT +mC©fEå¾£D³¢Ú T{‰ÂÂõsVú™a+šs»hZP9í(ÖŒúá¯4mŽEÐ[?1WÆdè ÛºRÓ†ÍJ'Ó*¯aæRVÅÀ2FƒÌ£`BØ0¬XóˆY+!•ì±¯šÑᬕÈŠ5Tp»-Öê#¨¸œÁ·šœ‹;°²ÝñÓß?ÿ„ )"E:Ù&_4Ó±ì(Ѭ¨v”hõJ‰‡Wy×)îÔñAÜX²9ö#?Ï!û¿™œ±þ›?o¦I'ÖÖ¡Ð +ˆ hHz>iÊ9og¤U˜(”?a†EÀ³u¶ßaÈFg0Pð«ùŒõ]½ùÏ>[pSÐ؇(©É@I]LsqF(ÓwépF­ºOM‹å‘ëLN’»X¸ÖÿQ‚§sé>- é@¤–´D–ÕE“Ý;~·†€bšsMÒ(»ÖÄgCyj¯KC*’K®hGj…I’9-‘­' bF-pé\\ ‚*¯’°$0DÉjþlZ*´tÅ…3DkŠ‹´ôµ'S\Z¿KGxôµþÚlnböeiHèg-=kðtCTÆJó¾®+¤Ÿ)øn+¤ Žæu ÏÖýºBi yš´œÔ…&­ˆY± sÙ|èlç ÆRIX¢äÇ56-²†àéÚxvŒàbA —ù$²õD\µpmÂãeuXXĨƒ¹r0ñiŠÂ¶Olâ‡aa00)CŸ¦HÓœ{º­ϱ<[ÐÖ±ãë;÷ØjlK{ˆ®÷¹oÇïþ²%º eá[hPצ¤9c()©¬œešIijùN}(#†°KKB™QÖ&„ +±ÑžG 7DÔÙgû=fZ:š·‚‹yl$oApYÔOíŇL~‰‚‡F9å5Öh¦Ó‚Ubú0º”(„O%Já[ÉÃY2]£fáÚIÉЮ”b^)yˆÆÁü%JFšqGê5 …žÎÈ4C`ôc^¹Çs.­ ¨ÝDbš¦kAGã’ÐE5þìiàË\+ˆ ÁA£Á,Ä4zŠãИ’S],¹q +àÕÀ–ÅèÚWšÙ‡ñZ1m¹©†CË8?¬¹e—¶Ühc·æÖó57‘-7ÖhhlABg–›Ä ŽQ ìYP×¾ÖÌÆZkaðfÃÞ%÷1#YSñüõtÃIoÃ`·FzT#Ô:ž©""ÒÒw+ ÄÁíj€eqA#Xƒ#͸g =з‰ 1€cP€D-¶,ÆZûJ3k-Í^Zn¢‘Ðrœ³ABÏqLË-‡9Ÿæ|S,›cÕphbABg–›Ä ‰Z lYŒµö•fF]æf^˜ò¨gÚ8ų7ÕÄŠýlT3"ª± Ú¿þV)Z?·¾ãf̺¡1Ö°X]]ƒùLÈ¿†ÙYáKjÏ¢ÖKedjˆ›<Ÿ®ð/o•¢‡‹í—çÛ>úéÆÅPó\+u¿Uóq™¿¨Ùñ¼(q|KàcÉtõ¼h|¤;ÓDŸ8TÜè3¦ÆÃYË«HŒÇŠ”Û+½’¬Q‹‰n¨û±ûìþǨÙ¡k£1*R$¢™®`˜jš“†Òè •f‘ª,vU#žÅûØ}JoÒ|íYÖ„5%sMè%Çxé?k‚SW-"RMXQ»Ö„G°Æ|d®ÉÕ§Ô„ì5kšéŠjÒãéÕwjt·n²¨VUÁne4;Ørô!”6t„A· ¡Y!éeB±:³h ¬yâJà×:ùD#†R[Dï°a§˜Ì–Isµ<H«eÕðd¡ èm8Ï% P§õ§#Þ´Ï·màxG¼ÎZË‘ýn9s…#Àƒjõ§á‚N5‚ÌcG,r©™®Çum”Ò|ûi¬½–»8¾PÃôE!øæ*]Šâ ÛæGR,_#ù¬ÝÍ?fCésöâÄñßâÝ´Ï·}$½”Ç®`Ly®…ÈR+Q—æ#­ÍK³ÙzT&zµ)ŠH_x6£=“¥O±ôù¢Og„ixLEð8•ˆÉ«KŽc³2žÙOO¦áU ¡òjSÃJ[êYâE,l^yÖÑgeƒ”´Ø¢þð™‰o² }DêBtôú§ëÏÎ'ºÖÑ"ý¨†ù‡Ç +Š}–é惄†Ì¶fbôBÍ™þïñùøÍÃí›7ýéxøñ6¿qžßŸ`Ú dœöPÀ‡çÛ{õð›Ïø%¥ƒ^w¬ã_îÎ…)ôzÜãÉŽ-îú ƒʪ#Oú! smèéRßa÷ý»;ÿê¾aÃì\º«¯þúð‡Û=–ŒsØﮆßÝݦ@ïwñÕ}Íî.ã?„NC ¬Ô,PŒ˜{¤ …áÛÔ…öJŸ«÷ôº8¬å¹×ô}¯ ÏHzw÷û׿ýãñ§ï¾{‹ÀU­áîÛ‡}±£ÇÀ„nbURA4Ûƒãǹ@7ú#Ãׯ"íW}¼{{¼~ófZÍÍã‡êðg)wæG?Üþ4º‘ +endstream +endobj +2145 0 obj +<< +/Subtype /Image +/ColorSpace /DeviceGray +/Width 2362 +/Height 946 +/BitsPerComponent 1 +/Interpolate true +/Filter /CCITTFaxDecode +/DecodeParms << +/K -1 +/Columns 2362 +>> +/Length 150 +>> +stream +ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ 0l?ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿü@ +endstream +endobj +2149 0 obj +<< +/Filter /FlateDecode +/Length 177 +>> +stream +xœ]1à EwNÁ €0´C”%]24ªÚ^€€‰ˆ¡·/8I‡¶ülë›õÃmð.SöHA¿ Së¼I°†-i ÌÎÑPãt>³^T$¬¿«øþD eìΣZ€=Å;b×è``JCR~ÒrÞµÖv¼ùÉ]0Ùc³1ç%“V¡KUP¼ J¼vêêáêð4Dõ–øŒo ÍjÏyø}C¬*Z‚|ïY› +endstream +endobj +2151 0 obj +<< +/Filter /FlateDecode +/Length 213 +>> +stream +xœ]1Â0 E÷œ"7hZ(¢Rä.¦ÊÐ4 +eàöØ.00<«/µ¥oW»Ãþ⬫s™ügb +>¦gñ¨{¼Ç¤êFÑÏ“êG—Uµ;º|{eÔÔ€añ“±ºÔ¼ÔËŒŸ|d籸tGe(LÃ߯nè糡NƪʮkŒ¡JºtÅÚ’nD[Ö-¤[ÖÒž5€@JIlÛ€m׬ôEÚ@ÚIÎo"ŽÌ»WÕþY +¦Y$àÅcÂß ó”yJê Ûál« +endstream +endobj +2152 0 obj +<< +/Filter /FlateDecode +/Length1 10604 +/Length 7344 +>> +stream +xœÍzyxTU²xÕ9÷ÞîNÒésÓÙ:!!„%±%7+K !áÓIXÂf!( LÂ(¢…qpâ‚ð‡NG° >‰>ynê¸>G¢âˆÎd†qw„ô¯ÎM@ù}ó½ïÍ÷ýþøÝCUSUg«S§Î9À =À¡¶¦.7ô/ãB [×4wŒ•Ó3°¶uÃzû—sÞÛJŒ÷ Ï/ëX¾fj×\c€Ü¹|õÆecúITÏôhÛÒæ%/]©È, æ”6bDd†<IåÔ¶5ëoïï+BöÕí­Íãý yäšæ;”ÕÖä?Mȯo^³t\_ÔS;Ú;ו3…¾½cÝҎח¹¨­ÐzK¬|ªl‰|$>$8Kpntvà‚¼ +ÔÑ•aAµŸ'Ü û ÎãDx†`6<%P {`œ‚# +ñe@…r8N´ƒJˆAîƒwáZXŸÀ0d@|€áÔNt@4>#\·Ž“V”Áo`WcäR~&ËFõ¼+01x5ð•„O05Ð3)÷'ƒtè†_B8¬„—„¡àfü àRÔXWÁQx«(76ÊÂjªõÆàPàLàSxFBXJ-ýn¥û`ˆMàerØ! ®†¹ÐLÒŸÁ»¹H”î#îø‚¹Øo¹Æá‚YзÃCd·à,|Á8ÄC”^ÿÊïÐت  6‘_=HÖ;‡á8Nĉ,†Åµb ®!Ù.ØOýÀi¬ÂÂgù~9o´8ˆ +|@ÔÓ÷Á³ÔÇW˜G:ÔOáë¥di½œq+Íp <§á5Çd÷¯á;Ì¢ô!û9ë, + |Bc1‚ ¦Áp'Ù6 Jiì5¤]Gmï UòŸÒ[4Ë0´Ó,¦á\œËqÞ~|ße +s°µìsîå/ó÷¥)²(¢–¢!™úUa´Ñ +üœ¬}'Í÷ ¼/b¦aÍè-ªÿ »Š•Sz„bðm|—tA¾etxôÏ£?zÁ@^6ƒìГþ†Ñ4†L\‰ø1|7{’‡r+Wùd^Âð~+ßÃÿ‹ÿ^Z'’Þ“gÉÍò!Cóèõ£¯ª7“-W:dCL%ÿYFÞ´ŠÆ×Ail†­Ð w¿Ü }pˆæ}^„7áðZ@yõ¾†¼nÞAé><ŒÏâ ø"~ˆßˆÄR(e°)¬˜•±J¶œm£´‡fo±s<‘·ònÞCi/?Æß•@’¤€œOi¦¼C> ¼lÈ0Ì4´_¹0r1ëbÃÅFa4~ôßFï}vôÓÀÂÀF¿r`t;ò>òÁý”'O<¿…Wàm}¬_ C™<>Uò†lZµbœ³(ÍÁy”®¡´SjÆl£Ô=ø ¼ oÆÛñ.=ÝKsÛÿŽÇ(=…ƒ”ÞÄ3ø'ü¿`äÄŒ“7;Y:Ëe…4Ó26ƒÕ°ù”–³vJlÛ@+t€ °ãì-Á<‡7óµü>þþÿÿ^bR¶”+¹¥…Òré&é”ôšôŽôƒl“+ä6y¯üœ’ (×(+•{•#Ê9å‚A1ÔZ › 0ŒNŠV¿£y…Ÿ~¹Ê)ì”#¥ÙÚ±¼CÞŽ×Ŷ€¯æwð×åexžÛñ=ìå+øªÀ#¼’}ÇÛq!;‰)Ü&ñe°xˆ}ȾbŸJQ¸€}†Ò/ñ)Ö΢":‘ߢ¤›äsìm(b[pˆ½Àoâ7þŠä½xFÞË^»4Ì"à íêíìªô{¶‚í€z©@þVÝÿ]¾‘ì=ÝŠYüÒ^ø„«ìK‘¢Äë° 9æ‘ï\úFázÚ{X:Å´ +Š&o`>ÄÂ=ï¿}ZDlùyùÙC<æC4²—¡ˆöÆ'”êáȇAòÁ[!Ý ›=¸„âþŠŸ ü¸r1˜¢e ­›Î‹h–B±°‰zýŽâÿKõ«ð¯pÚig A†$$;¥ +ŠLŠ¿;(-F*=w*Gå7 c$ûè^òò÷á::s>¦þãÁMã[ IÙ4j;EæµTãÑ™ Qº^F[hÌÓiŸ×J3)òÞXI3\AgT5‰/ŠÀ=PFk7?pS`4 +\ Ë¡.pâ¦Àv¹-”]RÅØñy:þwPÜž ïQí²l<‡c2¸Wr’¥f©äzó× ý“•jÅ +ÏLÚj4FoDY=O` c9–Àõ¦È¯½Ü²(Ô‡ˆ¶$§¢ûÿ¿ÁH¬sÐ^éµzfŽá† ‡ãYÉ8/jéäÇjãsò¹®,_uEùŠá…ôr°”ƪ,îí ºBVIÁª··RµWözz›ýžÕnU{óz^ßÛQá¹´üþÀàŽoåΚDåˆ;"£°È +?¬o-Ò맟Aç˜à :}ÚA¦óÎJ¼j‰¥·$;âTTäk~¢® :õedæë_|bþðÙaºˆÚˆqÆ K>ð•–Žg¦LË dåäŸ) âÀßÿ€Ÿ¡CE¯51!ÿ|‰™È:ãmÐÇÿ^o 5-ßIþ +É_â/Òy*ª½è3‡åSƒ¿ãOÑ-ÝF÷У㒣¡aùPÒÉo§é >M0Lpž@‚v~º v!ÀBØFKP#8ü?DãÜOõ-„s Ú vH°€?NüUóƒ|%Œ6¾“¦QDwð_éôQ¢ñD&>=øCTtßxù~¢Bþëqþ}TŽ&zï8½‡ø DïÖ¼6~×xyïÒë­§}¼Ó—l³–$“ÜNGÀ)·‡r{Èt{¨„‘.v«õžú‰æ]3FÉ\[|U_£-1qù}dÒ-dú-d¹-d¹- ‘hó%Íc:9|3él&ͤ³™¬’Ç;©¿NZ0 l%°p²{'Ù]𽄇Nëü› ï&è%~Ù1“Fu_éË°‘“-(Ôò‹OÐ=©ÙeqIù»~,™‚„# §¡»T—.0…îÒø¤1JZ«JBy+üŒ€A$áT‚‚r‰·úRsmƒ|.¬1‚jëfݼ[ê–¥¼r ?Éó¡Öä’á<ܤikrãT©ÃÔcâV“Ý”gÒLµ&¹žD»8·ñ\^Ìkx—ý!Ÿ¡hm†R4iwp_°7x(øt°ìU†”ÓÊ°r^‘íJž¢)µŠGéPz”ÝJŸbÚ­ì60OpGpO0·Ûƒó‚µàÚ`ÙfÀ¾’m¼ElZÂV‚‚ÝÙ¸‰øv~A­F™â:âa ’•à4凉ÊT²ž…ô,ĵ×B\ ,$µ‚Žq©rYr©ŽÐ?/$é$ %n(Ùv˜ðy‘#˜M%3•ÌT2“ÖivFh%l'¨%à:o˜€¼†ð%YÞ¸ÜC èòóºÎ%™&ê² ZsúP&z3±/wg¢æ..É×R…‡‡7©MΦŒ¦ýR»ÚîlÏhß/Õ¨5ΚŒšýR±Zì,Î(Þ/媹ÎÜŒÜý’Mµ9m¶ýÒ®ê#Õ'«OUKMÕíÕÝÕ|*-݀ϕ—¯Ó§ G}qñùS-%W³#4&ÂûÎp°¶ä´ÈìˆÎ}‚¸O÷ ¨!h"©Ö"Ķ˟.9!gWÈ9Mþ°¯hRMI5…Ý&‚}œÚ>LòúöXîˆÎ÷Öù5ãú}:_hÙ.ÕAp±îÓ6\ ÅM2œâ‹à µNØFÐAp„@â‹)-â‹Ø”³Ã<[3OŒ²At4,áaFk‰•…/˜ñ ŽïÕñm:.Öqª:ÛüÍló3³Í·Ì6§S†eÐdÆ=:vhÁ%æ'KÌ5%æÌ3µ0³(+ãŸuèpê4-úàÍ¢S£OÝJ­(O:ÏÚ¾uŸ‡¯ÑâÛkûøišßB*~„³|‡l¯æòÙNeûÑyÌö{õ„í…T?.ôÙ†²ýFœÌö3wŽí~u±í>'•}¶­ÙO‹aÀšñB7dO·U»Ù*~$±æ¦Î´ [‘ºÎVHìi~œ5pÈ61Õ/†’Gm:fË¢ÓT}(×Ld“Á€]Z¶a½¡Å°Ð0Ïp•a’!Ç`7$ ‘Æp£Õj 1FÅ(™Œ‘þÀ°æ÷¶HŪÿÄ' ,éy+˜é?ýC#£½ãàU¬ª®½áUPµ Ô;ÕUå7æ{§¹ª¼ÆÚ«ïG¼£J^v«aA=9¨`mK»ã€˜»íöA7o»½¡«¼C­PÕb÷~SGó¢Kª¬–ÆBô†âØâðéa…•åÿyƱëÇ/ÖõÓ/6É{wU]½÷ñ¤o¾È’ª¼3ijð8[ËÚ+ʳAêã&¶¶b¾àã¦ò†ËjÂ:H Ü‚µHj‚ºZµ®FnšRQÞŸ’2¦ôÎJä>ÏéJËÇÚJ¥.¨­ZAH%CªÞV*KjäcY~ÚX EoÌzc‰B©ßé$•l§PéŸê$…~çT]|èG±êN8õ~œØ ÷ƒø£NƘyÁ¸3’Žëÿå·´ô_PÆæ÷—´ŠÇ¹G­XJàñîØÐëíi±Ûû—¼?þjOó´´¶ Ú¼Ôû¾º´Ü»D-·÷7·þq«7«åýÐZ± ¾¿U[ZîkÖš+Ôæò†ÇºËª®èë¶Ë}•uÿ“ƺEce¢¯Çªþ‰¸Jˆ}U‰¾ªD_ié}UÍ/ŪÚú~#”6ÐëN§,8ˆöƒ'ÁÑPm혮oŽ«±?O”€Ž­`z‡¨¥^3å”ä”íN! +?¿Œ‹b~•#aŽ‹¬ÄSKÁ±+Ê/ÿëìì\/ «ËEx}W¬Î[O›ÖQWå­E·×]áÕ<å (–£kü+«×¬'ݧܬÝÝíÞåÞç>â–»ºˆ~2åT +kJiOéNÙ•²/åHŠ"×ÖÓÜûRþ–»țp=}åzŸ]DéŸ(®ïêPcݹº\eõ%)ÐJ·^¤zD¨“êdøOÂo|Lð%7þÁ#‚Ãsx=ÝËE .tbyþ@Þäüi~¢ÍËÆhÝâ1Z1wŒºKòc‰úŠ'•XèŽ0Hø%‚÷>'øÌóy¾Þxט×6tB§ iø@…õuºÖ£‹2(̽¾ÓåÂÁiHÕ…Wú=`g)hAˆ’ÎíÕº½¬ÇÄÃYÖïÌôŠžÓÏð{†.«vÒ²ägÏ<É!È 2G⌊|’ä 8f‚ Wáuë²~ã¾èžkýÊ=碊)o½@hbž#Ìæ$D¡.ØùÐM†À. Q—öçøSr½ÔSaÐ×l´Ó©é“å(AÌæx?Z´pS<¤iiLKó¤õ¥ §Iia‚ÚDünØ}4î8ç &ÓQë¢4Ιkm\ûÍœ()™˜W¶Q«ÆT55%•) 92ÅàLLHJHNàJDšÅœÇ‡Ö6%¾#C)B¹T´·`‚‘P¸5ªâ‚é1E ,²²¶F„O2)?&:,’)jJzÚTkLô¤ü)S§„¤§¥§©)…ÍÞ¹~±çÍ÷ßúFËs[×<_Q¸vÊúä y©…™Eå“g°½ç°f~ɾFüeôØ]Ÿ<ûíè¹þ»š×ÆÂs÷wæ9®®}@·™–é8Ù,ˆnqokÑf +îfÍÂ5 f…`”&ˆÜ$+(…›A +1KJˆÙìÇD-Ü`Œ4ŒF.”#ØÌh>ÐJã>Í,£b2*ŠQ–BB¤tþp0â2-Ød²pÜÇpÆýø­KW]¤»=–>Ë°…[Í€†¸ÐA¼ãÒ¬u‹htÏùª‘²² +Ÿ(.̵º)X/®s‡†bXxáö .i‹õy‘µX,ó`]#6®]‡Qj˜昌“ˆ ?~lÿÅçX×õûGSñ«;FËzø/.ìd]lÒÿ°çÉ& +Ù$îÑ"µXOl_ìp¬±Z,Û· -‰Àtã5a¤ßŠ¼‘ò*UþŽf²¢‰ø…Fw9 31”MÆÆa¿%õYZxh¨E ›œgé¶ì¦YK–¸˜A–ŠgÇ]Îåžc9kž_즹`X!|=r¿v¹hN¸¶1Â9),2::&Ê1y:›,ÜBxÅyœíˆp_;Ê<Ó¢ƒ Îxg©ô»‡~ؾnZ2s:YÒÄMìý=Yöd›˜cuà,ß/WC0‰pD¡ŠÚ†ôß³·GÝx×莵y &%ÉÕiÿxFz!a‚'xlë°‡ÕÓ£ˆC±fgrOÒ’)Ý2¢þƒ fÅZôànìÃÓ¨  ŽB´`±ËÅFò-È!L}7Š^uL¾ø‹¹g¼mŠ5 ÉGò±ú8†}!…&ñk†;¤°ÄTT\•"2afæ´L­ÀSpª`¸àÛ `‰©[Ý4áñÔ㩃^œpF=ãüï Ÿ§|æ ™eÌôãÎŒ +øÙÙÓy˜ççG¹lÆh?î;š¤¹r ’èY1`5gfœÀ6ˆûX ®µ÷ØÙn{ŸÙý|Ò€7Cü¸›ø9=9lwN_Ë!þÑ&C·üì-H+À¾‚¡V@k4ý)-âd‹ˆ›$ÂÞ¹KaמּçF×~%ÐYŠÅ]#ëŠGG sÇ"á” ¹ÉiAIIq¨ŽT‡Ó!)²34--ˆB\®”Ó‚ÉÊ9‚Ó[0È4AÉkA›9IÄ<«{ü|ÉÚJŸîÓë`--ï=òEE¢R)i“ tV´'ë0MU…ߧo+ê¿ù‘E¥ƒ[z:îýóm­¹Ž¸ø°cœYËîQãm®»çÚköÍÜ깿Mš}Û]+kïÙ;ñØϼ[–§'eåb%xïꚪiI%ÉA×Ý\³¼û±±õm¡õÝHûÃAþ\ö¨„á É+’»ån¥;i§t{’a2›ì¸†_c_äX•¸AÞ˜¸õÆ÷&>šúÔaÕBo;‹5,<"*:Æifœ‹€fwDÚ¹dwÄ'$rC¬$w߀ÝîˆÄïéôÐ(²âGÀ>r8hû âtzXÏ8Úcèë†_Óº©¨©•©äß³²>:D#šÉ®Yû¬Ì—2ˆwágú"žm¤0bm1R_ʳ´±(O§˜¾€UÂÂc +·'¸d +š +b£5B#]Ž´àUR{ø’ä¹#InlÀF48 ’XE1Щ4¶,—…Ö$ùƹ£m hºÛ¢›çunÜÔ>AOÏ­šÓÕ¿wÇš§Q’«?–¾÷Vÿªc=éSëò]VGA÷ÏÞ,Ê10‹8‹êÉæý´§b!F´¬.Ó† Baz×ù™SQ8nᛤMÑÛb$·1C‘¹—§p{“´'ŽÙÓ0-ÍBÛøöXÅÑ?`1Óí5±Z8…®,-‹iYž¬¾¬á,)+n̾$‚k„="/B‹ØÑaˆˆËüñp¡qÎųã7} Pp"ë5Ž¬#sá˜Íh#d':MáI‰É‰L sšÓœ&•\ݚЎPÊ¥¥µ`b¸½RBÁ¥#_x¿îûÊ ÂžÂçÅ‘Vž:e*Q‘—M¬¦(üî›<²*u÷/w¼²|ó+;šŸ¹-ß­ºøJøŒÊI³Ývë–´Er›Ó\óðïnkö>¾óñk0éέ¿X¾½Îóaiî£÷ú‡]x¸Þ¢à÷:+gëx‹”(½®ßÆþ +)~§MO À¿öѵ9(Ì`”þÅšÿÿ~Lѱ$ìs~j @8G`ñèsÖóÂj +YÑF.þÎ X°bÍÒιKo˜ß¾¦ùúÚº9 ô,p÷xýÿÍg¼²xή`ŒÿK)ÄDÀl;Áy‚jö8Ô­ƒ¨[ßÿáí°×þÒï=2ØdqmŒëøá“žôèÕðÃú‹;­EÆP%ï÷ÿ=X +endstream +endobj +2153 0 obj +<< +/Filter /FlateDecode +/Length1 5080 +/Length 3303 +>> +stream +xœÍW}pÇu{¾ûÀá@‚îp)!H€`HŠ¦(J´TY–,pdÙ$EËr¤„uÕöäÃi’Zƒ6¶•hœŽëIâL:I󇻢S[IGi:´V“ŽËq;©;ã~ØhR'ÓvEöí¤d7MÿëanoßÛ·ï÷~ûvBððpø¶£E œGþw,Ž/Ÿ[\íÈÒòüòù‡ô€ü$*~ŠïS÷­ž>i¿mg¸•Óg¹¯cï~ péþ•ÅS¼ÿ‹O(_Eeù~Tx–ùÿ +Êý÷Ÿ{èᎽr‹Ñ³Y^ìÎÄ¢ïÜâëdM¼Šöÿ…²þáÅs+Ýõ½ŠEbõ#}¨Ûÿ1Ö¾úàÊjc±÷€+Žkº›µ+ÜÓHáoe˜„¸ÏÑ‚cø;Òiø0|ª Ap;FA‡Ýp Á‡à ,Á)@…¸"ÌA?œ ¼†»`<¿ á·`>Ÿ„£ðˆìÇá7á³?…<å>\£·ž¯S0wÇ©«MÕÝ'êú+”(#ñ%yýïh0W \þÀ‘Ú^³n(Ÿ?×éÌášAgêêʳ®†iag¦©õ¸a$ôõ€p5Ǻk3³HÞÔ×»“›zíÀÑ„AI½Ö@‡̆©7æ"ëÐéÂ>*²0ȸn‰9À*òûh°¹øÀ½7zº*yt¢ñ8ƒíÖSfÃCõõÉÄKØͯÁ ™™%^aœ’«±òHÍ\ÂÕ›³ üs‘Ÿ9Rk"÷,Ï6‰NðCõeÚ³Ò·5—š§¨E\°(8¤ÇóÙÍq8%¼úîÅkªðÓ¼ç 9šùg˜ö#Ã9”9ˆ±®ÞßÅV÷ dós”|PïášMÈu{žE”ŠHÖ謄ZÅb´\²b%«RÒÌ—SJZ˜ ‹Í—žœYõ>Õ§U¾zîÓ…ƒ·œ?#$`Ñjöe‘úXÈYÍl“²JCÛ·`eN½;Mé0%B´aô±¦ÝH¯"ñü +:%‰³g3¤A„ÑÒ®ï¶ç¬Á¿âeÄïs/TèÕ'6þåi}‚ôM¥íCŠæ>ƒ\êÙõw2#H8ÞíqÉ÷ŽŸQ¿]I»°Íœ&ö˜ÁjéÊËÏíOGBrÅÓÆxÀ€Yh‹hã(mÁ‡¬Ê)Û c‰û)ˆAóÅÛ¶‰ß`¡Š““lËðÌG·“RJNzQFGx3æ;¡šæÈ©Ç/¿ðäSüâ¾¾ÜD%¹ëP!ˆæª¢áï=ó¥+ßöKß%Ç÷¯ÎåÅáÝÇo*--MN.-mÇ+‹ñê‡ç¡`Y9è„…ú¬fe'|m§ÒR:áJDY,š/×J»QÜ%iW»éN3­›ørÍ´›UÓ¦/Gh¶H#부1MYM!Â?ÚDV0 ŠEÏÝ51!u ¢­!½c"FJ°x»ƒHØPÕVØ.”Ù.4GHŽ¼?Æš£ÍIR²Ê•y]ó혺hLl¼YMéNá{Båc¢Á‹“C>í)óÇSýü 1|rÏ@H}ÍœìÜ-R›¿ ÿ€1OCs€qY/Ùy b´hûpw†"s$*1g‹E*¬c¾´5¹Ýáƒè‰&Ú€AM†ÛöNüj¸z³JAú#ÞíÕóìÀIÊkAQb›ì|× *5…ävùddîbÐÇnØ«#¤sð é;'Šs°”bŽ“GgOÞ={z~xÉß+¨±žøòäÈÄÜm˵óÇ­ÓÁ€Hb)-»´§2÷xqa´r¼zðžLæojwŒíØŸ;=ýkg2Jø•ßÝåG ù164}ŒÅ˜œJ+Ð!„ÂXÒ‹JVÙJ¦½ +Ã¥7†,I ùdI +Y2”bÚ¡,2 5Ī© n¸HÃë6 K’V3ÌÎÿ™0²„ÊcB©‘±Zýjô‹v ©¡v¨‘cÔàÈI¶Ç[­Úr/ÊbV‰Æ>@ <ÿ;CTGüKcâC'²}…kÿTÑ·yòx4ôÌLÖ§iþég'Ã¥Ûÿ@qcä;ש28þætfk݆8iøo¨©0x¢}$!ñB ´šÄa>q£{,¥Ç‹Ô½Þr‰pšºÚÛa> +stream 0 g 0 G - 0 -22.816 Td [(prec)]TJ 0 g 0 G -/F54 9.9626 Tf 24.348 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econdtioner)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 193.612 0 Td [(psb)]TJ -ET -q -1 0 0 1 385.542 132.48 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 388.68 132.281 Td [(prec)]TJ -ET +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG q -1 0 0 1 410.229 132.48 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 99.895 662.293 cm +0 0 343.711 49.813 re f Q -BT -/F59 9.9626 Tf 413.367 132.281 Td [(type)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf -98.279 -41.843 Td [(152)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +BT +/F102 8.9664 Tf 112.299 690.488 Td [(contains)]TJ 0 g 0 G -ET - -endstream -endobj -1876 0 obj -<< -/Length 1021 ->> -stream +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + 4.707 -10.959 Td [(....)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG + -4.707 -10.959 Td [(end)-525(type)]TJ 0 g 0 G -BT -/F51 9.9626 Tf 99.895 706.129 Td [(info)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ -24.907 -21.918 Td [(The)]TJ/F59 9.9626 Tf 20.388 0 Td [(amold)]TJ/F54 9.9626 Tf 26.152 0 Td [(,)]TJ/F59 9.9626 Tf 6.506 0 Td [(vmold)]TJ/F54 9.9626 Tf 29.862 0 Td [(and)]TJ/F59 9.9626 Tf 20.577 0 Td [(imold)]TJ/F54 9.9626 Tf 29.862 0 Td [(ar)18(guments)-372(may)-373(be)-372(employed)-373(to)-372(interface)-372(with)]TJ -133.347 -11.955 Td [(special)-250(devices,)-250(such)-250(as)-250(GPUs)-250(and)-250(other)-250(accelerators.)]TJ 0 g 0 G - 164.384 -533.997 Td [(153)]TJ +/F62 9.9626 Tf 151.98 -578.132 Td [(169)]TJ 0 g 0 G ET endstream endobj -1884 0 obj +2161 0 obj << -/Length 5673 +/Length 7014 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 150.705 706.129 Td [(10.3)-1000(apply)-250(\227)-250(Preconditioner)-250(application)-250(routine)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(12.4)-1000(CUDA-class)-250(extensions)]TJ/F62 9.9626 Tf 0 -18.964 Td [(For)-285(computing)-285(with)-285(CUDA)-285(we)-285(de\002ne)-285(a)-285(dual)-285(memorization)-285(strategy)-285(in)-285(which)]TJ 0 -11.955 Td [(each)-368(variable)-368(on)-368(the)-369(CPU)-368(\050\223host\224\051)-368(side)-368(has)-368(a)-368(GPU)-368(\050\223device\224\051)-368(side.)-665(When)-368(a)]TJ 0 -11.955 Td [(GPU-type)-355(variable)-355(is)-355(initialized,)-382(the)-355(data)-355(contained)-355(is)-356(\050usually\051)-355(the)-355(same)-355(on)]TJ 0 -11.955 Td [(both)-246(sides.)-309(Each)-246(operator)-245(invoked)-246(on)-246(the)-246(variable)-246(may)-246(change)-246(the)-246(data)-246(so)-246(that)]TJ 0 -11.956 Td [(only)-250(the)-250(host)-250(side)-250(or)-250(the)-250(device)-250(side)-250(ar)18(e)-250(up-to-date.)]TJ 14.944 -11.955 Td [(Keeping)-208(track)-209(of)-209(the)-208(updates)-209(to)-208(data)-209(in)-208(the)-209(variables)-208(is)-209(essential:)-289(we)-209(want)-208(to)]TJ -14.944 -11.955 Td [(perform)-220(most)-221(comp)1(utations)-221(on)-220(the)-220(GPU,)-221(but)-220(we)-220(cannot)-220(af)18(for)18(d)-221(the)-220(time)-220(needed)]TJ 0 -11.955 Td [(to)-327(move)-326(data)-327(between)-326(the)-327(host)-327(memory)-326(and)-327(the)-326(device)-327(memory)-327(because)-326(the)]TJ 0 -11.955 Td [(bandwidth)-351(of)-352(the)-351(inter)18(connection)-352(bus)-351(would)-351(become)-352(the)-351(main)-352(bottleneck)-351(of)]TJ 0 -11.955 Td [(the)-262(computation.)-347(Thus,)-265(each)-263(and)-262(every)-262(computational)-262(r)18(outine)-262(in)-263(the)-262(library)-262(is)]TJ 0 -11.956 Td [(built)-250(accor)18(ding)-250(to)-250(the)-250(following)-250(principles:)]TJ 0 g 0 G + 13.888 -18.472 Td [(\225)]TJ 0 g 0 G -/F59 9.9626 Tf 0 -18.964 Td [(call)-525(prec%apply\050x,y,desc_a,info,trans,work\051)]TJ 0 -11.955 Td [(call)-525(prec%apply\050x,desc_a,info,trans\051)]TJ + [-500(If)-254(the)-253(data)-254(type)-253(being)-254(handled)-254(is)-253(GPU-enabled,)-255(make)-253(sur)18(e)-254(that)-254(its)-253(device)]TJ 11.018 -11.955 Td [(copy)-351(is)-352(up)-351(to)-351(date,)-377(perform)-351(any)-351(arithmetic)-351(operation)-351(on)-352(the)-351(GPU,)-351(and)]TJ 0 -11.955 Td [(if)-314(the)-313(data)-314(has)-314(been)-313(alter)18(ed)-314(as)-313(a)-314(r)18(esult,)-330(mark)-313(the)-314(main-memory)-314(copy)-313(as)]TJ 0 -11.955 Td [(outdated.)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ + -11.018 -19.199 Td [(\225)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ + [-500(The)-266(main-memory)-266(copy)-266(is)-266(never)-267(u)1(pdated)-267(unless)-266(this)-266(is)-266(r)18(equested)-266(by)-266(the)]TJ 11.018 -11.955 Td [(user)-250(either)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F59 9.9626 Tf 0 -19.199 Td [(explicitly)]TJ 0 g 0 G +/F62 9.9626 Tf 47.582 0 Td [(by)-250(invoking)-250(a)-250(synchr)18(onization)-250(method;)]TJ 0 g 0 G - 0 -19.925 Td [(prec)]TJ +/F59 9.9626 Tf -47.582 -15.213 Td [(implicitly)]TJ 0 g 0 G -/F54 9.9626 Tf 24.348 0 Td [(the)-250(pr)18(econditioner)74(.)-310(Scope:)]TJ/F51 9.9626 Tf 117.837 0 Td [(local)]TJ/F54 9.9626 Tf -117.279 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 196.511 0 Td [(psb)]TJ -ET -q -1 0 0 1 388.441 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 391.579 577.576 Td [(prec)]TJ +/F62 9.9626 Tf 49.793 0 Td [(by)-276(invoking)-276(a)-277(method)-276(that)-276(involves)-276(other)-277(data)-276(items)-276(that)-276(ar)18(e)]TJ -27.875 -11.955 Td [(not)-250(GPU-enabled,)-250(e.g.,)-250(by)-250(assignment)-250(ov)-250(a)-250(vector)-250(to)-250(a)-250(normal)-250(array)111(.)]TJ -46.824 -19.199 Td [(In)-264(this)-264(way)111(,)-268(data)-264(items)-264(ar)18(e)-265(put)-264(on)-264(the)-264(GPU)-264(memory)-265(\223on)-264(demand\224)-264(and)-264(r)18(emain)]TJ 0 -11.955 Td [(ther)18(e)-337(as)-337(long)-337(as)-337(\223normal\224)-337(computations)-337(ar)18(e)-337(carried)-337(out.)-571(As)-337(an)-336(example,)-359(the)]TJ 0 -11.955 Td [(following)-250(call)-250(to)-250(a)-250(matrix-vector)-250(pr)18(oduct)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG ET q -1 0 0 1 413.128 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 150.705 364.481 cm +0 0 343.711 16.936 re f Q -BT -/F59 9.9626 Tf 416.266 577.576 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F51 9.9626 Tf -286.483 -19.925 Td [(x)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(sour)18(ce)-250(vector)74(.)-310(Scope:)]TJ/F51 9.9626 Tf 111.142 0 Td [(local)]TJ/F54 9.9626 Tf -96.198 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 218.688 0 Td [(psb)]TJ -ET -q -1 0 0 1 410.618 521.985 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 413.756 521.785 Td [(T)]TJ -ET -q -1 0 0 1 419.614 521.985 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 422.752 521.785 Td [(vect)]TJ -ET -q -1 0 0 1 444.301 521.985 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG BT -/F59 9.9626 Tf 447.439 521.785 Td [(type)]TJ +/F102 8.9664 Tf 172.523 370.757 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ + [-525(psb_spmm\050alpha,a,x,beta,y,desc_a,info\051)]TJ +0.95 0.95 0.95 rg 0.95 0.95 0.95 RG 0 g 0 G -/F51 9.9626 Tf -317.656 -19.925 Td [(desc)]TJ -ET -q -1 0 0 1 171.218 502.059 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S -Q -BT -/F51 9.9626 Tf 174.207 501.86 Td [(a)]TJ +/F62 9.9626 Tf -21.818 -23.482 Td [(will)-321(transpar)18(ently)-322(and)-321(automatically)-321(be)-322(performed)-321(on)-321(the)-322(GPU)-321(whenever)-321(all)]TJ 0 -11.955 Td [(thr)18(ee)-274(data)-274(inputs)]TJ/F67 9.9626 Tf 78.001 0 Td [(a)]TJ/F62 9.9626 Tf 5.23 0 Td [(,)]TJ/F67 9.9626 Tf 5.282 0 Td [(x)]TJ/F62 9.9626 Tf 7.961 0 Td [(and)]TJ/F67 9.9626 Tf 19.598 0 Td [(y)]TJ/F62 9.9626 Tf 7.962 0 Td [(ar)18(e)-274(GPU-enabled.)-382(If)-275(a)-274(pr)18(ogram)-274(makes)-274(many)-274(such)]TJ -124.034 -11.956 Td [(calls)-250(sequentially)111(,)-250(then)]TJ 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(the)-250(pr)18(oblem)-250(communication)-250(descriptor)74(.)-310(Scope:)]TJ/F51 9.9626 Tf 208.625 0 Td [(local)]TJ/F54 9.9626 Tf -217.183 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-250(as:)-310(a)-250(communication)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 200.207 0 Td [(psb)]TJ -ET -q -1 0 0 1 392.137 466.194 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 395.275 465.994 Td [(desc)]TJ -ET -q -1 0 0 1 416.824 466.194 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 419.963 465.994 Td [(type)]TJ + 13.888 -18.472 Td [(\225)]TJ 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ + [-500(The)-322(\002rst)-322(kernel)-322(invocation)-322(will)-322(\002nd)-322(the)-322(data)-322(in)-322(main)-322(memory)111(,)-340(and)-322(will)]TJ 11.019 -11.955 Td [(copy)-356(it)-357(to)-356(the)-357(GPU)-356(memory)111(,)-383(thus)-356(incurring)-357(a)-356(signi\002cant)-356(over)18(head;)-410(the)]TJ 0 -11.955 Td [(r)18(esult)-250(is)-250(however)]TJ/F60 9.9626 Tf 78.155 0 Td [(not)]TJ/F62 9.9626 Tf 15.771 0 Td [(copied)-250(back,)-250(and)-250(ther)18(efor)18(e:)]TJ 0 g 0 G -/F51 9.9626 Tf -290.179 -19.925 Td [(trans)]TJ + -104.945 -19.199 Td [(\225)]TJ 0 g 0 G -/F54 9.9626 Tf 27.666 0 Td [(Scope:)]TJ -2.76 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(character)74(.)]TJ + [-500(Subsequent)-378(kernel)-377(invocations)-378(involving)-377(the)-378(same)-377(vector)-378(will)-378(\002nd)-377(the)]TJ 11.018 -11.955 Td [(data)-250(on)-250(the)-250(GPU)-250(side)-250(so)-250(that)-250(they)-250(will)-250(r)8(un)-250(at)-250(full)-250(speed.)]TJ -24.906 -18.472 Td [(For)-285(al)1(l)-285(invocations)-285(af)1(ter)-285(the)-285(\002rst)-284(the)-285(only)-284(data)-285(that)-284(will)-285(have)-284(to)-285(be)-284(transferr)18(ed)]TJ 0 -11.955 Td [(to/fr)18(om)-298(the)-298(main)-298(memory)-297(will)-298(be)-298(the)-298(scalars)]TJ/F67 9.9626 Tf 204.122 0 Td [(alpha)]TJ/F62 9.9626 Tf 29.12 0 Td [(and)]TJ/F67 9.9626 Tf 19.834 0 Td [(beta)]TJ/F62 9.9626 Tf 20.921 0 Td [(,)-310(and)-298(the)-298(r)18(eturn)]TJ -273.997 -11.955 Td [(code)]TJ/F67 9.9626 Tf 23.213 0 Td [(info)]TJ/F62 9.9626 Tf 20.921 0 Td [(.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -19.926 Td [(work)]TJ +/F59 9.9626 Tf -44.134 -20.101 Td [(V)111(ectors:)]TJ 0 g 0 G -/F54 9.9626 Tf 28.782 0 Td [(an)-250(optional)-250(work)-250(space)-250(Scope:)]TJ/F51 9.9626 Tf 136.476 0 Td [(local)]TJ/F54 9.9626 Tf -140.352 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(optional)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(double)-250(pr)18(ecision)-250(array)111(.)]TJ +/F62 9.9626 Tf 40.677 0 Td [(The)-253(data)-253(type)]TJ/F67 9.9626 Tf 63.08 0 Td [(psb_T_vect_gpu)]TJ/F62 9.9626 Tf 75.745 0 Td [(pr)18(ovides)-253(a)-253(GPU-enabled)-253(extension)-253(of)]TJ -154.595 -11.955 Td [(the)-306(inner)-307(type)]TJ/F67 9.9626 Tf 65.726 0 Td [(psb_T_base_vect_type)]TJ/F62 9.9626 Tf 104.607 0 Td [(,)-321(and)-306(must)-307(be)-306(used)-306(together)-307(with)]TJ -170.333 -11.955 Td [(the)-327(other)-326(inner)-327(matrix)-326(type)-327(to)-327(make)-326(full)-327(use)-326(of)-327(the)-327(GPU)-326(computational)]TJ 0 -11.956 Td [(capabilities;)]TJ 0 g 0 G -/F51 9.9626 Tf -24.906 -21.918 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -24.907 -19.198 Td [(CSR:)]TJ 0 g 0 G +/F62 9.9626 Tf 27.945 0 Td [(The)-210(data)-210(type)]TJ/F67 9.9626 Tf 61.792 0 Td [(psb_T_csrg_sparse_mat)]TJ/F62 9.9626 Tf 111.928 0 Td [(pr)18(ovides)-210(an)-210(interface)-209(to)-210(the)-210(GPU)]TJ -176.758 -11.955 Td [(version)-250(of)-250(CSR)-250(available)-250(in)-250(the)-250(NVIDIA)-250(CuSP)92(ARSE)-250(library;)]TJ 0 g 0 G - 0 -19.925 Td [(y)]TJ + 139.476 -29.888 Td [(170)]TJ 0 g 0 G -/F54 9.9626 Tf 10.52 0 Td [(the)-250(destination)-250(vector)74(.)-310(Scope:)]TJ/F51 9.9626 Tf 131.914 0 Td [(local)]TJ/F54 9.9626 Tf -117.528 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 218.688 0 Td [(psb)]TJ ET -q -1 0 0 1 410.618 276.904 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q + +endstream +endobj +2165 0 obj +<< +/Length 5445 +>> +stream +0 g 0 G +0 g 0 G +0 g 0 G BT -/F59 9.9626 Tf 413.756 276.704 Td [(T)]TJ +/F59 9.9626 Tf 99.895 706.129 Td [(HYB:)]TJ +0 g 0 G +/F62 9.9626 Tf 29.061 0 Td [(The)-201(data)-202(typ)1(e)]TJ/F67 9.9626 Tf 61.537 0 Td [(psb_T_hybg_sparse_mat)]TJ/F62 9.9626 Tf 111.842 0 Td [(pr)18(ovides)-201(an)-201(interface)-202(to)-201(the)-201(HYB)]TJ -177.533 -11.955 Td [(GPU)-353(storage)-354(available)-354(in)-353(the)-354(NVIDIA)-353(CuSP)92(ARSE)-354(library)111(.)-621(The)-353(internal)]TJ 0 -11.955 Td [(str)8(uctur)18(e)-251(is)-252(opaque,)-252(hence)-251(the)-252(host)-251(side)-252(is)-252(just)-251(CSR;)-252(the)-251(HYB)-252(data)-251(format)]TJ 0 -11.956 Td [(is)-250(only)-250(available)-250(up)-250(to)-250(CUDA)-250(version)-250(10.)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -20.26 Td [(ELL:)]TJ +0 g 0 G +/F62 9.9626 Tf 25.734 0 Td [(The)-290(data)-291(type)]TJ/F67 9.9626 Tf 64.203 0 Td [(psb_T_elg_sparse_mat)]TJ/F62 9.9626 Tf 107.501 0 Td [(pr)18(ovides)-290(an)-291(interface)-290(to)-291(the)-290(ELL-)]TJ -172.531 -11.955 Td [(P)92(ACK)-250(implementation)-250(fr)18(om)-250(SPGPU;)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -20.261 Td [(HLL:)]TJ +0 g 0 G +/F62 9.9626 Tf 27.946 0 Td [(The)-190(data)-190(type)]TJ/F67 9.9626 Tf 61.199 0 Td [(psb_T_hlg_sparse_mat)]TJ/F62 9.9626 Tf 106.5 0 Td [(pr)18(ovides)-190(an)-190(interface)-190(to)-190(the)-190(Hacked)]TJ -170.738 -11.955 Td [(ELLP)92(ACK)-250(implementation)-250(fr)18(om)-250(SPGPU;)]TJ +0 g 0 G +/F59 9.9626 Tf -24.907 -20.261 Td [(HDIA:)]TJ +0 g 0 G +/F62 9.9626 Tf 35.696 0 Td [(The)-341(data)-342(type)]TJ/F67 9.9626 Tf 65.722 0 Td [(psb_T_hdiag_sparse_mat)]TJ/F62 9.9626 Tf 118.468 0 Td [(pr)18(ovides)-341(an)-342(int)1(erface)-342(to)-341(the)]TJ -194.979 -11.955 Td [(Hacked)-250(DIAgonals)-250(implementation)-250(fr)18(om)-250(SPGPU;)]TJ/F59 14.3462 Tf -24.907 -34.763 Td [(13)-1000(CUDA)-250(Environment)-250(Routines)]TJ/F59 11.9552 Tf 0 -24.857 Td [(psb)]TJ ET q -1 0 0 1 419.614 276.904 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 120.53 514.195 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 422.752 276.704 Td [(vect)]TJ +/F59 11.9552 Tf 124.116 513.996 Td [(cuda)]TJ ET q -1 0 0 1 444.301 276.904 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 150.729 514.195 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 447.439 276.704 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -317.656 -19.925 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.745 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F59 11.9552 Tf 154.315 513.996 Td [(init)-250(\227)-250(Initializes)-250(PSBLAS-CUDA)-250(environment)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -54.42 -19.126 Td [(call)]TJ 0 g 0 G - 139.477 -118.52 Td [(154)]TJ + [-525(psb_cuda_init\050ctxt)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -ET - -endstream -endobj -1889 0 obj -<< -/Length 3387 ->> -stream + [-525([,)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G + [-525(device]\051)]TJ/F62 9.9626 Tf 14.944 -22.253 Td [(This)-250(subr)18(outine)-250(initializes)-250(the)-250(PSBLAS-CUDA)-250(envir)18(onment.)]TJ 0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(10.4)-1000(descr)-250(\227)-250(Prints)-250(a)-250(description)-250(of)-250(current)-250(preconditioner)]TJ +/F59 9.9626 Tf -14.944 -20.177 Td [(T)90(ype:)]TJ 0 g 0 G +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F59 9.9626 Tf 0 -18.964 Td [(call)-525(prec%descr\050info\051)]TJ 0 -11.955 Td [(call)-525(prec%descr\050info,iout,)-525(root\051)]TJ +/F59 9.9626 Tf -29.828 -20.261 Td [(On)-250(Entry)]TJ 0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ + 0 -20.26 Td [(device)]TJ 0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ +/F62 9.9626 Tf 34.311 0 Td [(ID)-250(of)-250(CUDA)-250(device)-250(to)-250(attach)-250(to.)]TJ -9.404 -11.956 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(local)]TJ/F62 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(optional)]TJ/F62 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-254(as:)-317(an)-254(integer)-253(value.)-575(Default:)-317(use)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf 193.007 0 Td [(mod)]TJ 0 g 0 G + [(\050iam,ngpu\051)]TJ/F62 9.9626 Tf 70.521 0 Td [(wher)18(e)]TJ/F67 9.9626 Tf 29.935 0 Td [(iam)]TJ/F62 9.9626 Tf 18.218 0 Td [(is)]TJ -311.681 -11.955 Td [(the)-275(call)1(ing)-275(pr)18(ocess)-275(index)-274(and)]TJ/F67 9.9626 Tf 131.323 0 Td [(ngpu)]TJ/F62 9.9626 Tf 23.656 0 Td [(is)-275(the)-274(total)-275(number)-274(of)-275(CUDA)-274(devices)]TJ -154.979 -11.956 Td [(available)-250(on)-250(the)-250(curr)18(ent)-250(node.)]TJ/F59 11.9552 Tf -24.907 -20.176 Td [(Notes)]TJ 0 g 0 G - 0 -19.925 Td [(prec)]TJ +/F62 9.9626 Tf 12.454 -20.177 Td [(1.)]TJ 0 g 0 G -/F54 9.9626 Tf 24.349 0 Td [(the)-250(pr)18(econditioner)74(.)-310(Scope:)]TJ/F51 9.9626 Tf 117.836 0 Td [(local)]TJ/F54 9.9626 Tf -117.278 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 196.511 0 Td [(psb)]TJ + [-500(A)-250(call)-250(to)-250(this)-250(r)18(outine)-250(must)-250(pr)18(ecede)-250(any)-250(other)-250(PSBLAS-CUDA)-250(call.)]TJ/F59 11.9552 Tf -12.454 -29.61 Td [(psb)]TJ ET q -1 0 0 1 337.631 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 120.53 270.423 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 340.77 577.576 Td [(prec)]TJ +/F59 11.9552 Tf 124.116 270.224 Td [(cuda)]TJ ET q -1 0 0 1 362.319 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 150.729 270.423 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 365.457 577.576 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -286.483 -19.925 Td [(iout)]TJ +/F59 11.9552 Tf 154.315 270.224 Td [(exit)-250(\227)-250(Exit)-250(from)-250(PSBLAS-CUDA)-250(environment)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -54.42 -19.126 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 23.243 0 Td [(output)-250(unit.)-310(Scope:)]TJ/F51 9.9626 Tf 87.391 0 Td [(local)]TJ/F54 9.9626 Tf -85.727 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(number)74(.)-310(Default:)-310(default)-250(output)-250(unit.)]TJ + [-525(psb_cuda_exit\050ctxt\051)]TJ/F62 9.9626 Tf 14.944 -22.254 Td [(This)-250(subr)18(outine)-250(exits)-250(fr)18(om)-250(the)-250(PSBLAS)-250(CUDA)-250(context.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(root)]TJ +/F59 9.9626 Tf -14.944 -20.176 Td [(T)90(ype:)]TJ 0 g 0 G -/F54 9.9626 Tf 23.253 0 Td [(Pr)18(ocess)-250(fr)18(om)-250(which)-250(to)-250(print)-250(Scope:)]TJ/F51 9.9626 Tf 155.834 0 Td [(local)]TJ/F54 9.9626 Tf -154.18 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.956 Td [(Speci\002ed)-387(as:)-585(an)-387(integer)-387(number)-388(between)-387(0)-387(and)]TJ/F52 9.9626 Tf 220.442 0 Td [(n)-80(p)]TJ/F83 10.3811 Tf 14.01 0 Td [(\000)]TJ/F54 9.9626 Tf 10.638 0 Td [(1,)-422(in)-387(which)-387(case)]TJ -245.09 -11.955 Td [(the)-314(speci\002e)1(d)-314(pr)18(ocess)-314(will)-313(print)-314(the)-313(description,)-330(or)]TJ/F83 10.3811 Tf 225.38 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1,)-329(in)-314(which)-314(case)-313(all)]TJ -233.574 -11.955 Td [(pr)18(ocesses)-250(will)-250(print.)-310(Default:)-310(0.)]TJ +/F62 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(On)-250(Return)]TJ +/F59 9.9626 Tf -29.828 -20.261 Td [(On)-250(Entry)]TJ 0 g 0 G 0 g 0 G - 0 -19.925 Td [(info)]TJ + 0 -20.261 Td [(ctxt)]TJ 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ +/F62 9.9626 Tf 21.021 0 Td [(the)-250(communication)-250(context)-250(identifying)-250(the)-250(virtual)-250(parallel)-250(machine.)]TJ 3.886 -11.955 Td [(Scope:)]TJ/F59 9.9626 Tf 31.432 0 Td [(global)]TJ/F62 9.9626 Tf 28.782 0 Td [(.)]TJ -60.214 -11.955 Td [(T)90(ype:)]TJ/F59 9.9626 Tf 27.088 0 Td [(required)]TJ/F62 9.9626 Tf 39.292 0 Td [(.)]TJ -66.38 -11.955 Td [(Intent:)]TJ/F59 9.9626 Tf 31.8 0 Td [(in)]TJ/F62 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable.)]TJ 0 g 0 G - 139.477 -263.975 Td [(155)]TJ + 139.477 -29.888 Td [(171)]TJ 0 g 0 G ET endstream endobj -1790 0 obj +2041 0 obj << /Type /ObjStm /N 100 -/First 967 -/Length 10102 +/First 1000 +/Length 12945 >> stream -1786 0 501 58 1787 115 1788 172 1783 230 1792 311 1794 429 505 488 1795 546 1796 605 -1791 664 1798 745 1800 863 509 921 1801 978 1802 1035 1797 1093 1804 1187 1806 1305 513 1364 -1803 1422 1809 1516 1807 1655 1811 1800 517 1858 1812 1915 1813 1973 1808 2031 1816 2125 1814 2264 -1818 2409 521 2468 1819 2526 1820 2585 1815 2644 1823 2738 1821 2877 1825 3022 525 3080 1826 3137 -1827 3194 1822 3252 1831 3346 1829 3485 1833 3628 529 3687 1834 3745 1835 3803 1830 3862 1838 3956 -1836 4095 1840 4240 533 4298 1841 4355 1842 4413 1837 4471 1845 4591 1843 4730 1847 4872 537 4931 -1848 4989 1849 5048 1844 5107 1851 5227 1853 5345 1850 5403 1855 5484 1857 5602 541 5661 1854 5719 -1861 5813 1858 5961 1859 6108 1863 6256 545 6314 1864 6371 1860 6429 1871 6549 1866 6715 1867 6860 -1868 7006 1869 7153 1873 7300 549 7359 1870 7417 1875 7511 1877 7629 1874 7687 1883 7781 1878 7947 -1879 8092 1880 8235 1881 8381 1885 8525 553 8584 1882 8642 1888 8736 1886 8875 1890 9020 557 9078 -% 1786 0 obj -<< -/D [1784 0 R /XYZ 98.895 753.953 null] ->> -% 501 0 obj -<< -/D [1784 0 R /XYZ 99.895 716.092 null] ->> -% 1787 0 obj -<< -/D [1784 0 R /XYZ 99.895 678.98 null] ->> -% 1788 0 obj -<< -/D [1784 0 R /XYZ 99.895 679.195 null] ->> -% 1783 0 obj -<< -/Font << /F51 584 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 1792 0 obj -<< -/Type /Page -/Contents 1793 0 R -/Resources 1791 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1789 0 R ->> -% 1794 0 obj -<< -/D [1792 0 R /XYZ 149.705 753.953 null] ->> -% 505 0 obj -<< -/D [1792 0 R /XYZ 150.705 716.092 null] ->> -% 1795 0 obj -<< -/D [1792 0 R /XYZ 150.705 689.963 null] ->> -% 1796 0 obj -<< -/D [1792 0 R /XYZ 150.705 693.143 null] ->> -% 1791 0 obj -<< -/Font << /F51 584 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] ->> -% 1798 0 obj -<< -/Type /Page -/Contents 1799 0 R -/Resources 1797 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1789 0 R ->> -% 1800 0 obj -<< -/D [1798 0 R /XYZ 98.895 753.953 null] ->> -% 509 0 obj +2032 0 2033 153 2034 308 2035 460 2039 612 577 671 581 729 2036 787 2052 909 2044 1066 +2045 1215 2047 1362 2054 1509 585 1567 589 1624 2055 1681 2056 1739 2051 1797 2062 1934 2068 2073 +2069 2253 2050 2296 2064 2443 2059 2502 2065 2561 2066 2620 2067 2679 2061 2737 2073 2873 2085 3012 +2086 3192 2088 3235 2089 3440 2090 3729 2092 3950 2049 4163 2075 4309 2060 4367 2076 4426 2077 4485 +2078 4544 2079 4603 2080 4662 2081 4721 2082 4780 2083 4838 2071 4897 2084 4956 2072 5014 2101 5206 +2106 5363 2107 5543 2108 5586 2109 5889 2110 6094 2112 6307 2097 6528 2098 6675 2099 6822 2103 6967 +2104 7026 2105 7085 2100 7144 2121 7350 2126 7489 2127 7669 2128 7712 2129 7913 2130 8208 2132 8429 +2119 8642 2123 8789 2116 8847 2124 8906 2117 8964 2125 9023 2120 9080 2139 9233 2143 9372 2144 9552 +2146 9595 2147 9802 2148 10103 2150 10324 2137 10537 2141 10684 2142 10743 2138 10802 2155 10982 2157 11100 +2154 11158 2160 11241 2162 11359 593 11418 2159 11476 2164 11598 2166 11716 597 11774 601 11830 2167 11887 +% 2032 0 obj << -/D [1798 0 R /XYZ 99.895 716.092 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [267.568 552.736 279.523 561.692] +/A << /S /GoTo /D (cite.DesPat:11) >> >> -% 1801 0 obj +% 2033 0 obj << -/D [1798 0 R /XYZ 99.895 678.98 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [283.001 552.835 294.956 561.841] +/A << /S /GoTo /D (cite.CaFiRo:2014) >> >> -% 1802 0 obj +% 2034 0 obj << -/D [1798 0 R /XYZ 99.895 679.195 null] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [298.433 552.736 310.389 561.841] +/A << /S /GoTo /D (cite.Sparse03) >> >> -% 1797 0 obj +% 2035 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [198.555 540.88 210.51 549.737] +/A << /S /GoTo /D (cite.OurTechRep) >> >> -% 1804 0 obj +% 2039 0 obj << -/Type /Page -/Contents 1805 0 R -/Resources 1803 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1789 0 R +/D [2037 0 R /XYZ 149.705 753.953 null] >> -% 1806 0 obj +% 577 0 obj << -/D [1804 0 R /XYZ 149.705 753.953 null] +/D [2037 0 R /XYZ 150.705 716.092 null] >> -% 513 0 obj +% 581 0 obj << -/D [1804 0 R /XYZ 150.705 716.092 null] +/D [2037 0 R /XYZ 150.705 525.151 null] >> -% 1803 0 obj +% 2036 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R /F60 666 0 R /F102 1016 0 R >> /ProcSet [ /PDF /Text ] >> -% 1809 0 obj +% 2052 0 obj << /Type /Page -/Contents 1810 0 R -/Resources 1808 0 R +/Contents 2053 0 R +/Resources 2051 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1789 0 R -/Annots [ 1807 0 R ] +/Parent 2057 0 R +/Annots [ 2044 0 R 2045 0 R 2047 0 R ] >> -% 1807 0 obj +% 2044 0 obj << /Type /Annot /Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 438.278 369.462 450.338] -/A << /S /GoTo /D (spdata) >> ->> -% 1811 0 obj -<< -/D [1809 0 R /XYZ 98.895 753.953 null] ->> -% 517 0 obj -<< -/D [1809 0 R /XYZ 99.895 716.092 null] ->> -% 1812 0 obj -<< -/D [1809 0 R /XYZ 99.895 676.015 null] ->> -% 1813 0 obj -<< -/D [1809 0 R /XYZ 99.895 679.195 null] ->> -% 1808 0 obj -<< -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/Border[0 0 0]/H/I/C[0 1 0] +/Rect [121.315 282.166 133.27 291.173] +/A << /S /GoTo /D (cite.MRC:11) >> >> -% 1816 0 obj +% 2045 0 obj << -/Type /Page -/Contents 1817 0 R -/Resources 1815 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1789 0 R -/Annots [ 1814 0 R ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [253.836 246.183 265.791 255.592] +/A << /S /GoTo /D (table.21) >> >> -% 1814 0 obj +% 2047 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 571.778 420.271 583.837] -/A << /S /GoTo /D (spdata) >> +/Rect [314.807 176.296 321.781 188.355] +/A << /S /GoTo /D (figure.6) >> >> -% 1818 0 obj +% 2054 0 obj << -/D [1816 0 R /XYZ 149.705 753.953 null] +/D [2052 0 R /XYZ 98.895 753.953 null] >> -% 521 0 obj +% 585 0 obj << -/D [1816 0 R /XYZ 150.705 716.092 null] +/D [2052 0 R /XYZ 99.895 349.244 null] >> -% 1819 0 obj +% 589 0 obj << -/D [1816 0 R /XYZ 150.705 676.015 null] +/D [2052 0 R /XYZ 99.895 231.907 null] >> -% 1820 0 obj +% 2055 0 obj << -/D [1816 0 R /XYZ 150.705 679.195 null] +/D [2052 0 R /XYZ 99.895 211.056 null] >> -% 1815 0 obj +% 2056 0 obj +<< +/D [2052 0 R /XYZ 99.895 120.166 null] +>> +% 2051 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F120 1782 0 R /F102 1016 0 R /F62 667 0 R /F67 913 0 R /F59 665 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1823 0 obj +% 2062 0 obj << /Type /Page -/Contents 1824 0 R -/Resources 1822 0 R +/Contents 2063 0 R +/Resources 2061 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1828 0 R -/Annots [ 1821 0 R ] +/Parent 2057 0 R +/Annots [ 2050 0 R ] >> -% 1821 0 obj +% 2068 0 obj +<< +/Producer (GPL Ghostscript 9.10) +/CreationDate (D:20140329133929+01'00') +/ModDate (D:20140329133929+01'00') +/Creator (cairo 1.13.1 \(http://cairographics.org\)) +>> +% 2069 0 obj +<< +/Type /ExtGState +/OPM 1 +>> +% 2050 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 438.278 369.462 450.338] -/A << /S /GoTo /D (spdata) >> +/Rect [184.361 326.969 191.335 339.028] +/A << /S /GoTo /D (algocf.1) >> >> -% 1825 0 obj +% 2064 0 obj << -/D [1823 0 R /XYZ 98.895 753.953 null] +/D [2062 0 R /XYZ 149.705 753.953 null] >> -% 525 0 obj +% 2059 0 obj << -/D [1823 0 R /XYZ 99.895 716.092 null] +/D [2062 0 R /XYZ 150.705 716.092 null] >> -% 1826 0 obj +% 2065 0 obj << -/D [1823 0 R /XYZ 99.895 678.98 null] +/D [2062 0 R /XYZ 397.506 408.774 null] >> -% 1827 0 obj +% 2066 0 obj +<< +/D [2062 0 R /XYZ 150.705 264.598 null] +>> +% 2067 0 obj << -/D [1823 0 R /XYZ 99.895 679.195 null] +/D [2062 0 R /XYZ 150.705 232.98 null] >> -% 1822 0 obj +% 2061 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F62 667 0 R /F60 666 0 R /F93 915 0 R /F67 913 0 R >> +/XObject << /Im6 2046 0 R >> /ProcSet [ /PDF /Text ] >> -% 1831 0 obj +% 2073 0 obj << /Type /Page -/Contents 1832 0 R -/Resources 1830 0 R +/Contents 2074 0 R +/Resources 2072 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1828 0 R -/Annots [ 1829 0 R ] ->> -% 1829 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [352.526 426.323 428.58 438.383] -/A << /S /GoTo /D (vdata) >> +/Parent 2057 0 R +/Annots [ 2049 0 R ] >> -% 1833 0 obj +% 2085 0 obj << -/D [1831 0 R /XYZ 149.705 753.953 null] +/Producer (GPL Ghostscript 9.10) +/CreationDate (D:20140329133928+01'00') +/ModDate (D:20140329133928+01'00') +/Creator (cairo 1.13.1 \(http://cairographics.org\)) >> -% 529 0 obj +% 2086 0 obj << -/D [1831 0 R /XYZ 150.705 716.092 null] +/Type /ExtGState +/OPM 1 >> -% 1834 0 obj +% 2088 0 obj << -/D [1831 0 R /XYZ 150.705 678.98 null] +/BaseFont /YAZDUX+TimesNewRomanPSMT +/FontDescriptor 2090 0 R +/ToUnicode 2091 0 R +/Type /Font +/FirstChar 48 +/LastChar 57 +/Widths [ 500 500 500 500 500 0 0 500 500 500] +/Subtype /TrueType >> -% 1835 0 obj +% 2089 0 obj << -/D [1831 0 R /XYZ 150.705 679.195 null] +/BaseFont /NDNSMY+FreeSerif +/FontDescriptor 2092 0 R +/ToUnicode 2093 0 R +/Type /Font +/FirstChar 32 +/LastChar 89 +/Widths [ 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 721 0 0 0 0 0 0 0 0 385 0 0 0 0 0 0 0 667 529 0 0 0 0 0 701] +/Subtype /TrueType >> -% 1830 0 obj +% 2090 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/Type /FontDescriptor +/FontName /YAZDUX+TimesNewRomanPSMT +/FontBBox [ 15 -13 638 675] +/Flags 65540 +/Ascent 675 +/CapHeight 675 +/Descent -13 +/ItalicAngle 0 +/StemV 95 +/MissingWidth 777 +/FontFile2 2094 0 R >> -% 1838 0 obj +% 2092 0 obj << -/Type /Page -/Contents 1839 0 R -/Resources 1837 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1828 0 R -/Annots [ 1836 0 R ] +/Type /FontDescriptor +/FontName /NDNSMY+FreeSerif +/FontBBox [ 0 -71 706 752] +/Flags 65540 +/Ascent 752 +/CapHeight 679 +/Descent -71 +/ItalicAngle 0 +/StemV 105 +/MissingWidth 600 +/FontFile2 2095 0 R >> -% 1836 0 obj +% 2049 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 574.778 369.462 586.838] -/A << /S /GoTo /D (spdata) >> +/Rect [384.587 583.767 391.56 595.827] +/A << /S /GoTo /D (figure.5) >> >> -% 1840 0 obj +% 2075 0 obj << -/D [1838 0 R /XYZ 98.895 753.953 null] +/D [2073 0 R /XYZ 98.895 753.953 null] >> -% 533 0 obj +% 2060 0 obj << -/D [1838 0 R /XYZ 99.895 716.092 null] +/D [2073 0 R /XYZ 389.217 621.446 null] >> -% 1841 0 obj +% 2076 0 obj << -/D [1838 0 R /XYZ 99.895 679.441 null] +/D [2073 0 R /XYZ 114.839 563.747 null] >> -% 1842 0 obj +% 2077 0 obj << -/D [1838 0 R /XYZ 99.895 679.657 null] +/D [2073 0 R /XYZ 114.839 567.034 null] >> -% 1837 0 obj +% 2078 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R /F94 915 0 R >> -/ProcSet [ /PDF /Text ] +/D [2073 0 R /XYZ 114.839 556.075 null] >> -% 1845 0 obj +% 2079 0 obj << -/Type /Page -/Contents 1846 0 R -/Resources 1844 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1828 0 R -/Annots [ 1843 0 R ] +/D [2073 0 R /XYZ 114.839 545.116 null] >> -% 1843 0 obj +% 2080 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [452.361 574.59 528.415 586.65] -/A << /S /GoTo /D (vdata) >> +/D [2073 0 R /XYZ 114.839 534.158 null] >> -% 1847 0 obj +% 2081 0 obj << -/D [1845 0 R /XYZ 149.705 753.953 null] +/D [2073 0 R /XYZ 114.839 523.199 null] >> -% 537 0 obj +% 2082 0 obj << -/D [1845 0 R /XYZ 150.705 716.092 null] +/D [2073 0 R /XYZ 114.839 512.24 null] >> -% 1848 0 obj +% 2083 0 obj << -/D [1845 0 R /XYZ 150.705 679.413 null] +/D [2073 0 R /XYZ 114.839 501.281 null] >> -% 1849 0 obj +% 2071 0 obj << -/D [1845 0 R /XYZ 150.705 679.628 null] +/D [2073 0 R /XYZ 114.839 481.057 null] >> -% 1844 0 obj +% 2084 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R /F94 915 0 R >> +/D [2073 0 R /XYZ 99.895 294.895 null] +>> +% 2072 0 obj +<< +/Font << /F62 667 0 R /F59 665 0 R /F102 1016 0 R /F120 1782 0 R /F60 666 0 R /F93 915 0 R /F91 914 0 R /F67 913 0 R >> +/XObject << /Im7 2048 0 R >> /ProcSet [ /PDF /Text ] >> -% 1851 0 obj +% 2101 0 obj << /Type /Page -/Contents 1852 0 R -/Resources 1850 0 R +/Contents 2102 0 R +/Resources 2100 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1828 0 R +/Parent 2057 0 R +/Annots [ 2097 0 R 2098 0 R 2099 0 R ] >> -% 1853 0 obj +% 2106 0 obj << -/D [1851 0 R /XYZ 98.895 753.953 null] +/Producer (GPL Ghostscript 9.10) +/CreationDate (D:20140329133928+01'00') +/ModDate (D:20140329133928+01'00') +/Creator (cairo 1.13.1 \(http://cairographics.org\)) >> -% 1850 0 obj +% 2107 0 obj << -/Font << /F94 915 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/Type /ExtGState +/OPM 1 >> -% 1855 0 obj +% 2108 0 obj << -/Type /Page -/Contents 1856 0 R -/Resources 1854 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1828 0 R +/BaseFont /MCSFLP+FreeSerif +/FontDescriptor 2110 0 R +/ToUnicode 2111 0 R +/Type /Font +/FirstChar 32 +/LastChar 89 +/Widths [ 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 721 0 670 0 610 564 0 714 0 385 709 0 0 0 724 0 0 667 529 606 0 0 0 0 701] +/Subtype /TrueType >> -% 1857 0 obj +% 2109 0 obj << -/D [1855 0 R /XYZ 149.705 753.953 null] +/BaseFont /AJRCAD+TimesNewRomanPSMT +/FontDescriptor 2112 0 R +/ToUnicode 2113 0 R +/Type /Font +/FirstChar 48 +/LastChar 57 +/Widths [ 500 500 500 500 500 0 0 500 500 500] +/Subtype /TrueType >> -% 541 0 obj +% 2110 0 obj << -/D [1855 0 R /XYZ 150.705 716.092 null] +/Type /FontDescriptor +/FontName /MCSFLP+FreeSerif +/FontBBox [ 0 -71 706 752] +/Flags 65540 +/Ascent 752 +/CapHeight 679 +/Descent -71 +/ItalicAngle 0 +/StemV 105 +/MissingWidth 600 +/FontFile2 2114 0 R >> -% 1854 0 obj +% 2112 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/Type /FontDescriptor +/FontName /AJRCAD+TimesNewRomanPSMT +/FontBBox [ 15 -13 638 675] +/Flags 65540 +/Ascent 675 +/CapHeight 675 +/Descent -13 +/ItalicAngle 0 +/StemV 95 +/MissingWidth 777 +/FontFile2 2115 0 R >> -% 1861 0 obj +% 2097 0 obj << -/Type /Page -/Contents 1862 0 R -/Resources 1860 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1865 0 R -/Annots [ 1858 0 R 1859 0 R ] +/Type /Annot +/Subtype /Link +/Border[0 0 0]/H/I/C[1 0 0] +/Rect [453.687 438.686 460.661 450.746] +/A << /S /GoTo /D (figure.5) >> >> -% 1858 0 obj +% 2098 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [320.317 442.264 387.374 454.323] -/A << /S /GoTo /D (precdata) >> +/Rect [362.965 188.317 369.938 200.376] +/A << /S /GoTo /D (figure.8) >> >> -% 1859 0 obj +% 2099 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [171.257 352.6 177.533 366.303] -/A << /S /GoTo /D (Hfootnote.4) >> +/Rect [477.685 140.43 484.659 152.49] +/A << /S /GoTo /D (algocf.2) >> >> -% 1863 0 obj +% 2103 0 obj << -/D [1861 0 R /XYZ 98.895 753.953 null] +/D [2101 0 R /XYZ 149.705 753.953 null] >> -% 545 0 obj +% 2104 0 obj << -/D [1861 0 R /XYZ 99.895 716.092 null] +/D [2101 0 R /XYZ 447.542 476.365 null] >> -% 1864 0 obj +% 2105 0 obj << -/D [1861 0 R /XYZ 114.242 129.79 null] +/D [2101 0 R /XYZ 150.705 223.169 null] >> -% 1860 0 obj +% 2100 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R >> +/Font << /F62 667 0 R /F60 666 0 R /F93 915 0 R /F67 913 0 R /F102 1016 0 R /F120 1782 0 R /F69 1460 0 R /F91 914 0 R /F59 665 0 R >> +/XObject << /Im8 2096 0 R >> /ProcSet [ /PDF /Text ] >> -% 1871 0 obj +% 2121 0 obj << /Type /Page -/Contents 1872 0 R -/Resources 1870 0 R +/Contents 2122 0 R +/Resources 2120 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1865 0 R -/Annots [ 1866 0 R 1867 0 R 1868 0 R 1869 0 R ] +/Parent 2057 0 R +/Annots [ 2119 0 R ] >> -% 1866 0 obj +% 2126 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [365.487 576.377 443.006 588.437] -/A << /S /GoTo /D (spdata) >> +/Producer (GPL Ghostscript 9.10) +/CreationDate (D:20140329133927+01'00') +/ModDate (D:20140329133927+01'00') +/Creator (cairo 1.13.1 \(http://cairographics.org\)) >> -% 1867 0 obj +% 2127 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [451.902 505.741 518.96 517.801] -/A << /S /GoTo /D (precdata) >> +/Type /ExtGState +/OPM 1 >> -% 1868 0 obj +% 2128 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [422.264 435.105 489.322 447.165] -/A << /S /GoTo /D (descdata) >> +/BaseFont /BNRUAU+TimesNewRomanPSMT +/FontDescriptor 2130 0 R +/ToUnicode 2131 0 R +/Type /Font +/FirstChar 45 +/LastChar 55 +/Widths [ 333 0 0 500 500 500 0 0 0 0 500] +/Subtype /TrueType >> -% 1869 0 obj +% 2129 0 obj +<< +/BaseFont /ICFUKB+FreeSerif +/FontDescriptor 2132 0 R +/ToUnicode 2133 0 R +/Type /Font +/FirstChar 32 +/LastChar 89 +/Widths [ 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 721 0 0 0 610 564 0 0 0 0 0 0 0 0 724 0 0 667 529 606 0 0 0 0 701] +/Subtype /TrueType +>> +% 2130 0 obj +<< +/Type /FontDescriptor +/FontName /BNRUAU+TimesNewRomanPSMT +/FontBBox [ 21 -13 638 675] +/Flags 65540 +/Ascent 675 +/CapHeight 675 +/Descent -13 +/ItalicAngle 0 +/StemV 95 +/MissingWidth 777 +/FontFile2 2134 0 R +>> +% 2132 0 obj +<< +/Type /FontDescriptor +/FontName /ICFUKB+FreeSerif +/FontBBox [ 0 -71 706 752] +/Flags 65540 +/Ascent 752 +/CapHeight 679 +/Descent -71 +/ItalicAngle 0 +/StemV 105 +/MissingWidth 600 +/FontFile2 2135 0 R +>> +% 2119 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [368.227 128.475 435.285 140.535] -/A << /S /GoTo /D (precdata) >> +/Rect [371.292 548.965 378.265 561.025] +/A << /S /GoTo /D (figure.5) >> >> -% 1873 0 obj +% 2123 0 obj << -/D [1871 0 R /XYZ 149.705 753.953 null] +/D [2121 0 R /XYZ 98.895 753.953 null] >> -% 549 0 obj +% 2116 0 obj << -/D [1871 0 R /XYZ 150.705 716.092 null] +/D [2121 0 R /XYZ 396.732 586.644 null] >> -% 1870 0 obj +% 2124 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/D [2121 0 R /XYZ 99.895 537.967 null] >> -% 1875 0 obj +% 2117 0 obj << -/Type /Page -/Contents 1876 0 R -/Resources 1874 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1865 0 R +/D [2121 0 R /XYZ 114.839 395.424 null] >> -% 1877 0 obj +% 2125 0 obj << -/D [1875 0 R /XYZ 98.895 753.953 null] +/D [2121 0 R /XYZ 99.895 199.14 null] >> -% 1874 0 obj +% 2120 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F62 667 0 R /F67 913 0 R /F102 1016 0 R /F59 665 0 R /F120 1782 0 R >> +/XObject << /Im9 2118 0 R >> /ProcSet [ /PDF /Text ] >> -% 1883 0 obj +% 2139 0 obj << /Type /Page -/Contents 1884 0 R -/Resources 1882 0 R +/Contents 2140 0 R +/Resources 2138 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1865 0 R -/Annots [ 1878 0 R 1879 0 R 1880 0 R 1881 0 R ] +/Parent 2057 0 R +/Annots [ 2137 0 R ] >> -% 1878 0 obj +% 2143 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [371.126 573.77 438.184 585.83] -/A << /S /GoTo /D (precdata) >> +/Producer (GPL Ghostscript 9.10) +/CreationDate (D:20140329133928+01'00') +/ModDate (D:20140329133928+01'00') +/Creator (cairo 1.13.1 \(http://cairographics.org\)) +>> +% 2144 0 obj +<< +/Type /ExtGState +/OPM 1 +>> +% 2146 0 obj +<< +/BaseFont /PBIKKX+TimesNewRomanPSMT +/FontDescriptor 2148 0 R +/ToUnicode 2149 0 R +/Type /Font +/FirstChar 45 +/LastChar 55 +/Widths [ 333 0 0 500 500 0 500 500 500 500 500] +/Subtype /TrueType +>> +% 2147 0 obj +<< +/BaseFont /ZBHFTP+FreeSerif +/FontDescriptor 2150 0 R +/ToUnicode 2151 0 R +/Type /Font +/FirstChar 32 +/LastChar 89 +/Widths [ 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 721 0 670 0 610 564 0 714 0 0 709 0 0 0 724 0 0 667 529 606 0 0 0 0 701] +/Subtype /TrueType +>> +% 2148 0 obj +<< +/Type /FontDescriptor +/FontName /PBIKKX+TimesNewRomanPSMT +/FontBBox [ 15 -13 638 675] +/Flags 65540 +/Ascent 675 +/CapHeight 675 +/Descent -13 +/ItalicAngle 0 +/StemV 95 +/MissingWidth 777 +/FontFile2 2152 0 R +>> +% 2150 0 obj +<< +/Type /FontDescriptor +/FontName /ZBHFTP+FreeSerif +/FontBBox [ 0 -71 706 752] +/Flags 65540 +/Ascent 752 +/CapHeight 679 +/Descent -71 +/ItalicAngle 0 +/StemV 105 +/MissingWidth 600 +/FontFile2 2153 0 R >> -% 1879 0 obj +% 2137 0 obj << /Type /Annot /Subtype /Link /Border[0 0 0]/H/I/C[1 0 0] -/Rect [393.303 517.98 469.357 530.039] -/A << /S /GoTo /D (vdata) >> +/Rect [440.392 356.078 447.366 368.137] +/A << /S /GoTo /D (figure.5) >> >> -% 1880 0 obj +% 2141 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [374.822 462.189 441.88 474.248] -/A << /S /GoTo /D (descdata) >> +/D [2139 0 R /XYZ 149.705 753.953 null] >> -% 1881 0 obj +% 2142 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [393.303 272.899 469.357 284.958] -/A << /S /GoTo /D (vdata) >> +/D [2139 0 R /XYZ 447.542 393.756 null] >> -% 1885 0 obj +% 2138 0 obj +<< +/Font << /F62 667 0 R /F67 913 0 R /F60 666 0 R /F59 665 0 R /F93 915 0 R /F102 1016 0 R /F120 1782 0 R >> +/XObject << /Im10 2136 0 R >> +/ProcSet [ /PDF /Text ] +>> +% 2155 0 obj << -/D [1883 0 R /XYZ 149.705 753.953 null] +/Type /Page +/Contents 2156 0 R +/Resources 2154 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 2158 0 R >> -% 553 0 obj +% 2157 0 obj << -/D [1883 0 R /XYZ 150.705 716.092 null] +/D [2155 0 R /XYZ 98.895 753.953 null] >> -% 1882 0 obj +% 2154 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> +/Font << /F102 1016 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1888 0 obj +% 2160 0 obj << /Type /Page -/Contents 1889 0 R -/Resources 1887 0 R +/Contents 2161 0 R +/Resources 2159 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1865 0 R -/Annots [ 1886 0 R ] +/Parent 2158 0 R >> -% 1886 0 obj +% 2162 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [320.317 573.77 387.374 585.83] -/A << /S /GoTo /D (precdata) >> +/D [2160 0 R /XYZ 149.705 753.953 null] >> -% 1890 0 obj +% 593 0 obj << -/D [1888 0 R /XYZ 98.895 753.953 null] +/D [2160 0 R /XYZ 150.705 716.092 null] >> -% 557 0 obj +% 2159 0 obj << -/D [1888 0 R /XYZ 99.895 716.092 null] +/Font << /F59 665 0 R /F62 667 0 R /F102 1016 0 R /F67 913 0 R /F60 666 0 R >> +/ProcSet [ /PDF /Text ] >> - -endstream -endobj -1894 0 obj +% 2164 0 obj << -/Length 973 +/Type /Page +/Contents 2165 0 R +/Resources 2163 0 R +/MediaBox [0 0 595.276 841.89] +/Parent 2158 0 R >> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 150.705 706.129 Td [(10.5)-1000(clone)-250(\227)-250(clone)-250(current)-250(preconditioner)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf 0 -18.964 Td [(call)-1050(prec%clone\050precout,info\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(prec)]TJ -0 g 0 G -/F54 9.9626 Tf 24.348 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -77.917 -33.873 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.926 Td [(precout)]TJ -0 g 0 G -/F54 9.9626 Tf 39.292 0 Td [(A)-250(copy)-250(of)-250(the)-250(input)-250(object.)]TJ -0 g 0 G -/F51 9.9626 Tf -39.292 -19.925 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.8 0 Td [(Return)-250(code.)]TJ -0 g 0 G - 140.583 -449.28 Td [(156)]TJ -0 g 0 G -ET - -endstream -endobj -1900 0 obj +% 2166 0 obj << -/Length 2703 +/D [2164 0 R /XYZ 98.895 753.953 null] >> -stream -0 g 0 G -0 g 0 G -BT -/F51 11.9552 Tf 99.895 706.129 Td [(10.6)-1000(free)-250(\227)-250(Free)-250(a)-250(preconditioner)]TJ -0 g 0 G -0 g 0 G -/F59 9.9626 Tf 0 -18.964 Td [(call)-525(prec%free\050info\051)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -21.918 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Asynchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.925 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.925 Td [(prec)]TJ -0 g 0 G -/F54 9.9626 Tf 24.349 0 Td [(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf 21.579 0 Td [(.)]TJ -53.011 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 196.511 0 Td [(psb)]TJ -ET -q -1 0 0 1 337.631 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 340.77 577.576 Td [(prec)]TJ -ET -q -1 0 0 1 362.319 577.775 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 365.457 577.576 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -286.483 -19.925 Td [(On)-250(Exit)]TJ -0 g 0 G -0 g 0 G - 0 -19.926 Td [(prec)]TJ -0 g 0 G -/F54 9.9626 Tf 24.349 0 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -30.874 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(pr)18(econditioner)-250(data)-250(str)8(uctur)18(e)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 196.511 0 Td [(psb)]TJ -ET -q -1 0 0 1 337.631 502.059 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 340.77 501.86 Td [(prec)]TJ -ET -q -1 0 0 1 362.319 502.059 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 365.457 501.86 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -286.483 -19.925 Td [(info)]TJ -0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -30.326 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Err)18(or)-250(code:)-310(if)-250(no)-250(err)18(or)74(,)-250(0)-250(is)-250(r)18(eturned.)]TJ/F51 11.9552 Tf -24.907 -21.918 Td [(Notes)]TJ/F54 9.9626 Tf 34.363 0 Td [(Releases)-250(all)-250(internal)-250(storage.)]TJ -0 g 0 G - 130.021 -333.713 Td [(157)]TJ -0 g 0 G -ET - -endstream -endobj -1905 0 obj +% 597 0 obj << -/Length 548 +/D [2164 0 R /XYZ 99.895 554.06 null] +>> +% 601 0 obj +<< +/D [2164 0 R /XYZ 99.895 527.944 null] +>> +% 2167 0 obj +<< +/D [2164 0 R /XYZ 99.895 315.727 null] >> -stream -0 g 0 G -0 g 0 G -BT -/F51 14.3462 Tf 150.705 705.784 Td [(11)-1000(Iterative)-250(Methods)]TJ/F54 9.9626 Tf 0 -22.702 Td [(In)-402(this)-403(chapter)-402(we)-403(pr)18(ovide)-402(r)18(outines)-403(for)-402(pr)18(econditioners)-402(and)-403(iterative)-402(meth-)]TJ 0 -11.955 Td [(ods.)-472(The)-304(interfaces)-304(for)-304(Krylov)-304(subspace)-303(methods)-304(ar)18(e)-304(available)-304(in)-304(the)-304(module)]TJ/F59 9.9626 Tf 0 -11.955 Td [(psb_krylov_mod)]TJ/F54 9.9626 Tf 73.225 0 Td [(.)]TJ -0 g 0 G - 91.158 -568.734 Td [(158)]TJ -0 g 0 G -ET endstream endobj -1912 0 obj +2171 0 obj << -/Length 8246 +/Length 5390 >> stream 0 g 0 G 0 g 0 G BT -/F51 11.9552 Tf 99.895 706.129 Td [(11.1)-1000(psb)]TJ +/F59 11.9552 Tf 150.705 706.129 Td [(psb)]TJ ET q -1 0 0 1 153.407 706.328 cm +1 0 0 1 171.339 706.328 cm []0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 11.9552 Tf 156.993 706.129 Td [(krylov)-250(\227)-250(Krylov)-250(Methods)-250(Driver)-250(Routine)]TJ/F54 9.9626 Tf -57.098 -18.964 Td [(This)-266(subr)18(outine)-266(is)-267(a)-266(driver)-266(that)-267(p)1(r)18(ovides)-267(a)-266(general)-266(interface)-266(for)-267(all)-266(the)-266(Krylov-)]TJ 0 -11.955 Td [(Subspace)-250(family)-250(methods)-250(implemented)-250(in)-250(PSBLAS)-250(version)-250(2.)]TJ 14.944 -11.955 Td [(The)-250(stopping)-250(criterion)-250(can)-250(take)-250(the)-250(following)-250(values:)]TJ -0 g 0 G -/F51 9.9626 Tf -14.944 -18.774 Td [(1)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(normwise)-222(backwar)18(d)-221(err)18(or)-222(in)-221(the)-222(in\002nity)-222(norm;)-231(the)-221(iteration)-222(is)-222(stopped)-221(when)]TJ/F52 9.9626 Tf 109.036 -26.864 Td [(e)-15(r)-25(r)]TJ/F85 10.3811 Tf 15.14 0 Td [(=)]TJ/F83 10.3811 Tf 40.62 6.745 Td [(k)]TJ/F52 9.9626 Tf 5.34 0 Td [(r)]TJ/F52 7.5716 Tf 4.041 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(k)]TJ -ET -q -1 0 0 1 246.191 620.108 cm -[]0 d 0 J 0.398 w 0 0 m 74.372 0 l S -Q -BT -/F85 10.3811 Tf 246.316 610.783 Td [(\050)]TJ/F83 10.3811 Tf 4.274 0 Td [(k)]TJ/F52 9.9626 Tf 5.938 0 Td [(A)]TJ/F83 10.3811 Tf 7.442 0 Td [(k)-24(k)]TJ/F52 9.9626 Tf 11.048 0 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.876 1.96 Td [(k)]TJ/F85 10.3811 Tf 7.376 0 Td [(+)]TJ/F83 10.3811 Tf 10.256 0 Td [(k)]TJ/F52 9.9626 Tf 5.44 0 Td [(b)]TJ/F83 10.3811 Tf 4.861 0 Td [(k)]TJ/F85 10.3811 Tf 5.44 0 Td [(\051)]TJ/F61 10.3811 Tf 8.236 6.834 Td [(<)]TJ/F52 9.9626 Tf 11.087 0 Td [(e)-80(p)-25(s)]TJ -0 g 0 G -/F51 9.9626 Tf -235.842 -29.908 Td [(2)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(Relative)-250(r)18(esidual)-250(in)-250(the)-250(2-norm;)-250(the)-250(iteration)-250(is)-250(stopped)-250(when)]TJ/F52 9.9626 Tf 136.209 -26.865 Td [(e)-15(r)-25(r)]TJ/F85 10.3811 Tf 15.141 0 Td [(=)]TJ/F83 10.3811 Tf 13.446 6.745 Td [(k)]TJ/F52 9.9626 Tf 5.34 0 Td [(r)]TJ/F52 7.5716 Tf 4.041 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(k)]TJ +/F59 11.9552 Tf 174.926 706.129 Td [(cuda)]TJ ET q -1 0 0 1 273.365 563.335 cm -[]0 d 0 J 0.398 w 0 0 m 20.025 0 l S +1 0 0 1 201.538 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F83 10.3811 Tf 273.49 554.01 Td [(k)]TJ/F52 9.9626 Tf 5.439 0 Td [(b)]TJ/F83 10.3811 Tf 4.862 0 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.744 Td [(2)]TJ/F61 10.3811 Tf 8.371 8.578 Td [(<)]TJ/F52 9.9626 Tf 11.086 0 Td [(e)-80(p)-25(s)]TJ -0 g 0 G -/F51 9.9626 Tf -208.668 -29.848 Td [(3)]TJ +/F59 11.9552 Tf 205.125 706.129 Td [(DeviceSync)-250(\227)-250(Synchronize)-250(CUDA)-250(device)]TJ +0.00 0.44 0.13 rg 0.00 0.44 0.13 RG +/F67 9.9626 Tf -54.42 -19.65 Td [(call)]TJ 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(Relative)-250(r)18(esidual)-250(r)18(eduction)-250(in)-250(the)-250(2-norm;)-250(the)-250(iteration)-250(is)-250(stopped)-250(when)]TJ/F52 9.9626 Tf 134.486 -26.865 Td [(e)-15(r)-25(r)]TJ/F85 10.3811 Tf 15.14 0 Td [(=)]TJ/F83 10.3811 Tf 15.17 6.745 Td [(k)]TJ/F52 9.9626 Tf 5.34 0 Td [(r)]TJ/F52 7.5716 Tf 4.041 -1.96 Td [(i)]TJ/F83 10.3811 Tf 2.875 1.96 Td [(k)]TJ -ET -q -1 0 0 1 271.641 506.622 cm -[]0 d 0 J 0.398 w 0 0 m 23.472 0 l S -Q -BT -/F83 10.3811 Tf 271.766 497.297 Td [(k)]TJ/F52 9.9626 Tf 5.34 0 Td [(r)]TJ/F54 7.5716 Tf 4 -1.744 Td [(0)]TJ/F83 10.3811 Tf 4.408 1.744 Td [(k)]TJ/F54 7.5716 Tf 5.315 -1.744 Td [(2)]TJ/F61 10.3811 Tf 8.371 8.578 Td [(<)]TJ/F52 9.9626 Tf 11.086 0 Td [(e)-80(p)-25(s)]TJ/F54 9.9626 Tf -210.391 -29.848 Td [(The)-248(behaviour)-248(is)-248(contr)18(olled)-249(by)-248(the)-248(istop)-248(ar)18(gument)-248(\050see)-248(later\051.)-310(In)-248(the)-248(above)-248(for)18(-)]TJ 0 -11.956 Td [(mulae,)]TJ/F52 9.9626 Tf 32.81 0 Td [(x)]TJ/F52 7.5716 Tf 5.147 -1.96 Td [(i)]TJ/F54 9.9626 Tf 5.303 1.96 Td [(is)-256(the)-256(tentative)-257(soluti)1(on)-257(and)]TJ/F52 9.9626 Tf 125.144 0 Td [(r)]TJ/F52 7.5716 Tf 4.042 -1.96 Td [(i)]TJ/F85 10.3811 Tf 5.757 1.96 Td [(=)]TJ/F52 9.9626 Tf 11.2 0 Td [(b)]TJ/F83 10.3811 Tf 6.822 0 Td [(\000)]TJ/F52 9.9626 Tf 10.777 0 Td [(A)-42(x)]TJ/F52 7.5716 Tf 12.759 -1.96 Td [(i)]TJ/F54 9.9626 Tf 5.303 1.96 Td [(the)-256(corr)18(esponding)-256(r)18(esidual)]TJ -225.064 -11.955 Td [(at)-250(the)]TJ/F52 9.9626 Tf 27.083 0 Td [(i)]TJ/F54 9.9626 Tf 2.964 0 Td [(-th)-250(iteration.)]TJ -28.305 -17.357 Td [(c)-175(a)-175(l)-174(l)-880(p)-105(s)-105(b)]TJ + [-525(psb_cuda_DeviceSync\050\051)]TJ/F62 9.9626 Tf 14.944 -23.333 Td [(This)-310(subr)18(outine)-310(ensur)18(es)-310(that)-309(all)-310(pr)18(eviosly)-310(invoked)-310(kernels,)-325(i.e.)-489(all)-310(invoca-)]TJ -14.944 -11.955 Td [(tion)-250(of)-250(CUDA-side)-250(code,)-250(have)-250(completed.)]TJ/F59 11.9552 Tf 0 -31.147 Td [(psb)]TJ ET q -1 0 0 1 150.28 433.215 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 171.339 620.243 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F54 9.9626 Tf 154.313 433.015 Td [(k)-105(r)-105(y)-104(l)-105(o)-105(v)-238(\050)-156(m)-21(e)-22(t)-21(h)-22(o)-22(d)-218(,)-208(a)-242(,)-255(p)-80(r)-81(e)-80(c)-335(,)-191(b)-206(,)-203(x)-231(,)-234(e)-60(p)-59(s)-293(,)-273(d)-98(e)-97(s)-98(c)]TJ +/F59 11.9552 Tf 174.926 620.044 Td [(cuda)]TJ ET q -1 0 0 1 352.02 433.215 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 201.538 620.243 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F54 9.9626 Tf 355.983 433.015 Td [(a)-370(,)-283(i)-108(n)-108(f)-108(o)-274(,)-57(&)]TJ -227.086 -11.955 Td [(&)-580(i)-69(t)-69(m)-70(a)-69(x)-313(,)-327(i)-151(t)-152(e)-151(r)-478(,)-281(e)-107(r)-106(r)-387(,)-321(i)-145(t)-146(r)-146(a)-145(c)-146(e)-466(,)-336(i)-161(r)-160(s)-161(t)-496(,)-291(i)-116(s)-116(t)-116(o)-116(p)-407(,)-219(c)-43(o)-43(n)-44(d)-177(\051)]TJ -0 g 0 G -0 g 0 G -0 g 0 G -/F51 9.9626 Tf -29.002 -25.88 Td [(T)90(ype:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.828 0 Td [(Synchr)18(onous.)]TJ -0 g 0 G -/F51 9.9626 Tf -29.828 -19.349 Td [(On)-250(Entry)]TJ -0 g 0 G -0 g 0 G - 0 -19.349 Td [(method)]TJ -0 g 0 G -/F54 9.9626 Tf 39.851 0 Td [(a)-193(string)-194(that)-193(de\002nes)-194(the)-194(it)1(erative)-194(method)-193(to)-194(be)-194(use)1(d.)-292(Supported)-193(values)]TJ -14.944 -11.956 Td [(ar)18(e:)]TJ -0 g 0 G -/F51 9.9626 Tf 0 -19.349 Td [(CG:)]TJ -0 g 0 G -/F54 9.9626 Tf 22.964 0 Td [(the)-250(Conjugate)-250(Gradient)-250(method;)]TJ -0 g 0 G -/F51 9.9626 Tf -22.964 -15.364 Td [(CGS:)]TJ -0 g 0 G -/F54 9.9626 Tf 29.051 0 Td [(the)-250(Conjugate)-250(Gradient)-250(Stabilized)-250(method;)]TJ -0 g 0 G -/F51 9.9626 Tf -29.051 -15.365 Td [(GCR:)]TJ -0 g 0 G -/F54 9.9626 Tf 30.157 0 Td [(the)-250(Generalized)-250(Conjugate)-250(Residual)-250(method;)]TJ -0 g 0 G -/F51 9.9626 Tf -30.157 -15.364 Td [(FCG:)]TJ -0 g 0 G -/F54 9.9626 Tf 28.503 0 Td [(the)-250(Flexible)-250(Conjugate)-250(Gradient)-250(method)]TJ -0 0 1 rg 0 0 1 RG -/F54 7.5716 Tf 176.854 3.616 Td [(5)]TJ -0 g 0 G -/F54 9.9626 Tf 4.284 -3.616 Td [(;)]TJ -0 g 0 G -/F51 9.9626 Tf -209.641 -15.364 Td [(BICG:)]TJ -0 g 0 G -/F54 9.9626 Tf 33.484 0 Td [(the)-250(Bi-Conjugate)-250(Gradient)-250(method;)]TJ -0 g 0 G -/F51 9.9626 Tf -33.484 -15.365 Td [(BICGST)92(AB:)]TJ -0 g 0 G -/F54 9.9626 Tf 59.696 0 Td [(the)-250(Bi-Conjugate)-250(Gradient)-250(Stabilized)-250(method;)]TJ -0 g 0 G -/F51 9.9626 Tf -59.696 -15.364 Td [(BICGST)92(ABL:)]TJ -0 g 0 G -/F54 9.9626 Tf 65.783 0 Td [(the)-218(Bi-Conjugate)-217(Gradient)-218(Stabilized)-218(method)-217(with)-218(r)18(estart-)]TJ -43.865 -11.955 Td [(ing;)]TJ -0 g 0 G -/F51 9.9626 Tf -21.918 -15.365 Td [(RGMRES:)]TJ +/F59 11.9552 Tf 205.125 620.044 Td [(getDeviceCount)]TJ/F67 9.9626 Tf -54.42 -19.65 Td [(ngpus)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 52.294 0 Td [(the)-250(Generalized)-250(Minimal)-250(Residual)-250(method)-250(with)-250(r)18(estarting.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -77.201 -19.349 Td [(a)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(the)-250(local)-250(portion)-250(of)-250(global)-250(sparse)-250(matrix)]TJ/F52 9.9626 Tf 178.414 0 Td [(A)]TJ/F54 9.9626 Tf 7.317 0 Td [(.)]TJ -170.787 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ -ET -q -1 0 0 1 309.258 138.701 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 312.397 138.501 Td [(Tspmat)]TJ + [-1050(psb_cuda_getDeviceCount\050\051)]TJ/F62 9.9626 Tf 14.944 -23.333 Td [(Get)-250(number)-250(of)-250(devices)-250(available)-250(on)-250(curr)18(ent)-250(computing)-250(node.)]TJ/F59 11.9552 Tf -14.944 -31.147 Td [(psb)]TJ ET q -1 0 0 1 344.406 138.701 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 171.339 546.113 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 347.544 138.501 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ -0 g 0 G +/F59 11.9552 Tf 174.926 545.914 Td [(cuda)]TJ ET q -1 0 0 1 99.895 130.091 cm -[]0 d 0 J 0.398 w 0 0 m 137.482 0 l S +1 0 0 1 201.538 546.113 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F54 5.9776 Tf 110.755 123.219 Td [(5)]TJ/F54 7.9701 Tf 3.487 -2.893 Td [(Note:)-310(the)-250(implementation)-250(is)-250(for)]TJ/F52 7.9701 Tf 113.297 0 Td [(F)-31(C)-45(G)]TJ/F85 8.3049 Tf 16.387 0 Td [(\050)]TJ/F54 7.9701 Tf 3.319 0 Td [(1)]TJ/F85 8.3049 Tf 4.085 0 Td [(\051)]TJ/F54 7.9701 Tf 3.32 0 Td [(.)]TJ -0 g 0 G -0 g 0 G -/F54 9.9626 Tf 9.629 -29.888 Td [(159)]TJ -0 g 0 G -ET - -endstream -endobj -1923 0 obj -<< -/Length 7054 ->> -stream -0 g 0 G +/F59 11.9552 Tf 205.125 545.914 Td [(getDevice)]TJ/F67 9.9626 Tf -54.42 -19.65 Td [(ngpus)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -BT -/F51 9.9626 Tf 150.705 706.129 Td [(prec)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 24.348 0 Td [(The)-250(data)-250(str)8(uctur)18(e)-250(containing)-250(the)-250(pr)18(econditioner)74(.)]TJ 0.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ + [-1050(psb_cuda_getDevice\050\051)]TJ/F62 9.9626 Tf 14.944 -23.333 Td [(Get)-250(device)-250(in)-250(use)-250(by)-250(curr)18(ent)-250(pr)18(ocess.)]TJ/F59 11.9552 Tf -14.944 -31.147 Td [(psb)]TJ ET q -1 0 0 1 360.068 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 171.339 471.983 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 363.206 658.308 Td [(prec)]TJ +/F59 11.9552 Tf 174.926 471.784 Td [(cuda)]TJ ET q -1 0 0 1 384.755 658.507 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 201.538 471.983 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 387.893 658.308 Td [(type)]TJ +/F59 11.9552 Tf 205.125 471.784 Td [(setDevice)]TJ/F67 9.9626 Tf -54.42 -19.65 Td [(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -258.11 -22.202 Td [(b)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 11.068 0 Td [(The)-250(RHS)-250(vector)74(.)]TJ 13.838 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 218.688 0 Td [(psb)]TJ -ET -q -1 0 0 1 410.618 588.484 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 413.756 588.285 Td [(T)]TJ + [-525(psb_cuda_setDevice\050dev\051)]TJ/F62 9.9626 Tf 14.944 -23.333 Td [(Set)-250(device)-250(to)-250(be)-250(used)-250(by)-250(curr)18(ent)-250(pr)18(ocess.)]TJ/F59 11.9552 Tf -14.944 -31.147 Td [(psb)]TJ ET q -1 0 0 1 419.614 588.484 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 171.339 397.853 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 422.752 588.285 Td [(vect)]TJ +/F59 11.9552 Tf 174.926 397.654 Td [(cuda)]TJ ET q -1 0 0 1 444.301 588.484 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 201.538 397.853 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 447.439 588.285 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ -0 g 0 G -/F51 9.9626 Tf -317.656 -22.202 Td [(x)]TJ +/F59 11.9552 Tf 205.125 397.654 Td [(DeviceHasUV)129(A)]TJ/F67 9.9626 Tf -54.42 -19.65 Td [(hasUva)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(initial)-250(guess.)]TJ 14.944 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(local)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.348 0 Td [(.)]TJ -56.148 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 218.687 0 Td [(psb)]TJ -ET -q -1 0 0 1 410.618 518.461 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S -Q -BT -/F59 9.9626 Tf 413.756 518.262 Td [(T)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(psb_cuda_DeviceHasUVA\050\051)]TJ/F62 9.9626 Tf 14.944 -23.333 Td [(Returns)-295(tr)8(ue)-294(if)-295(device)-295(curr)18(ently)-295(in)-294(use)-295(supports)-295(UV)111(A)-295(\050Uni\002ed)-294(V)55(irtual)-295(Ad-)]TJ -14.944 -11.955 Td [(dr)18(essing\051.)]TJ/F59 11.9552 Tf 0 -31.147 Td [(psb)]TJ ET q -1 0 0 1 419.614 518.461 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 171.339 311.768 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 422.752 518.262 Td [(vect)]TJ +/F59 11.9552 Tf 174.926 311.569 Td [(cuda)]TJ ET q -1 0 0 1 444.301 518.461 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 201.538 311.768 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 447.439 518.262 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F59 11.9552 Tf 205.125 311.569 Td [(W)74(arpSize)]TJ/F67 9.9626 Tf -54.42 -19.65 Td [(nw)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -317.656 -22.203 Td [(eps)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(The)-250(stopping)-250(tolerance.)]TJ 4.433 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(r)18(eal)-250(number)74(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.906 -22.203 Td [(desc)]TJ + [-525(psb_cuda_WarpSize\050\051)]TJ/F62 9.9626 Tf 14.944 -23.333 Td [(Returns)-250(the)-250(warp)-250(size.)]TJ/F59 11.9552 Tf -14.944 -31.147 Td [(psb)]TJ ET q -1 0 0 1 171.218 426.236 cm -[]0 d 0 J 0.398 w 0 0 m 2.989 0 l S +1 0 0 1 171.339 237.638 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 174.926 237.439 Td [(cuda)]TJ +ET +q +1 0 0 1 201.538 237.638 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F51 9.9626 Tf 174.207 426.036 Td [(a)]TJ +/F59 11.9552 Tf 205.125 237.439 Td [(MultiProcessors)]TJ/F67 9.9626 Tf -54.42 -19.65 Td [(nmp)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 9.962 0 Td [(contains)-250(data)-250(str)8(uctur)18(es)-250(for)-250(communications.)]TJ -8.558 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.089 0 Td [(required)]TJ/F54 9.9626 Tf -27.089 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.801 0 Td [(in)]TJ/F54 9.9626 Tf 9.404 0 Td [(.)]TJ -41.205 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(str)8(uctur)18(ed)-250(data)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 168.138 0 Td [(psb)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(psb_cuda_MultiProcessors\050\051)]TJ/F62 9.9626 Tf 14.944 -23.333 Td [(Returns)-250(the)-250(number)-250(of)-250(multipr)18(ocessors)-250(in)-250(the)-250(CUDA)-250(device.)]TJ/F59 11.9552 Tf -14.944 -31.147 Td [(psb)]TJ ET q -1 0 0 1 360.068 378.415 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 171.339 163.508 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 363.206 378.216 Td [(desc)]TJ +/F59 11.9552 Tf 174.926 163.309 Td [(cuda)]TJ ET q -1 0 0 1 384.755 378.415 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 201.538 163.508 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 387.893 378.216 Td [(type)]TJ -0 g 0 G -/F54 9.9626 Tf 20.922 0 Td [(.)]TJ +/F59 11.9552 Tf 205.125 163.309 Td [(MaxThreadsPerMP)]TJ/F67 9.9626 Tf -54.42 -19.65 Td [(nt)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -258.11 -22.203 Td [(itmax)]TJ -0 g 0 G -/F54 9.9626 Tf 30.436 0 Td [(The)-250(maximum)-250(number)-250(of)-250(iterations)-250(to)-250(perform.)]TJ -5.529 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F52 9.9626 Tf 38.569 0 Td [(i)-32(t)-25(m)-40(a)-42(x)]TJ/F85 10.3811 Tf 27.744 0 Td [(=)]TJ/F54 9.9626 Tf 10.961 0 Td [(1000.)]TJ -77.274 -11.956 Td [(Speci\002ed)-250(as:)-310(an)-250(integer)-250(variable)]TJ/F52 9.9626 Tf 142.349 0 Td [(i)-32(t)-25(m)-40(a)-42(x)]TJ/F83 10.3811 Tf 27.743 0 Td [(\025)]TJ/F54 9.9626 Tf 10.962 0 Td [(1.)]TJ -0 g 0 G -/F51 9.9626 Tf -205.961 -22.202 Td [(itrace)]TJ -0 g 0 G -/F54 9.9626 Tf 29.878 0 Td [(If)]TJ/F61 10.3811 Tf 11.007 0 Td [(>)]TJ/F54 9.9626 Tf 14.142 0 Td [(0)-422(print)-423(out)-422(an)-422(informational)-423(message)-422(about)-422(conver)18(gence)-423(every)]TJ/F52 9.9626 Tf -30.066 -11.955 Td [(i)-32(t)-15(r)-50(a)-25(c)-25(e)]TJ/F54 9.9626 Tf 26.396 0 Td [(iterations.)-310(If)]TJ/F85 10.3811 Tf 56.313 0 Td [(=)]TJ/F54 9.9626 Tf 10.961 0 Td [(0)-250(print)-250(a)-250(message)-250(in)-250(case)-250(of)-250(conver)18(gence)-250(failur)18(e.)]TJ -93.724 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf -31.431 -11.956 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(Default:)]TJ/F52 9.9626 Tf 38.57 0 Td [(i)-32(t)-15(r)-50(a)-25(c)-25(e)]TJ/F85 10.3811 Tf 26.796 0 Td [(=)]TJ/F83 10.3811 Tf 11.086 0 Td [(\000)]TJ/F54 9.9626 Tf 8.194 0 Td [(1.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -109.553 -34.158 Td [(irst)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 19.915 0 Td [(An)-250(integer)-250(specifying)-250(the)-250(r)18(estart)-250(parameter)74(.)]TJ 4.992 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.431 0 Td [(global)]TJ/F54 9.9626 Tf -31.431 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.186 0 Td [(.)]TJ -65.274 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(V)92(alues:)]TJ/F52 9.9626 Tf 34.613 0 Td [(i)-22(r)-35(s)-25(t)]TJ/F61 10.3811 Tf 17.671 0 Td [(>)]TJ/F54 9.9626 Tf 10.961 0 Td [(0.)-298(This)-214(is)-213(employed)-214(for)-213(the)-214(BiCGST)74(ABL)-214(or)-213(RGMRES)-214(meth-)]TJ -63.245 -11.955 Td [(ods,)-250(otherwise)-250(it)-250(is)-250(ignor)18(ed.)]TJ + [-525(psb_cuda_MaxThreadsPerMP\050\051)]TJ/F62 9.9626 Tf 14.944 -23.333 Td [(Returns)-250(the)-250(maximum)-250(number)-250(of)-250(thr)18(eads)-250(per)-250(multipr)18(ocessor)74(.)]TJ 0 g 0 G - 139.477 -29.888 Td [(160)]TJ + 149.439 -29.888 Td [(172)]TJ 0 g 0 G ET endstream endobj -1928 0 obj +2175 0 obj << -/Length 4489 +/Length 2721 >> stream 0 g 0 G 0 g 0 G -0 g 0 G BT -/F51 9.9626 Tf 99.895 706.129 Td [(istop)]TJ -0 g 0 G -/F54 9.9626 Tf 27.666 0 Td [(An)-250(integer)-250(specifying)-250(the)-250(stopping)-250(criterion.)]TJ -2.759 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf 38.187 0 Td [(.)]TJ -65.275 -11.956 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(in)]TJ/F54 9.9626 Tf 9.405 0 Td [(.)]TJ -41.205 -11.955 Td [(V)92(alues:)-351(1:)-351(use)-271(the)-270(normwise)-271(backwar)18(d)-270(err)18(or)74(,)-276(2:)-351(use)-271(the)-270(scaled)-271(2-norm)-270(of)]TJ 0 -11.955 Td [(the)-250(r)18(esidual,)-250(3:)-310(use)-250(the)-250(r)18(esidual)-250(r)18(eduction)-250(in)-250(the)-250(2-norm.)-310(Default:)-310(2.)]TJ -0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(On)-250(Return)]TJ -0 g 0 G -0 g 0 G - 0 -19.926 Td [(x)]TJ -0 g 0 G -/F54 9.9626 Tf 9.963 0 Td [(The)-250(computed)-250(solution.)]TJ 14.944 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(inout)]TJ/F54 9.9626 Tf 24.349 0 Td [(.)]TJ -56.149 -11.955 Td [(Speci\002ed)-250(as:)-310(a)-250(rank)-250(one)-250(array)-250(or)-250(an)-250(object)-250(of)-250(type)]TJ -0 0 1 rg 0 0 1 RG -/F59 9.9626 Tf 218.688 0 Td [(psb)]TJ +/F59 11.9552 Tf 99.895 706.129 Td [(psb)]TJ ET q -1 0 0 1 359.808 558.881 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 120.53 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 362.947 558.682 Td [(T)]TJ +/F59 11.9552 Tf 124.116 706.129 Td [(cuda)]TJ ET q -1 0 0 1 368.804 558.881 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 150.729 706.328 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 371.943 558.682 Td [(vect)]TJ +/F59 11.9552 Tf 154.315 706.129 Td [(MaxRegistersPerBlock)]TJ/F67 9.9626 Tf -54.42 -18.964 Td [(nr)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ +0 g 0 G +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG +0 g 0 G + [-525(psb_cuda_MaxRegistersPerBlock\050\051)]TJ/F62 9.9626 Tf 14.944 -21.918 Td [(Returns)-250(the)-250(maximum)-250(number)-250(of)-250(r)18(egister)-250(per)-250(thr)18(ead)-250(block.)]TJ/F59 11.9552 Tf -14.944 -29.133 Td [(psb)]TJ ET q -1 0 0 1 393.492 558.881 cm -[]0 d 0 J 0.398 w 0 0 m 3.138 0 l S +1 0 0 1 120.53 636.313 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 124.116 636.114 Td [(cuda)]TJ +ET +q +1 0 0 1 150.729 636.313 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S Q BT -/F59 9.9626 Tf 396.63 558.682 Td [(type)]TJ +/F59 11.9552 Tf 154.315 636.114 Td [(MemoryClockRate)]TJ/F67 9.9626 Tf -54.42 -18.964 Td [(cl)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.921 0 Td [(.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -317.656 -19.925 Td [(iter)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 20.473 0 Td [(The)-250(number)-250(of)-250(iterations)-250(performed.)]TJ 4.434 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Returned)-250(as:)-310(an)-250(integer)-250(variable.)]TJ + [-525(psb_cuda_MemoryClockRate\050\051)]TJ/F62 9.9626 Tf 14.944 -21.917 Td [(Returns)-250(the)-250(memory)-250(clock)-250(rate)-250(in)-250(KHz,)-250(as)-250(an)-250(integer)74(.)]TJ/F59 11.9552 Tf -14.944 -29.133 Td [(psb)]TJ +ET +q +1 0 0 1 120.53 566.299 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 124.116 566.1 Td [(cuda)]TJ +ET +q +1 0 0 1 150.729 566.299 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 154.315 566.1 Td [(MemoryBusW)37(idth)]TJ/F67 9.9626 Tf -54.42 -18.964 Td [(nb)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(err)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F54 9.9626 Tf 17.714 0 Td [(The)-250(conver)18(gence)-250(estimate)-250(on)-250(exit.)]TJ 7.193 -11.956 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Returned)-250(as:)-310(a)-250(r)18(eal)-250(number)74(.)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(cond)]TJ + [-525(psb_cuda_MemoryBusWidth\050\051)]TJ/F62 9.9626 Tf 14.944 -21.918 Td [(Returns)-250(the)-250(memory)-250(bus)-250(width)-250(in)-250(bits.)]TJ/F59 11.9552 Tf -14.944 -29.133 Td [(psb)]TJ +ET +q +1 0 0 1 120.53 496.284 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 124.116 496.085 Td [(cuda)]TJ +ET +q +1 0 0 1 150.729 496.284 cm +[]0 d 0 J 0.398 w 0 0 m 3.587 0 l S +Q +BT +/F59 11.9552 Tf 154.315 496.085 Td [(MemoryPeakBandwidth)]TJ/F67 9.9626 Tf -54.42 -18.964 Td [(bw)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 27.119 0 Td [(An)-210(esti)1(mate)-210(of)-210(the)-209(condition)-210(number)-209(of)-210(matrix)]TJ/F52 9.9626 Tf 204.999 0 Td [(A)]TJ/F54 9.9626 Tf 7.318 0 Td [(;)-223(only)-210(available)-209(with)-210(the)]TJ/F52 9.9626 Tf -214.444 -11.956 Td [(C)-45(G)]TJ/F54 9.9626 Tf 17.001 0 Td [(method)-250(on)-250(r)18(eal)-250(data.)]TJ -17.086 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(global)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(optional)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.955 Td [(Returned)-249(as:)-310(a)-249(r)18(eal)-249(number)74(.)-310(A)-249(corr)18(ect)-250(r)18(esult)-249(will)-249(be)-249(gr)18(eater)-250(than)-249(or)-249(equal)]TJ 0 -11.955 Td [(to)-403(one;)-480(if)-403(speci\002ed)-403(for)-403(non-r)18(eal)-403(data,)-441(or)-403(an)-403(err)18(or)-403(occurr)18(ed,)-441(zer)18(o)-403(is)-403(r)18(e-)]TJ 0 -11.956 Td [(turned.)]TJ +0.40 0.40 0.40 rg 0.40 0.40 0.40 RG + [-525(=)]TJ 0 g 0 G -/F51 9.9626 Tf -24.907 -19.925 Td [(info)]TJ +0.73 0.73 0.73 rg 0.73 0.73 0.73 RG 0 g 0 G -/F54 9.9626 Tf 23.801 0 Td [(Err)18(or)-250(code.)]TJ 1.106 -11.955 Td [(Scope:)]TJ/F51 9.9626 Tf 31.432 0 Td [(local)]TJ/F54 9.9626 Tf -31.432 -11.955 Td [(T)90(ype:)]TJ/F51 9.9626 Tf 27.088 0 Td [(required)]TJ/F54 9.9626 Tf -27.088 -11.955 Td [(Intent:)]TJ/F51 9.9626 Tf 31.8 0 Td [(out)]TJ/F54 9.9626 Tf 14.944 0 Td [(.)]TJ -46.744 -11.956 Td [(An)-250(integer)-250(value;)-250(0)-250(means)-250(no)-250(err)18(or)-250(has)-250(been)-250(detected.)]TJ + [-525(psb_cuda_MemoryPeakBandwidth\050\051)]TJ/F62 9.9626 Tf 0 -21.918 Td [(Returns)-250(the)-250(peak)-250(memory)-250(bandwidth)-250(in)-250(MB/s)-250(\050r)18(eal)-250(double)-250(pr)18(ecision\051.)]TJ 0 g 0 G - 139.477 -161.394 Td [(161)]TJ + 164.384 -364.765 Td [(173)]TJ 0 g 0 G ET endstream endobj -1932 0 obj +2179 0 obj << /Length 81 >> @@ -26278,104 +31329,104 @@ stream 0 g 0 G 0 g 0 G BT -/F54 9.9626 Tf 315.088 90.438 Td [(162)]TJ +/F62 9.9626 Tf 315.088 90.438 Td [(174)]TJ 0 g 0 G ET endstream endobj -1936 0 obj +2183 0 obj << -/Length 6590 +/Length 6353 >> stream 0 g 0 G 0 g 0 G BT -/F51 14.3462 Tf 99.895 705.784 Td [(References)]TJ -0 g 0 G -/F54 9.9626 Tf 4.982 -22.702 Td [([1])]TJ +/F59 14.3462 Tf 99.895 705.784 Td [(References)]TJ 0 g 0 G - [-500(D.)-314(Barbieri,)-330(V)129(.)-314(Car)18(dellini,)-330(S.)-314(Filippone)-314(and)-314(D.)-314(Rouson)]TJ/F52 9.9626 Tf 258.302 0 Td [(Design)-314(Patterns)-314(for)]TJ -241.705 -11.955 Td [(Scienti\002c)-224(Computations)-225(on)-224(Sparse)-224(Matrices)]TJ/F54 9.9626 Tf 175.255 0 Td [(,)-229(HPSS)-225(2011,)-229(Algorithms)-225(and)-224(Pr)18(o-)]TJ -175.255 -11.955 Td [(gramming)-396(T)92(ools)-395(for)-396(Next-Generation)-396(High-Performance)-396(Scie)1(nti\002c)-396(Soft-)]TJ 0 -11.955 Td [(war)18(e,)-250(Bor)18(deaux,)-250(Sep.)-250(2011)]TJ +/F62 9.9626 Tf 4.982 -23.579 Td [([1])]TJ 0 g 0 G - -16.597 -19.642 Td [([2])]TJ + [-500(G.)-362(Bella,)-391(S.)-362(Filippone,)-390(A.)-363(De)-362(Maio)-362(and)-363(M.)-362(T)92(esta,)]TJ/F60 9.9626 Tf 239.345 0 Td [(A)-362(Simulation)-362(Model)-363(for)]TJ -222.748 -11.955 Td [(For)18(est)-346(Fir)18(es)]TJ/F62 9.9626 Tf 47.93 0 Td [(,)-370(in)-346(J.)-346(Dongarra,)-370(K.)-346(Madsen,)-370(J.)-346(W)92(asniewski,)-370(editors,)-370(Pr)18(oceed-)]TJ -47.93 -11.955 Td [(ings)-338(of)-337(P)92(ARA)-338(04)-338(W)92(orkshop)-337(on)-338(State)-338(of)-338(the)-337(Art)-338(in)-338(Scienti\002c)-337(Computing,)]TJ 0 -11.955 Td [(pp.)-250(546\226553,)-250(Lectur)18(e)-250(Notes)-250(in)-250(Computer)-250(Science,)-250(Springer)74(,)-250(2005.)]TJ 0 g 0 G - [-500(G.)-362(Bella,)-391(S.)-362(Filippone,)-390(A.)-363(De)-362(Maio)-362(and)-363(M.)-362(T)92(esta,)]TJ/F52 9.9626 Tf 239.345 0 Td [(A)-362(Simulation)-362(Model)-363(for)]TJ -222.748 -11.955 Td [(For)18(est)-346(Fir)18(es)]TJ/F54 9.9626 Tf 47.93 0 Td [(,)-370(in)-346(J.)-346(Dongarra,)-370(K.)-346(Madsen,)-370(J.)-346(W)92(asniewski,)-370(editors,)-370(Pr)18(oceed-)]TJ -47.93 -11.955 Td [(ings)-338(of)-337(P)92(ARA)-338(04)-338(W)92(orkshop)-337(on)-338(State)-338(of)-338(the)-337(Art)-338(in)-338(Scienti\002c)-337(Computing,)]TJ 0 -11.955 Td [(pp.)-250(546\226553,)-250(Lectur)18(e)-250(Notes)-250(in)-250(Computer)-250(Science,)-250(Springer)74(,)-250(2005.)]TJ -0 g 0 G - -16.597 -19.642 Td [([3])]TJ + -16.597 -21.736 Td [([2])]TJ 0 g 0 G [-500(A.)-216(Buttari,)-223(D.)-216(di)-217(Sera\002no,)-223(P)129(.)-216(D'Ambra,)-223(S.)-216(Filippone,)-71(2LEV)74(-D2P4:)-293(a)-216(package)]TJ 16.597 -11.955 Td [(of)-247(high-performance)-247(pr)18(econditioners,)-107(Applicable)-247(Algebra)-247(in)-247(Engineering,)]TJ 0 -11.955 Td [(Communications)-292(and)-293(Computing,)-303(V)111(olume)-292(18,)-303(Number)-292(3,)-303(May)111(,)-303(2007,)-303(pp.)]TJ 0 -11.955 Td [(223-239)]TJ 0 g 0 G - -16.597 -19.642 Td [([4])]TJ + -16.597 -21.736 Td [([3])]TJ 0 g 0 G [-500(P)129(.)-248(D'Ambra,)-248(S.)-248(Filippone,)-249(D.)-248(Di)-248(Sera\002no)-107(On)-248(the)-248(Development)-248(of)-248(PSBLAS-)]TJ 16.597 -11.955 Td [(based)-523(Parallel)-524(T)90(wo-level)-523(Schwarz)-523(Pr)18(econditioners)-1235(Applied)-523(Numeri-)]TJ 0 -11.955 Td [(cal)-376(Mathematics,)-408(Elsevier)-376(Science,)-408(V)111(olume)-376(57,)-408(Issues)-376(11-12,)-408(November)18(-)]TJ 0 -11.955 Td [(December)-250(2007,)-250(Pages)-250(1181-1196.)]TJ 0 g 0 G - -16.597 -19.642 Td [([5])]TJ + -16.597 -21.736 Td [([4])]TJ 0 g 0 G [-500(Dongarra,)-268(J.)-265(J.,)-269(DuCr)18(oz,)-268(J.,)-269(Hammarli)1(ng,)-269(S.)-265(and)-264(Hanson,)-269(R.,)-268(An)-265(Extended)]TJ 16.597 -11.955 Td [(Set)-463(of)-463(Fortran)-463(Basic)-464(Linear)-463(Algebra)-463(Subpr)18(ograms,)-516(ACM)-463(T)90(rans.)-463(Math.)]TJ 0 -11.955 Td [(Softw)92(.)-250(vol.)-250(14,)-250(1\22617,)-250(1988.)]TJ 0 g 0 G - -16.597 -19.642 Td [([6])]TJ + -16.597 -21.736 Td [([5])]TJ 0 g 0 G [-500(Dongarra,)-422(J.,)-421(DuCr)18(oz,)-422(J.,)-422(Hammarling,)-422(S.)-387(and)-387(Duf)18(f,)-422(I.,)-422(A)-387(Set)-387(of)-388(level)-387(3)]TJ 16.597 -11.955 Td [(Basic)-308(Linear)-307(Algebra)-308(Subpr)18(ograms,)-322(ACM)-308(T)90(rans.)-308(Math.)-308(Soft)1(w)91(.)-307(vol.)-308(16,)-322(1\226)]TJ 0 -11.955 Td [(17,)-250(1990.)]TJ 0 g 0 G - -16.597 -19.642 Td [([7])]TJ + -16.597 -21.735 Td [([6])]TJ +0 g 0 G + [-500(J.)-349(J.)-350(Dongarra)-349(and)-350(R.)-350(C.)-349(Whaley)111(,)]TJ/F60 9.9626 Tf 166.061 0 Td [(A)-349(User)-37(')55(s)-350(Guide)-349(to)-350(the)-349(BLACS)-350(v)55(.)-349(1.1)]TJ/F62 9.9626 Tf 152.062 0 Td [(,)-374(La-)]TJ -301.526 -11.956 Td [(pack)-234(W)92(orking)-233(Note)-234(94,)-237(T)92(ech.)-233(Rep.)-234(UT)55(-CS-95-281,)-237(University)-234(of)-233(T)92(ennessee,)]TJ 0 -11.955 Td [(Mar)18(ch)-250(1995)-250(\050updated)-250(May)-250(1997\051.)]TJ 0 g 0 G - [-500(J.)-349(J.)-350(Dongarra)-349(and)-350(R.)-350(C.)-349(Whaley)111(,)]TJ/F52 9.9626 Tf 166.061 0 Td [(A)-349(User)-37(')55(s)-350(Guide)-349(to)-350(the)-349(BLACS)-350(v)55(.)-349(1.1)]TJ/F54 9.9626 Tf 152.062 0 Td [(,)-374(La-)]TJ -301.526 -11.955 Td [(pack)-234(W)92(orking)-233(Note)-234(94,)-237(T)92(ech.)-233(Rep.)-234(UT)55(-CS-95-281,)-237(University)-234(of)-233(T)92(ennessee,)]TJ 0 -11.955 Td [(Mar)18(ch)-250(1995)-250(\050updated)-250(May)-250(1997\051.)]TJ + -16.597 -21.735 Td [([7])]TJ 0 g 0 G - -16.597 -19.642 Td [([8])]TJ + [-500(I.)-259(Duf)18(f,)-262(M.)-260(Marr)18(one,)-261(G.)-260(Radicati)-259(and)-259(C.)-260(V)55(ittoli,)]TJ/F60 9.9626 Tf 223.407 0 Td [(Level)-259(3)-260(Basic)-259(Linear)-260(Algebra)]TJ -206.81 -11.955 Td [(Subpr)18(ograms)-328(f)1(or)-328(Sparse)-327(Matrices:)-465(a)-328(User)-327(Level)-328(Interface)]TJ/F62 9.9626 Tf 233.522 0 Td [(,)-347(ACM)-327(T)90(ransactions)]TJ -233.522 -11.956 Td [(on)-250(Mathematical)-250(Softwar)18(e,)-250(23\0503\051,)-250(pp.)-250(379\226401,)-250(1997.)]TJ 0 g 0 G - [-500(I.)-259(Duf)18(f,)-262(M.)-260(Marr)18(one,)-261(G.)-260(Radicati)-259(and)-259(C.)-260(V)55(ittoli,)]TJ/F52 9.9626 Tf 223.407 0 Td [(Level)-259(3)-260(Basic)-259(Linear)-260(Algebra)]TJ -206.81 -11.955 Td [(Subpr)18(ograms)-328(f)1(or)-328(Sparse)-327(Matrices:)-465(a)-328(User)-327(Level)-328(Interface)]TJ/F54 9.9626 Tf 233.522 0 Td [(,)-347(ACM)-327(T)90(ransactions)]TJ -233.522 -11.955 Td [(on)-250(Mathematical)-250(Softwar)18(e,)-250(23\0503\051,)-250(pp.)-250(379\226401,)-250(1997.)]TJ + -16.597 -21.735 Td [([8])]TJ 0 g 0 G - -16.597 -19.641 Td [([9])]TJ + [-500(I.)-282(Duf)18(f,)-289(M.)-282(Her)18(oux)-281(and)-282(R.)-281(Pozo,)]TJ/F60 9.9626 Tf 160.607 0 Td [(An)-282(Overview)-281(of)-282(the)-281(Sparse)-282(Basic)-282(Linear)-281(Al-)]TJ -144.01 -11.955 Td [(gebra)-287(Subpr)18(ograms:)-383(the)-287(New)-287(Standard)-286(fr)18(om)-287(the)-287(BLAS)-287(T)111(echnical)-286(Forum)]TJ/F62 9.9626 Tf 292.455 0 Td [(,)-296(ACM)]TJ -292.455 -11.955 Td [(T)90(ransactions)-250(on)-250(Mathematical)-250(Softwar)18(e,)-250(28\0502\051,)-250(pp.)-250(239\226267,)-250(2002.)]TJ 0 g 0 G - [-500(I.)-282(Duf)18(f,)-289(M.)-282(Her)18(oux)-281(and)-282(R.)-281(Pozo,)]TJ/F52 9.9626 Tf 160.607 0 Td [(An)-282(Overview)-281(of)-282(the)-281(Sparse)-282(Basic)-282(Linear)-281(Al-)]TJ -144.01 -11.956 Td [(gebra)-287(Subpr)18(ograms:)-383(the)-287(New)-287(Standard)-286(fr)18(om)-287(the)-287(BLAS)-287(T)111(echnical)-286(Forum)]TJ/F54 9.9626 Tf 292.455 0 Td [(,)-296(ACM)]TJ -292.455 -11.955 Td [(T)90(ransactions)-250(on)-250(Mathematical)-250(Softwar)18(e,)-250(28\0502\051,)-250(pp.)-250(239\226267,)-250(2002.)]TJ + -16.597 -21.736 Td [([9])]TJ 0 g 0 G - -21.579 -19.641 Td [([10])]TJ + [-500(S.)-298(Filippone)-298(and)-297(M.)-298(Colajanni,)]TJ/F60 9.9626 Tf 154.96 0 Td [(PSBLAS:)-298(A)-298(Library)-297(for)-298(Parallel)-298(Linear)-298(Alge-)]TJ -138.363 -11.955 Td [(bra)-340(Computation)-340(on)-340(Sparse)-340(Matrices)]TJ/F62 9.9626 Tf 151.715 0 Td [(,)-670(ACM)-340(T)90(ransactions)-340(on)-340(Mathematical)]TJ -151.715 -11.955 Td [(Softwar)18(e,)-250(26\0504\051,)-250(pp.)-250(527\226550,)-250(2000.)]TJ 0 g 0 G - [-500(S.)-298(Filippone)-298(and)-298(M.)-297(Colajanni,)]TJ/F52 9.9626 Tf 159.942 0 Td [(PSBLAS:)-298(A)-298(Library)-297(for)-298(Parallel)-298(Linear)-298(Alge-)]TJ -138.363 -11.955 Td [(bra)-340(Computation)-340(on)-340(Sparse)-340(Matrices)]TJ/F54 9.9626 Tf 151.715 0 Td [(,)-670(ACM)-340(T)90(ransactions)-340(on)-340(Mathematical)]TJ -151.715 -11.956 Td [(Softwar)18(e,)-250(26\0504\051,)-250(pp.)-250(527\226550,)-250(2000.)]TJ + -21.579 -21.736 Td [([10])]TJ 0 g 0 G - -21.579 -19.641 Td [([11])]TJ + [-500(S.)-397(Filippone)-398(and)-397(A.)-397(Buttari,)]TJ/F60 9.9626 Tf 151.683 0 Td [(Object-Oriented)-397(T)111(echniques)-397(for)-398(Sparse)-397(Matrix)]TJ -130.104 -11.955 Td [(Computations)-353(in)-353(Fortran)-354(2003)]TJ/F62 9.9626 Tf 126.789 0 Td [(,)-716(ACM)-353(T)90(ransactions)-353(on)-354(Mathematic)1(al)-354(Soft-)]TJ -126.789 -11.955 Td [(war)18(e,)-250(38\0504\051,)-250(2012.)]TJ 0 g 0 G - [-500(S.)-397(Filippone)-398(and)-397(A.)-397(Buttari,)]TJ/F52 9.9626 Tf 151.683 0 Td [(Object-Oriented)-397(T)111(echniques)-397(for)-398(Sparse)-397(Matrix)]TJ -130.104 -11.955 Td [(Computations)-353(in)-353(Fortran)-354(2003)]TJ/F54 9.9626 Tf 126.789 0 Td [(,)-716(ACM)-353(T)90(ransactions)-353(on)-354(Mathemati)1(cal)-354(Soft-)]TJ -126.789 -11.955 Td [(war)18(e,)-250(38\0504\051,)-250(2012.)]TJ + -21.579 -21.735 Td [([11])]TJ 0 g 0 G - -21.579 -19.642 Td [([12])]TJ + [-500(S.)-339(Filippone,)-360(P)129(.)-339(D'Ambra,)-360(M.)-339(Colajanni,)]TJ/F60 9.9626 Tf 202.675 0 Td [(Using)-339(a)-338(Parallel)-339(Library)-338(of)-339(Sparse)]TJ -181.096 -11.956 Td [(Linear)-350(Algebra)-350(in)-350(a)-349(Fluid)-350(Dynamics)-350(Applications)-350(Code)-350(on)-350(Linux)-349(Clusters)]TJ/F62 9.9626 Tf 307.21 0 Td [(,)-375(in)]TJ -307.21 -11.955 Td [(G.)-262(Joubert,)-264(A.)-262(Murli,)-264(F)92(.)-262(Peters,)-265(M.)-261(V)92(anneschi,)-265(editors,)-265(Parallel)-261(Computing)]TJ 0 -11.955 Td [(-)-250(Advances)-250(&)-250(Curr)18(ent)-250(Issues,)-250(pp.)-250(441\226448,)-250(Imperial)-250(College)-250(Pr)18(ess,)-250(2002.)]TJ 0 g 0 G - [-500(S.)-339(Filippone,)-360(P)129(.)-339(D'Ambra,)-360(M.)-339(Colajanni,)]TJ/F52 9.9626 Tf 202.675 0 Td [(Using)-339(a)-338(Parallel)-339(Library)-338(of)-339(Sparse)]TJ -181.096 -11.955 Td [(Linear)-350(Algebra)-350(in)-350(a)-349(Fluid)-350(Dynamics)-350(Applications)-350(Code)-350(on)-350(Linux)-349(Clusters)]TJ/F54 9.9626 Tf 307.21 0 Td [(,)-375(in)]TJ -307.21 -11.955 Td [(G.)-262(Joubert,)-264(A.)-262(Murli,)-264(F)92(.)-262(Peters,)-265(M.)-261(V)92(anneschi,)-265(editors,)-265(Parallel)-261(Computing)]TJ 0 -11.955 Td [(-)-250(Advances)-250(&)-250(Curr)18(ent)-250(Issues,)-250(pp.)-250(441\226448,)-250(Imperial)-250(College)-250(Pr)18(ess,)-250(2002.)]TJ + -21.579 -21.735 Td [([12])]TJ 0 g 0 G - 142.805 -29.888 Td [(163)]TJ + [-500(Gamma,)-217(E.,)-434(Helm,)-216(R.,)-434(Johnson,)-217(R.,)-434(and)-417(Vlissides,)-217(J.)-208(1995.)]TJ/F60 9.9626 Tf 276.041 0 Td [(Design)-208(Patterns:)]TJ -254.462 -11.956 Td [(Elements)-250(of)-250(Reusable)-250(Object-Oriented)-250(Softwar)18(e)]TJ/F62 9.9626 Tf 190.781 0 Td [(.)-250(Addison-W)92(esley)111(.)]TJ +0 g 0 G + -47.976 -29.887 Td [(175)]TJ 0 g 0 G ET endstream endobj -1945 0 obj +2192 0 obj << -/Length 3007 +/Length 4492 >> stream 0 g 0 G 0 g 0 G 0 g 0 G BT -/F54 9.9626 Tf 150.705 706.129 Td [([13])]TJ +/F62 9.9626 Tf 150.705 706.129 Td [([13])]TJ 0 g 0 G - [-500(Gamma,)-217(E.,)-434(Helm,)-216(R.,)-434(Johnson,)-217(R.,)-434(and)-417(Vlissides,)-216(J.)-209(1995.)]TJ/F52 9.9626 Tf 276.04 0 Td [(Design)-209(Patterns)1(:)]TJ -254.461 -11.955 Td [(Elements)-250(of)-250(Reusable)-250(Object-Oriented)-250(Softwar)18(e)]TJ/F54 9.9626 Tf 190.781 0 Td [(.)-250(Addison-W)92(esley)111(.)]TJ + [-500(Karypis,)-422(G.)-388(and)-387(Kumar)74(,)-422(V)129(.,)]TJ/F60 9.9626 Tf 149.763 0 Td [(METIS:)-388(Unstructur)18(ed)-387(Graph)-388(Partitioning)-388(and)]TJ -128.184 -11.955 Td [(Sparse)-452(Matrix)-452(Ordering)-452(System)]TJ/F62 9.9626 Tf 135.842 0 Td [(.)-452(Minneapolis,)-503(MN)-452(55455:)-714(University)-452(of)]TJ -135.842 -11.955 Td [(Minnesota,)-531(Department)-475(of)-476(Comp)1(uter)-476(Science,)-531(1995.)-475(Internet)-475(Addr)18(ess:)]TJ/F67 9.9626 Tf 0 -11.956 Td [(http://www.cs.umn.edu/~karypis)]TJ/F62 9.9626 Tf 156.91 0 Td [(.)]TJ 0 g 0 G - -212.36 -19.926 Td [([14])]TJ + -178.489 -19.925 Td [([14])]TJ 0 g 0 G - [-500(Karypis,)-422(G.)-388(and)-387(Kumar)74(,)-422(V)129(.,)]TJ/F52 9.9626 Tf 149.763 0 Td [(METIS:)-388(Unstructur)18(ed)-387(Graph)-388(Partitioning)-388(and)]TJ -128.184 -11.955 Td [(Sparse)-452(Matrix)-452(Ordering)-452(System)]TJ/F54 9.9626 Tf 135.842 0 Td [(.)-452(Minneapolis,)-503(MN)-452(55455:)-714(University)-452(of)]TJ -135.842 -11.955 Td [(Minnesota,)-531(Department)-475(of)-476(Comp)1(uter)-476(Science,)-531(1995.)-475(Internet)-475(Addr)18(ess:)]TJ/F59 9.9626 Tf 0 -11.955 Td [(http://www.cs.umn.edu/~karypis)]TJ/F54 9.9626 Tf 156.91 0 Td [(.)]TJ + [-500(Lawson,)-314(C.,)-314(Hanson,)-314(R.,)-313(Kincaid,)-314(D.)-301(and)-301(Kr)18(ogh,)-314(F)92(.,)-314(Basic)-301(Linear)-301(Algebra)]TJ 21.579 -11.955 Td [(Subpr)18(ograms)-288(for)-288(Fortran)-288(usage,)-298(ACM)-288(T)90(rans.)-288(Math.)-288(Softw)92(.)-288(vol.)-288(5,)-298(38\226329,)]TJ 0 -11.955 Td [(1979.)]TJ 0 g 0 G - -178.489 -19.925 Td [([15])]TJ + -21.579 -19.926 Td [([15])]TJ 0 g 0 G - [-500(Lawson,)-314(C.,)-314(Hanson,)-314(R.,)-313(Kincaid,)-314(D.)-301(and)-301(Kr)18(ogh,)-314(F)92(.,)-314(Basic)-301(Linear)-301(Algebra)]TJ 21.579 -11.956 Td [(Subpr)18(ograms)-288(for)-288(Fortran)-288(usage,)-298(ACM)-288(T)90(rans.)-288(Math.)-288(Softw)92(.)-288(vol.)-288(5,)-298(38\226329,)]TJ 0 -11.955 Td [(1979.)]TJ + [-500(Machiels,)-240(L.)-239(and)-238(Deville,)-240(M.)]TJ/F60 9.9626 Tf 146.833 0 Td [(Fortran)-238(90:)-304(An)-238(entry)-239(to)-238(object-oriented)-238(pr)18(ogram-)]TJ -125.254 -11.955 Td [(ming)-211(for)-210(the)-211(solution)-210(of)-211(partial)-210(differ)18(ential)-211(equations.)]TJ/F62 9.9626 Tf 211.899 0 Td [(ACM)-211(T)90(rans.)-210(Math.)-211(Softw)92(.)]TJ -211.899 -11.955 Td [(vol.)-250(23,)-250(32\22649.)]TJ 0 g 0 G -21.579 -19.925 Td [([16])]TJ 0 g 0 G - [-500(Machiels,)-240(L.)-239(and)-238(Deville,)-240(M.)]TJ/F52 9.9626 Tf 146.833 0 Td [(Fortran)-238(90:)-304(An)-238(entry)-239(to)-238(object-oriented)-238(pr)18(ogram-)]TJ -125.254 -11.955 Td [(ming)-211(for)-210(the)-211(solution)-210(of)-211(partial)-210(differ)18(ential)-211(equations.)]TJ/F54 9.9626 Tf 211.899 0 Td [(ACM)-211(T)90(rans.)-210(Math.)-211(Softw)92(.)]TJ -211.899 -11.955 Td [(vol.)-250(23,)-250(32\22649.)]TJ + [-500(Metcalf,)-251(M.,)-250(Reid,)-251(J.)-250(and)-251(Cohen,)-251(M.)]TJ/F60 9.9626 Tf 177.874 0 Td [(Fortran)-250(95/2003)-251(explained.)]TJ/F62 9.9626 Tf 112.004 0 Td [(Oxfor)18(d)-251(Uni-)]TJ -268.299 -11.955 Td [(versity)-250(Pr)18(ess,)-250(2004.)]TJ 0 g 0 G -21.579 -19.926 Td [([17])]TJ 0 g 0 G - [-500(Metcalf,)-251(M.,)-250(Reid,)-251(J.)-250(and)-251(Cohen,)-251(M.)]TJ/F52 9.9626 Tf 177.874 0 Td [(Fortran)-250(95/2003)-251(explained.)]TJ/F54 9.9626 Tf 112.004 0 Td [(Oxfor)18(d)-251(Uni-)]TJ -268.299 -11.955 Td [(versity)-250(Pr)18(ess,)-250(2004.)]TJ + [-500(Metcalf,)-258(M.,)-259(Reid,)-258(J.)-257(and)-256(Cohen,)-259(M.)]TJ/F60 9.9626 Tf 178.365 0 Td [(Modern)-257(Fortran)-256(explained.)]TJ/F62 9.9626 Tf 111.452 0 Td [(Oxfor)18(d)-257(Uni-)]TJ -268.238 -11.955 Td [(versity)-250(Pr)18(ess,)-250(2011.)]TJ 0 g 0 G -21.579 -19.925 Td [([18])]TJ 0 g 0 G @@ -26383,15 +31434,27 @@ BT 0 g 0 G -21.579 -19.926 Td [([19])]TJ 0 g 0 G - [-500(M.)-282(Snir)74(,)-290(S.)-282(Otto,)-289(S.)-282(Huss-Lederman,)-290(D.)-282(W)92(alker)-282(and)-282(J.)-281(Dongarra,)]TJ/F52 9.9626 Tf 304.659 0 Td [(MPI:)-282(The)]TJ -283.08 -11.955 Td [(Complete)-369(Refer)18(ence.)-369(V)74(olume)-369(1)-370(-)-369(The)-369(MPI)-369(Cor)18(e)]TJ/F54 9.9626 Tf 195.586 0 Td [(,)-399(second)-369(edition,)-399(MIT)-369(Pr)18(ess,)]TJ -195.586 -11.955 Td [(1998.)]TJ + [-500(M.)-282(Snir)74(,)-290(S.)-282(Otto,)-289(S.)-282(Huss-Lederman,)-290(D.)-282(W)92(alker)-282(and)-282(J.)-281(Dongarra,)]TJ/F60 9.9626 Tf 304.659 0 Td [(MPI:)-282(The)]TJ -283.08 -11.955 Td [(Complete)-369(Refer)18(ence.)-369(V)74(olume)-369(1)-370(-)-369(The)-369(MPI)-369(Cor)18(e)]TJ/F62 9.9626 Tf 195.586 0 Td [(,)-399(second)-369(edition,)-399(MIT)-369(Pr)18(ess,)]TJ -195.586 -11.955 Td [(1998.)]TJ +0 g 0 G + -21.579 -19.925 Td [([20])]TJ +0 g 0 G + [-500(D.)-314(Barbieri,)-330(V)129(.)-314(Car)18(dellini,)-330(S.)-314(Filippone)-314(and)-314(D.)-314(Rouson)]TJ/F60 9.9626 Tf 263.283 0 Td [(Design)-314(Patterns)-314(for)]TJ -241.704 -11.955 Td [(Scienti\002c)-224(Computations)-225(on)-224(Sparse)-224(Matrices)]TJ/F62 9.9626 Tf 175.254 0 Td [(,)-229(HPSS)-225(2011,)-229(Algorithms)-225(and)-224(Pr)18(o-)]TJ -175.254 -11.956 Td [(gramming)-396(T)92(ools)-395(for)-396(Next-Generation)-396(High-Performance)-395(Scienti\002c)-396(Soft-)]TJ 0 -11.955 Td [(war)18(e,)-250(Bor)18(deaux,)-250(Sep.)-250(2011)]TJ +0 g 0 G + -21.579 -19.925 Td [([21])]TJ +0 g 0 G + [-500(Car)18(dellini,)-494(V)129(.,)-988(Filippone)1(,)-494(S.,)-988(and)-890(Rouson,)-494(D.)-445(2014,)-494(Design)-445(patterns)]TJ 21.579 -11.955 Td [(for)-240(sparse-matrix)-240(computations)-240(on)-240(hybrid)-240(CPU/GPU)-240(platforms,)]TJ/F60 9.9626 Tf 284.952 0 Td [(Scienti\002c)]TJ -284.952 -11.955 Td [(Pr)18(ogramming)-250(22,)]TJ/F62 9.9626 Tf 73.713 0 Td [(1,)-250(1\22619.)]TJ +0 g 0 G + -95.292 -19.926 Td [([22])]TJ +0 g 0 G + [-500(D.)-200(Barbieri,)-210(V)129(.)-200(Car)18(dellini,)-210(A.)-201(Fanfarillo,)-210(S.)-200(Filippone,)-210(Thr)18(ee)-200(storage)-200(formats)]TJ 21.579 -11.955 Td [(for)-318(sparse)-317(matrices)-318(on)-318(GPGPUs,)-335(T)92(ech.)-317(Rep.)-318(DICII)-318(RR-15.6,)-334(Universit)]TJ 305.831 0.025 Td [(\036)]TJ -0.832 -0.025 Td [(a)-318(di)]TJ -304.999 -11.955 Td [(Roma)-250(T)92(or)-250(V)111(er)18(gata)-250(\050Febr)8(uary)-250(2015\051.)]TJ 0 g 0 G - 142.804 -352.677 Td [(164)]TJ + 142.805 -209.215 Td [(176)]TJ 0 g 0 G ET endstream endobj -1962 0 obj +2212 0 obj << /Length1 1383 /Length2 5908 @@ -26462,85 +31525,12 @@ W ò6S&ªå_!“½SÎ|esU›FÌR™y† ¢Y‹¥ýžï­§N endstream endobj -1964 0 obj +2214 0 obj << -/Length1 1537 -/Length2 2827 -/Length3 0 -/Length 4364 ->> -stream -%!PS-AdobeFont-1.0: CMITT10 003.002 -%%Title: CMITT10 -%Version: 003.002 -%%CreationDate: Mon Jul 13 16:17:00 2009 -%%Creator: David M. Jones -%Copyright: Copyright (c) 1997, 2009 American Mathematical Society -%Copyright: (), with Reserved Font Name CMITT10. -% This Font Software is licensed under the SIL Open Font License, Version 1.1. -% This license is in the accompanying file OFL.txt, and is also -% available with a FAQ at: http://scripts.sil.org/OFL. -%%EndComments -FontDirectory/CMITT10 known{/CMITT10 findfont dup/UniqueID known{dup -/UniqueID get 5000779 eq exch/FontType get 1 eq and}{pop false}ifelse -{save true}{false}ifelse}{false}ifelse -11 dict begin -/FontType 1 def -/FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /MPVPBL+CMITT10 def -/FontBBox {11 -233 669 696 }readonly def -/PaintType 0 def -/FontInfo 9 dict dup begin -/version (003.002) readonly def -/Notice (Copyright \050c\051 1997, 2009 American Mathematical Society \050\051, with Reserved Font Name CMITT10.) readonly def -/FullName (CMITT10) readonly def -/FamilyName (Computer Modern) readonly def -/Weight (Medium) readonly def -/ItalicAngle -14.04 def -/isFixedPitch true def -/UnderlinePosition -100 def -/UnderlineThickness 50 def -end readonly def -/Encoding 256 array -0 1 255 {1 index exch /.notdef put} for -dup 68 /D put -dup 97 /a put -dup 99 /c put -dup 100 /d put -dup 101 /e put -dup 33 /exclam put -dup 110 /n put -dup 111 /o put -dup 46 /period put -dup 115 /s put -dup 116 /t put -readonly def -currentdict end -currentfile eexec -ÙÖoc;„j²„¼ø°Aw-åÎ3Ã6Uöÿu4 -lã.ÂNk©4¡õ8•DPËh>®[M†E7wk9B½.…H‰ì.³CÍS†oøZçèqO«£­ OèÖ)9ú‚îOHl~Q:ÔɾD0j‚‡— J¿¶Ñù'JZ ¶ì÷­½`ÕÖÄB 5Ô†G -t²ðbY©7:¾Í¿2úh½K@¾À,¥Š 'çó,¥É÷ÙL’~## ê¥gˆyëMŽ¢iMæç?à úi8ðZ¼‡ý+ÐÕ’~Ëó[\3©èMn‚Àû=r=_­VxÎgpÄÁ÷¢%vïbÒ¨ˆSr\ËÆ©ˆµÉ*ó£'"$¶Æ9iVˆ«ð¬2Ý;äxÕ¿£ü§`e'ËôóÄ«fz•+‹;¥æ€îZÑ)¢Ëÿ˜ýæÓ£ÿ°¸¡4š¸ÍêYŽ,Œ1i$ÂWšºVÁC™oì _±S*`6 »Ij$¨I‹Ý/}„½!9'n0%3SV´ÝP}vÖÂqôsF}¶OTÓŒ+¼œO*½6ž½¸’I¤é*\-ÌRÚ3ª°ÚNÙœ:gd¿;p/V;aZþÀ6è©¡Ü:£“s–wQu+MÉô…àåF€Ãœ•Õ¡n>s ¯­›”Éå*ŸÁHPëix€†Î¡\x6²¥u±»j^tüâTg'Dà¦i&kò|í5Üé.Æ+ E¦0E,¼€ýâì»TÙ™~Ìçô#N™ƒa½¡X®Ï–Ý.-ÁŒüEæJ:êcx¶éΖçxh¯¤N ýT#O½£¤ #k]ºFÖ¼‚…XNÇw³iD—>?„Ö·Ç×—ßÁð”ÞÌÝ5vîY3ϳ7”@)‘r¬:J?ÅlM{OÞ{s…§ªø,¯ïܽóÿ^…$2œG £›Žl’G¿E¶Û¼Šì±@½ÝD²ŒÎu°ŽÈ]÷í–ú >2ëw‘3àêYq÷ÕÖ­±³¶Ã’¯ØP[Ë0q;˜‰HzÎ -¯'.«5¾"æmÙ«úN4Kª5ËàRcÄ8jÓLÛþY·Lö Š¼ÛâÕoÔ Œï­À32p!ûqb‰1Ù6#&9èŠV _ÌZ‡­wBsIéƒ$PDdÿc‰›³dîˆG°3Õ¡,­¼E šð»~ÍëÛ× -°Dt «J{\«©]Êô/yõ~UVV$-äIfygñEŸYŽ<ºáÝ Q«J\Z¿" -¦Hõàä1ÓRzìÛUS -ÕªµÏQÛhÏM°hËYF² -(ûGØÅÒ­û`÷D áç -Ž%=ÀýÇ Œû¨t’ ¯¸Ì ˆP›’JvŠÀï… ä‘¸,q­“™:ÿ7çÀ¹JUBRºgs¹PóÆ–b,-’>·½µeÁ¥|»ÉgEz´P5¤—õ2öá~ û'ÅÚñ®ØÖ#œ÷E’ä‰5kb†¼ÃNú&YÍJ†·ïG¾7ºV8#Â3ú²Ž0gVà|¶(抉˜ âBHÞàÂ4ÊÚÒ>¶__W!ýr£©N—(ÈÌክ\,¨!Ì¥ÃaåÃÄE›Žbã÷ô$·@ÂSìè*sOˤÌPøWógæµ½4ouçnHŠÊšOq%ÍØxh6`t%b)K0ñV•Òm¬³kqèWØQºcÕM -cPJ‰]¨6›œÿïµÇ¢`³óHÄG4ê¼ÅuO§d“áœ)u_è^Ö~µŸÅF]Î/ Ç€ ½ ë4lØ{kNæõ˜åíÉðÑ»!t¸=m~æ®ìø®"v_Š±dBtúýÞ¸±ÛRSª‡ É<¹øœ¸K-Pö\kìˆô®¸ ˜ÐÄo×ý¿DCîY¥J E!ûnÞ™ÿÉ -Ch¹gˆwý‘¢}ùÅ¡fuhk~7‹\ñøhi ¢ÜH˜gÝGv&›€ Û'kÜD®²šàWÚ©îÉÄ鯤Ä^3íÃé:o"éugŒ˜ìŠløæOÈ-È`á˜MjŠãj§ëb¦íÛ×?'Ѥ”п6žûŸ8ùÐ08Tâg$Þ(R5PP½;u¥4êôÓ© ˆD]o.ì—þÆý… Ôª²×³é<è›,oIÂÞ§ò ¤GÀvO¼7Iö OƒA—TWyí¸›¬{ &š¬Šž²R ”ÄXWAN8ijŸ³ùHUÖ`h"a@Qî=µC/>±è?I¾ÈãÊ©é",F°óyeŠ5èh<5¦‹¶'gç¬éŠ+Ïï7-Ç^fëí´8Í«±lUŸ?K°’0>Ú§cDœX,y±‚Éâa=Ä'ÿ·¾K)pw£ -ˆ‹NèšlU§¨·¾Š›”€dcÚ¸ZUµf Kê#v˜G¸ø@Hw¶²sPfC@µ¥™zS J[p—éÌ -þG?Oÿ'6ÄMpxX¹†n÷o“ó¨E*ØÔ5^ ÇÇ|@~¬ßðdˆ@¼M8É€DgäÒLžKý:ñâO:ι®ÿìÞ¯¾€e§£EzEX,¸u¢8ŒáWÔ¸Õ»PŽb'¯±Ôés¸-'†3¢ÀioCÈy z,Lf¶±p@•LéñK{ï¸ÃDhþ—[,s§œîæ£Y• ŒèG:œ¾{Ò¨ØU¿™5Fd‘vJ1Ã&!Ùƒ»ÿÓØî _uºYÿ²þß´Á,©„u8´(š¹õ³íK,Í{Íf1;Ö¼Re!X¨†–Äv„jÃÁy¦pßÖ–„×¹ŽFðÙy û`'¡ÿB`XÈŽCóÝXzï`´Aã¢îÀ<Üìk5(PU m–IzGÂùJþ.Èê=¤[¯!5q°Oëˆi³·ò‘ÆCÑu,)Ÿ`–÷„q:ì×@h«]â±Ù¼º…mž’gž“!Ÿ„ßh¼Ô“#Æs–1ªÐÞºŒsÜÓî~äU5Œu] ÍÐBO8V™D”EƒbØ/¤ž -IªŸXå£Õ§„ -¢²Ç¸jfÒ&ËÌô¡NÖ^k¡?h·&ÈÒ #РH}JgC ó3•ó¼>À•ƒÇ…ÚKЬűµ—&>-áéÄÛ#ƒ°kÜ¡T‚‡ó²Úã -vÖð4-tœÃ’»u•{ÌÞ½ifMVZÂgG4ÄHb©éY¿Å\þe# ÑåÒ«ÀDüͱU•œR“ Ú—JRw¥¾‚G*“ìõÊ"yh„ ”qCÄÊií:àË“(’RÚg³gü¼¢X_žøÑ/ÿÌ÷ wR}–,.}Ch›7´\å[Þ"Û“B¶+cË»~ï̓p הؓ–l†DZâN]‡Á„ð­³ü¥ &סŽšZ˜+ªÅrçø’Øv)^º¥aÓªÛ»öyÏ¢6¹cøÀÓDøVÖF“ŠÌw$Šíyâ ¥( H:º€=.¬›1eëÜPä12)g¿¡<8üŽ Õ ³&×o]ï7»ªoµüϾ-/}tõN5ÒY#£˜,¨!Ó:oãIÙL}ªeÝëbŠq€z¶Ï‹Û5†€9ÒÆïC• -{#¬°ÈÚ| -endstream -endobj -1966 0 obj -<< -/Length1 1416 -/Length2 6052 +/Length1 1442 +/Length2 6151 /Length3 0 -/Length 7468 +/Length 7593 >> stream %!PS-AdobeFont-1.0: CMMI10 003.002 @@ -26560,7 +31550,7 @@ FontDirectory/CMMI10 known{/CMMI10 findfont dup/UniqueID known{dup 11 dict begin /FontType 1 def /FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /SYFPBV+CMMI10 def +/FontName /TPELEW+CMMI10 def /FontBBox {-32 -250 1048 750 }readonly def /PaintType 0 def /FontInfo 10 dict dup begin @@ -26577,6 +31567,7 @@ FontDirectory/CMMI10 known{/CMMI10 findfont dup/UniqueID known{dup end readonly def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for +dup 44 /arrowhookleft put dup 62 /greater put dup 60 /less put readonly def @@ -26607,213 +31598,369 @@ D jà…О¤á”Á ˜|-°cL„ô´TŸîz~ÈœÿLú†7Ô¥­G+ZÀËŸBŒ±ßŠ¿ê=¸*&ÌT7«8~‡#˜ÒàÆ¿l•Â8¶²Å½¸hJû·¦Á ¢(jÿbå*=|tŒ,cÒ“0Å0l·ï°´ÍsŒ‰‡K› @û3Õ\îSN:8¡£'gé¦ßU?Þ)äÔRb%ÑûÚoŒ·€˜è>Š6 ãÄÇ~)˜ O’ «©ÇXw5Í/"Ëá|k™¨(e$/ÞJ«¾G ¸à“ÇõÁž0Mõo#hpÑTé%Äòå-R (>¥*Îú Ò¨ìÈ25ˆh¼èïº|I.5uÁ`QP£÷Ö‚)`ñ©—QQǶé(ü÷4“5•³^§ƒÞ„‚M߯oˆ5G*`?ÍRFM¨ àÀþÕÌgãŽêpÀfÚßnþ¾5Æ1d¬¤¹‘íKÄë[L y¸q ´BÀ¶$Ã@'ˆÊ6é!Wëà -ÎúE¶ÆI¹ï6ø,ûITÁ$‡þÎy›Ù‡î“H–pi¹æÿVchž]l&Ûß¡FÃÝ:¹ÂO¤é$#ÈŠ!D:¨‹…%)aã›Mx“ýÈÇ"_L—ú?Âd*ú:‡Ö0ï2]7x(ìº4p ®P8¼*.v5žsËb<õŽ³]LQ†0ï*ø¶Kí•×+·@>e.-ÚoªÓèþC®–¡Æ€lÊïÆÑ\~R+$vÃo–D¥™Úg†ßþyùê èaˆ çDNnrè*ïÌö®z ®høƒÙø[&¦Å&‡ÀÿæÔüq.®‹Êø¿ÝÐ¥‡ÄÎ¥ëÇl”Vx3õTª ö%LbzbSzÓ´1þ¿m‚­ ÛËžpØïMc(áÐÀ»yÁJqØü^ó]{¤+ÝSŠŠ1¶Ý¶Q< ÁŠM~Ñ |[77{>º5ÔŸ5I ¥ò8ŒÄÔ”_ki®d¥‘€4ÀcÖ@NÏ!T‘m + +ÎúE¶ÆI¹ï6ø,ûITÁ$‡þÎy›Ù‡î“H–pi¹æÿVchž]l&Ûß¡FÃÝ:¹ÂO¤é$#ÈŠ!D:¨‹…%)aã›Mx“ýÈÇ"_L—ú?Âd*ú:‡Ö0ï2]7x(ìº4p ®P8¼*.v5žsËb<õŽ³]LQ†0ï*ø¶Kí•×+·@>e.-ÚoªÓèþC®–¡Æ€lÊïÆÑ\~R+$vÃo–D¥™Úg†ßþyùê èaˆ çDNnrè*ïÌö®z ®høƒÙø[&¦Å&‡ÀÿæÔüq.®‹Êø¿ÝÐ¥‡ÄÎ¥ëÇl”Vx3õTª ö%LbzbSzÓ´Hv­÷šÒѦ·Sc)]M|Yœ÷IGC¼rç õrº²ô®ÍmGý­`—½2N×q$ž +n^Ø90ªI`V<_qŽnA¤©UÉ´Ý^°å"2ض>K;ägþæB +r sb¢VØb)l 2ÅþSŠ„T£­H+PMzjÔîëzËiÍfí$ŽDáŒMkV©° Ã?ã\Ù¼Ä>Ž¬߈ƶa¹<¡ÈèEE£ÜL½¦D…¯6=t¯–EÒ÷µ€ðàÀ.…(Á%FoÜ~­´ô6€rý¦\l•ù;séñ'$Öµ…ïß>ž4¹ÿr$ï2¨DZý~*®\R ù·Yi$=Þ¡¥k‡å'¯Öå“™úý.m…6¯Â–„tY0’O$ð)ºQñ’ršÈUQ…»+¶ßªúÔ4(¨«,‡°Qb8¬—mÓ©qý±âÀÁÝHä°=‘œ,YŠ8i  ÿ‡½ª”SZ6Žöã½mØ@ˆ»`é7~â"L7\ã¹ßFdN#FþýHR´K´KHpb40 2ÂÚ~HÇKε֊úøX endstream endobj -1968 0 obj +2216 0 obj << -/Length1 1429 -/Length2 6269 +/Length1 1688 +/Length2 8444 /Length3 0 -/Length 7698 +/Length 10132 >> stream -%!PS-AdobeFont-1.0: CMR10 003.002 -%%Title: CMR10 +%!PS-AdobeFont-1.0: CMSY10 003.002 +%%Title: CMSY10 %Version: 003.002 %%CreationDate: Mon Jul 13 16:17:00 2009 %%Creator: David M. Jones %Copyright: Copyright (c) 1997, 2009 American Mathematical Society -%Copyright: (), with Reserved Font Name CMR10. +%Copyright: (), with Reserved Font Name CMSY10. % This Font Software is licensed under the SIL Open Font License, Version 1.1. % This license is in the accompanying file OFL.txt, and is also % available with a FAQ at: http://scripts.sil.org/OFL. %%EndComments -FontDirectory/CMR10 known{/CMR10 findfont dup/UniqueID known{dup -/UniqueID get 5000793 eq exch/FontType get 1 eq and}{pop false}ifelse +FontDirectory/CMSY10 known{/CMSY10 findfont dup/UniqueID known{dup +/UniqueID get 5096651 eq exch/FontType get 1 eq and}{pop false}ifelse {save true}{false}ifelse}{false}ifelse 11 dict begin -/FontType 1 def -/FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /GIGFZE+CMR10 def -/FontBBox {-40 -250 1009 750 }readonly def -/PaintType 0 def -/FontInfo 9 dict dup begin -/version (003.002) readonly def -/Notice (Copyright \050c\051 1997, 2009 American Mathematical Society \050\051, with Reserved Font Name CMR10.) readonly def -/FullName (CMR10) readonly def -/FamilyName (Computer Modern) readonly def -/Weight (Medium) readonly def -/ItalicAngle 0 def +/FontType 1 def +/FontMatrix [0.001 0 0 0.001 0 0 ]readonly def +/FontName /VKSUEJ+CMSY10 def +/FontBBox {-29 -960 1116 775 }readonly def +/PaintType 0 def +/FontInfo 9 dict dup begin +/version (003.002) readonly def +/Notice (Copyright \050c\051 1997, 2009 American Mathematical Society \050\051, with Reserved Font Name CMSY10.) readonly def +/FullName (CMSY10) readonly def +/FamilyName (Computer Modern) readonly def +/Weight (Medium) readonly def +/ItalicAngle -14.04 def +/isFixedPitch false def +/UnderlinePosition -100 def +/UnderlineThickness 50 def +end readonly def +/Encoding 256 array +0 1 255 {1 index exch /.notdef put} for +dup 66 /B put +dup 72 /H put +dup 73 /I put +dup 32 /arrowleft put +dup 33 /arrowright put +dup 3 /asteriskmath put +dup 106 /bar put +dup 107 /bardbl put +dup 102 /braceleft put +dup 103 /braceright put +dup 50 /element put +dup 21 /greaterequal put +dup 20 /lessequal put +dup 0 /minus put +dup 54 /negationslash put +dup 112 /radical put +readonly def +currentdict end +currentfile eexec +ÙÖoc;„j²„¼ø°Aw-åÍßᾉYň5t&× {hLšGÒqB`d­Ë—PØ©†ÑÖ|*îøÎx\ÁœÞ–HŸtEÅãBð-¡ÉùóÁgedogÏ7—‰ãï‘Q`[['“WÖü…7Â3ç®æ¤ý¾sçZ9ë m ¦ö!–tAž¾ë‹Y!$átÊY\Žr[˜uTIUÏý‹iŽ÷B¼Œùyã[Ž™ÊÝÝÈœÆÅ—3ò¢Kï6­†zJ!žË’Ç‘œJ·s&B5öC©•"¥m†&Þ`7ÞûӢƻ‘Ç3uµgNC O~̶u³o5Ö=žÂÞò_ä›Tù/ ¸’‰h,°—$B%¯döʘï¯3lŸÃ ³¾ÛÿMÓo§¨÷YLÚ¶Œ~ƒ3^æÐø`š‘12]•=l8Çà£B6PlpËfWÚü<• %PIN!jáuË#,.õ£Åi«X“nôè¸È¼Ê‡ðo$îÒX½šëÄê‘·I5öÔV*E6t±K×o¿_)žÓRŸžP»¾G;Z@9ØÐ"V3¡T×TLç˜N Y:…A€Ókî©®Ú¬6Ì =‚¸=•8€0~ÛÐðDQì±ã QÂù^brð/2^KŸ¾Zh`§N'Jt4i¶ëüèBYÒð7ÖÞŸBþU_¨~\ŸfûŽõçôµïŸXA³ˆZl‰”¿Òã_£Ì½ZÆŽ˜ÀÐìý'•çxHµúöðbÊG:Ç"„¥l«¶5ºCÝƉU[ÆaL¼ô¸rÂÌf¶ôù 1[÷;4äp^èµNïpûªqBD 'Ø…93ã­M€&9{ ˆVD õ8 aе‚2Ön.hf¶ +äl?KÚ¢ë[$‹¸Š³úGpðöÃݺ±/JÇzc÷¥‚F8ûvP3Üà/XO6ÈyAo¿±î~ëç]WqD‚M¹ˆY4ߧ󆸢YÕÌ¢XPEðLý2ã_2¹ ­¹©oH•{ +1xÒÉê'¿¼u|»o øöúWNRšE”ò¦í™¹Óôû&”ã˜lFÿ\LRh1®Fžbñ¦­Ï}+‡l Cø] ¦¥ÛÂ(„vfÕoƒ+fÏœMëí³}÷l?c*Þˆ"î­^R¬æ]ªm†äÔi§„KªOÉÒŽ!I Œ²Ó²ûçõRþ_tÓW;™¿Ìñ˜Çu@(#ªt*̧Ó U œ{|ãõõQ}a3åF¨l•¾ó8x¬¤Òt’HWA¨Â­â;³!ÚVÞÐþ CºÊƒVo³—Ûvºžì’?³”;”œ±=ËÜ>,„Æ㧫¾\"«ù¶•šÑRívRC•Ø¥œQDh +Ó@_,žÇËœ½kbflçAIøP^ª³™wÙš¦dIÉHzk(c÷3ƒxûŠÉìSÊUPI³f{KΩÍs… îÍY¯¼¡í/ìvÁÏ[¡¹ûظL%O¡@ë™Hƒ†“<ÞP'ŽJ£ËÛv‘Õ,Æ$´"hU§M?ô³ë?á“p*Ö„7v×=ÛP1s}ãG“@¤N’5^ð3•‰Tå³8fº† yQ¦‡ƒ¹O)„´ ¬07ÓæÒ%… „G ¤ú’Rz£óófé{‡Ð^DhÜô<ä¢{™™âB•ËêÖ}úÀÆÙžs2f'CóyÞâ°_ǮӮ@]c8“±n7q'ƒæKˆü4kO=íþ«@ÖU ’¨Ül,ÝPoE‹ÞækÔ™*7q[Ú!ì÷ú·ÎØèÓ\o»‚^×$·&Œåóò]8l•ä·s¹&„„Á/ââ^)š!TåÄ€a óá¬îÙгá½S"¡;è•üu^S‰ +A5ò™6B=Û¢vnÛ™Tã­™±Ï¼"…Ñ÷©]/Í-}{ˆÑÆÉ@ŸÓ–+‹šiý©caÎpnÉÛã´ÓàÕ{ª N©‚ïh%sùªéðž ¹×áN¤‚äå¬VÚäÎǃ¿a©š]ôè?Õ, íòbt¡l“˜h6‘ÿOˆvÂ_§Ré̳9S ^Pí!Qp÷ Á÷¹r6÷¼Á;¥³Z–è¿ÆävØ9k yŽµH’yp;DÉ×ã “jí1œÐ:ðR„]¬Ý›bzGC?"%‚|eÝ¥w!è±–Í6Ë¥RPè$æ·¹:ÿ½Ô)ɽŽE#Øè¥d'¬Ãå¿-¹¶ È2/Å %ñŽ}‡Ù¿ŒØÜ mË…¯ZüÄ¢L8ÏÀôÙcËóµÏoÙ¬Š›LH¥ÔºKÜôó¶ž)˜õqž+×ÛcYy•ÅÍ»¥Ÿ› +_MÌŒý`-@³0Vý'ôœÏ±Ðö©tS¨É¤d?ݱX(ƒYƒ…Ä÷kNWµYÀíFØ<èB}³–ék³ÛÃßRí(ÚÕÏ^2Ø%0Aüýìm„»ŒÎél|mƒoÓÈì©4Q庭„¡y|“Úvï+~¤ŸÄ”EµcŽÔ²Ù’MÆë×/­öëtØ…üHeš†ÙPC´évNîç"G6} ¦ì ]µ1¡AÀŒ”+|ç°(ä¹VÕñߊG¨¬l7‚KfWzÞrŸ_í=ûG"{'ª4ˆXM ¦(¿Õ±†ïz/ܳ¶ÙzÒLÃZ„|¹‰DÆ4-ùá0Eíp¶ŠZSú¸óAÇᇬˆ³ÈáظA䔹Á½tl¾±ÎHý¡[TxsÎM¢…P‰*´ ee©¦f¦¾wkÚ‡øH1)7¨i"ÐÃï‚$0Øþ´«ÃV?i¿‰zóç8ÆÅ|¿¥>dÿjyTšÃµVmÇæ—áq§ÌgCÊ‘ó‘ïØÀºš9z»åTˆCÞ/Ät~º‘؆§o*í—9ŒÆrÏÕI‹¡oj¯UíKöxj¡º. ,Íø+b1°Ö²ñ ÃI›lDLïQZ3÷¶P-noô¼ò½'<ÐY½ße-ì1/øŒŸ7Œ*E5#—d‡ñ¤oŽ–{]Cª>$þF zg!Ј*£oºÓjŠÔ-Nûíͥͭoò8ÅKwQgÝpœ*1^‚.ÖŠˆÎÂë± LV‰~ôÈÿüøm laÎ ]%ì."¦`»©_®Q·iäpÀöÅŽÊs’5y.XƒÒÍ®)ñæ¢T¡í¦öd1KŸ¢k««ÌŠjÝ«¢n0zªG®áq‰·ëa©WQé%z„ÓJ²¹ØX¤¢Â3tIy0ľ£.Ó#‰Å[“#¤ƒD#EÔ‚’pp¯F*«¨õ±Þ˜vïrOÓdÎn7n ¤Òf9ƒ*¯ìX:õ¾ç>Ǽš:*ÍÞLfÍÃ9’&a’j;+{»0‡ )ÐÚA’gðK*êXKÂÛV¶'zÙZóÌAÚ) nîöÏ ³ÕT¼”ÙÛLÎßJëïòé^Di¨ý[¦ð:s<ܸ2Â!ñÞU‡ú|›–#ù5V„ëfÑUŠêqP}÷üÒ|=ÿŒš»¾GÂ5O|PÃèsš9ñ§Ž}äå“* ++gpA€,°,Á=|j«25CÉ‚7›õÕ ’ï–¯µ—Ø.é/F¨ɹË0¢–tRœáºŠ."we h¿P€ eäÖmpòîJ‡o´@°±â–ÿ_ðêAÒ3§âQé¿BVô~$ð9E¢ºj= +‹]? «è{w¯_53÷¥FîgÓ¿³4B™èä›ìA¨«á½q²ÏðËßÀÞÑ4µŠ‡¤Ðî1®PfÔ ¬ó,ÖSÅŸ6ôã¼ r¤§[‘ÝÁv`à ¼µ±ûô¿x÷G$ÿŸ„¥êÏ.}¡¹Î Ì”·¨ÜϾFÍ™”c°±š‘‚=­Áf!*˽ª./çwĉR8º(@ÙÙ‹šz c¸½@ÃN&Im—žÝ£>X!Èm•eñÊ@ÎlWÿ"ÒVCHèø8ÔkÕ‘ŨÍÀj]´nó3X>J¡z?ÙDí€=Nø…½bn {qbY(>"Ù8*苾ÉÍk¨y3øoâŠøÜ €ó‰Hã Dwâ¹ø]¤€ ½ëd {E’ʥËwÙá™.$Váçhtœözk$kk$]4ë¯ðcf¸¡ú¯»CW–@òËóû3ö—plQ¯ “Q晉bá¨/póµó T“q¾FÑ8_þÕA’¿nëQÔÿ¢mKŽ{&z;<Õ¿žmòã[i7Ìϴ뜣ñ'*ýqX€Tnòt<ÕŒkò/?©2m)žËÏfô¹¯í•áâ¢øy#(ã¯`%Ú{ª§!¼Qœ/0&sßx¹ôfçB«+¬Õr‹ïEßï[tÑÚgOKQ£$ú$²=à‚ל?êd@3c)]3ª–êÕÕ%*M}HÙ饞;Cd£á˜Érp½°1jýŸF¿ˆc…st•íG'÷âC\–ºp)­)öþœ_š¢)±Õ(¹ú›¥µ MMº +µ³„ qÆv&¶¯Ê÷Cßå3N¶LZs³wTPu{‘s¼ºÏ°øPž‹+-­©¡U‹—±FõUøP"»KΆ†+«¼ÒY¾e7?0«(•öidšL´;gk9Á(YI-Œl`®õî;`ÕçæAÐH:´® áYHb1“µâ»HÇ{¸wƒÇR^YÑš, + $F¨Ô–HDÙòV?pòyÑ—¹PÞ%F=»‚ÂÇÆB‡öÙö¡GKØPh³~N´[¸IÊ_ꈀCðTzÞÒf ç <Ó–”Œü`4|mƒE1ÖJ'¼ýáÝ$`}R ‚Ú\¨€âN2kf¢aóouJ73ê´òváê¿ôKÛW{œMÓÞ3?שMò]ù:W;v…˜Xâ;„B’ ‰§X.PáI;?Ÿ!T¤ˆ . ·sìöU]䮃’=ÑXŠþ ý’5´ÀŠ€r€Mt>y=¯†*ãbC¾~^ ½tÅAr±¡l'¶øŦ•üó\ô÷ØŸÉL냡P“&7ttöuGz;Kg4ÚóÑ;·u)"é³;úÜS•–Âv½ËðüõC~³?¿Jƒ»/’F"6U.°0>çô+ÜKQÓ„0"ÊÓ«Ñ=ëñséÞíƒxoJ]áת!Ìwü6OÝ.}‹ž€tëË5ð¢VâË©³+¬¥·¬­Àü7Š³Us‚š®Ö©ÆyçåÊÄ“TŸŒC5üGrgâUÄ5Ì$‡—Œ&|°µ¿Š~•ÝGªäØ8ž—ê ¡¯Mvåß(jt§ƒãß ñÏ¢nñí›]Î]ÅQÌW…OØ‘ˆm.Œ£Ž­ð R[¾ИoNÐngô IFç:?Ât”ØSUOÎcÆj2Cm[¥à«O7>kWúk¥¢©½ËX¯'ƒ²î¦œ ™#ñ–Ñt'X•d9+E7±ðNä™dŒ8{Å]òŒm¹/‡ Æñž»×Ž¥dv‹Y†Y–öþwopôé™!™EÄP7F?oÏÄjP9âò­"›Ág“BŠŒÎÅÙPîŽô;B^QO©“3ù¿*B 3³ƒun»$·ßIÆêÿŸ_Oeþ`Ô +˜$Àß`‚}-9Q/Ä¥åM³e€ž?ØÁÇÉêÒ°µìhä]Š¦bð¿ ýé»ê«_ê2<ÑøÍ–Æ+ìtª°[Ïœ±ˆJ¡þ©O@¤%M‘I‚Þé t¦µØvƒ†Õ\¹/bãÑÃòMVFÊ05ÿÎ+kIå2W9>f–}©· ÐøĶô3t‡¶«ÿº¡jª†¦Ã|"§´æôîÖ +ŸÇÉ[ÓLŠhŸŽY³)Øo57kßþ“#%H’\,¬xÎ|ôs¯sxí¥HžÍ†|¶;`Å vû%øÚ¯T[ícÖ]ï‚eŠ"G—Ujß«„yŠðtvxÕ:udQ‘uZy_²Td"“ª£b+çÁO†]Øl¯xà'6wòã6X²C¿DKOÁ‡f›(g@w]f¦V™ËÂ[ê~ëŒ1Fùé^A3v¬ ÇâO›vÒ¯¶2v +zúåT¸ÞúË0ÙÝb#ö›[›zyÏ’Õ$e1¬ÛkÕ5—dRAì`(Å…@{=yW>¿ðˆä>ú‘¯çy@æÅÿ¹Uáú;”œ±=ÃH;Çc{–ðÛ3û0´ü£^Õ®”)Õ™-{·O¹°OÖú2R¨Â´xÈèëˆQÅôwBgБâ¼ýSý»ž›"תl›bðöz;¹±˜IyÕ\E×ÁËê‰pr¨kM ·@@Âe&À 3•˜lÃʉtS÷æÄ%í)è/pRÇøÞÙÀª"2µu +˜Ee’5ø+ÄÙ°s§Zò'±¹§õ˜àçbˆQ²ôð°SlŸjÅö zvÞ€â~%.Kz\N¡Š„çüã÷Eê,ãeV2Ò=Mo)-JcÇVà¤:ÒUÙ2Ѹ>É1ˆ×È“"¿QWº ƒOýõÑ‘4*Ozûº.c¸ójc”«Y&üßÜØØÏìê=ë7”Y™´öøÈðê,A5nÊIßFŠ•/œ?üÌŠ™á ]as.´N/þCÕ»tÜýH…G50x}eb9—S&ùœ¬é0år¾à«J§H€‘ôN­\¿?^4'îœèb©É»ÏÀ':KÒ`wpcÏT–!ÜÛ_ÿ öH˜°øŠ¹ÝŠ†‘óþ{–ÅÚ}þH¿mZto”àÞê7Ê«¾ß‘ŸQžÇ·¾Ùbi>ÜœZO¤§ „Š“dºdFÕ¿cc +$"«˜ |ðhJQvB'\³Í%Ì6Âä²3è¯J)A‚g²+TÂò‰ªÀî§ø¼†ÊrnßãLì“O™ÁN”@µEzçHr$©d(|º÷Áµ*zˆÜtiF¾ Õˆ›º&«Öþ«Œ{Ó5rT[°)C÷ =t†<¢na‹´€ï3å—³b«m ~WåÒ¯Ò-ÿ¢-ã*_>Góe‡È¾ºJµö!De:ñ‘1˜´‹ 4Åæ*ørkd›«Tý^ï\ä”(^”å·|a›ÊuØÝj“íË•û‘ÇNø×–GÁÔß² ‚Óh$ èÇ™¼Op—H.'çHã •Æ"&+t +Õd[øÍZìÿÏ—ŒþEßÁ1Éøktê±èŸ/È”’Ë7ï\xßY +hÊ^IJvh>üã\/µ?1ªX½ ¶Ç½þnyWlÕ.ÀëŽýAñka«i»³Yè4!@¬ž.‰Sø§Eg_»ç’Ž;Ûâb$;ˆ ‡ëÂótO¯EN,×@ß}Ë€¡À°ù¢g~Ÿ ÕŒª ÿìÓf,ÚtäyÁÓô–¼yËV½Ðc) ¤ðävŠM‘/ã¶"{i è~?pÕ·uŠCùMãâ ÙD=Þϵ0íŽâsñίŸSÖ΄V"å'»„¬†B &ïä·!©U,)úÛö8®RîÞ’¼JRD0’õ˜]¨<±M£r|±98CÙ׫NnŸ3Œ®=Cøl·âÛ ¬¹?×NÛ©Y$£õçÌO6­¾f¥;çHÄ2¶h,°èbþ=&qfžN¢é]ŒÜŽbt¸íÑ)Œª~±ƒ2¾3ªØÈFG?‹ì¼ˆç¯[¡PöÝ«_²Nò2@´kÕ +ç=‘ûC:•æ¨ÞÛ‡uVÝÂ%Oÿ^øðí"¢….œ-ٛ冕ÅK =°‡¹G,üû[{uscˆœ–8`ÏÏ=ýŸ,†aÒ… 4` ÆÅ ú>,®Ò¶Ö!?îòy)ÃéFë°ËåðÜîá„ 6áò.sv¼¿×W«:iAaQ±Ãà²$gŒ§õp¬$~ÓɹPKx 8IöJ«cxîÈœ6ò‡Í¯MæHkbŒTþ':ËX¨ £¤ IÖ°(ý¥t#˜+Ëd‚Ô¶dó‘áÏS¿¨¸ëçTOöh†¯Ã¬tÙRŠ<%¢(•Ð^Ì0ƒ.9u¯»U€˜÷ÒŠo4jµѾ9fyï)œeYgSÆ¡’#åKçÅÚ)«šàAªÆ¾‰kW2«Ìê1#áÚE~Üì›—‡‘€"@q´¡Ñí ¼3ýÍõ•Ý|aTõçÑ ²õçýÜ=iï¬Péi~‡ô¶!zå ¿I·&;çXÃÔe”eg¥8:¨ÞæKäÓª¦Då‘K[h¾$Ñ)Zd‘7Ï’–íÁmQ¸õF*TÕ™<ƒÀ•çãDMI ÷%’ê¬w¨}Ö姞!‰zQl#9Éñkãt²_©¤#åJèôn}ƒcy6PoÀ Óq_eÝŒñU¶]6-Q +±ÄÕÎr™}—G~D"ØiŸfì=¤öóåºÄzиƒ“ÒhΚ¿(±0ÐóøEÕ‘çë17MæÔã1Zͬå +è-óÔÓ“<q­œžOvÉ™/$«ú—¶#9æuÀÙ®ywÓ ”}w|ê&ŽÏÓúÇ{×?M + –±Ï~x3Ï L'-@Zºkv•hcîåQ Ñßž†JˆˆQbù<¿ +w‘­q¤žÈ$vßKGš¹è¨„¶ù4Ý[ ß´g´<ôçæáŸìa’éÒø4Ð}×D+VWFº].ÈÙÌY3‚8:Þ­*£VtE§;Ôa‚£Æzv âäÂŽwâþê•žúT¬¶ÕƒU·ðÊT³Çd¹,lW³Þêa|‹h6*¡úšÿíA»TÒfÎ’*”•*òþð!#ÞÔY‡Ñ¬¶ù} ™©爳ܒ@ Ë\ÖÛæ mÏÈ©õÜüÍ—Ðh\‚§ÅÃ0jÉïzôÑdA7y,¾Y‘‰õŽM5?̶ÀÒí„´™¼Äðï•òa õ1@Ñê5~#uñÔÖ ð'µœYá! +5 +¡Gí´ž ¶ò +êN¨ /?|$Ŧm§®µØ<£¾$(Ò [ÎýxxGµÊt„Që¨WâÖ˳uÜu±_.3YôNò&ÍŽ”£3ôëãÜ¥OÛL'ÔŒP??赨ðÕT!Ã¥¿ÿQž2ð¾B#7<.*zÿš¢¹ÎWSá®6zÂçjÃQyºtˆH h´t«p1ÓÇ-Y£¤`ÑÊßòaÕóñ9·³Ö<‡còÊ¸í±¦½lXôüT"t:ój˜Y|… +ùÉ,»oêã%þ¥Ý|å.' õ@qÞ8Š‹¥;ÁÏ;Oƒ˜"!L’LÄpëÚ42ð»ûÓºKG||mHƒvÚ®Vka›WŒ£Ìè{9 ¦nRgÏëaZ87͈ºŒ,ÒKØçÚÐê¯`˜vÕ+ѯÊZôvÈø–˜Å^(!ñ|øÛ¢ç~†}Mr(îéÊÙîY|ÙÅ~v,§ ƒO5í³¸¤·«ÓØ€@ïÝŒzªé ?|1òá?º§êŠRâT*kqMèÜ+pª^ñ\¿u PÍoCàŠ½zž¿$Åt@ò[àB-ïE,ì9g"Ù#£.º±:N¦ßú=¸Bœ*×ÝjyqëÀl6Þ]{j?Ê"AsŸNŒÜ9 +p¿3úù¶ô4}|0IÇ•çÜ›‰Ó±ŠõͯHöûê¥=ÍÝY©É§ÿÂ`?ö.¼5‡=¯ØP?Ÿ9<¯N›¥;Içq×ÈW²s1nÙ„&mèÛÁLÖ§_&ÿßczÙqâ9lÁÉ£Øg^[Uz(ß@ °ï™üî]øbžéõ½|ŠÝæx‰LB=öÇÙ”6Wé€ÀîûÍ*Gçt.P/6œn¼ª±*Ç|›Dï¼ëäÑ'+T%^–Hho'«‰ÝÄE¬ÁÖZÑü©xøT*!i¦…ÍZ±w2l°f7ƒ*VÄÈš8Ÿîá+ÌzìïÀÿxŸâ67HÿÆ0 +bŠ|Ü<dF.ÊÂÃOü¶½£æ[–ßÈÝv°ó¿ö±O¨hgÃy‡Qì®—»å¶ ‘«h›0}¦ª»½˜Üõ,ïåè:Õ(Ñ–~¨–À–’6uü‹mXóR·VÃ<‡­Ð³JVíM\Fw%T6VÍvýÑQnéϨÿ¦:Ï M¤\Ú…Uá-:φ57M}ÎÞ!ãZ‘D ês‰ ç {$Qû0Ý„në®êÓ Ê(ãN3°Þ[( ½¶?™ÓŒoÜî9T½”ÒuæÒ«6nÀu÷ðD!qáíZ”ÞU›·ÍÜT”!pÚ¹VØžïó‡/…È\ýh^‡:ñ'.AµEõ¾S‹6P*BËY‹LŸ +endstream +endobj +2218 0 obj +<< +/Length1 1173 +/Length2 2916 +/Length3 0 +/Length 4089 +>> +stream +%!PS-AdobeFont-1.0: PazoMath 001.003 +%%CreationDate: Fri May 17 11:17:28 2002 +%%VMusage: 120000 150000 +11 dict begin +/FontInfo 14 dict dup begin +/version (001.003) readonly def +/Copyright ((c) Diego Puga, 2000, 2002.) readonly def +/Notice (Copyright (c) Diego Puga, 2000, 2002. Distributed under the GNU General Public License (http://www.gnu.org/copyleft/gpl.txt). As a special exception, permission is granted to include this font program in a PostScript or PDF file that consists of a document that contains text to be displayed or printed using this font, regardless of the conditions or license applying to the document itself.) readonly def +/FullName (Pazo Math) readonly def +/FamilyName (PazoMath) readonly def +/ItalicAngle 0 def +/isFixedPitch false def +/UnderlinePosition -100 def +/UnderlineThickness 50 def +/Weight (Regular) readonly def +end readonly def +/FontName /IKXQUG+PazoMath def +/Encoding 256 array +0 1 255 {1 index exch /.notdef put} for +dup 165 /infinity put +dup 229 /summation put +readonly def +/PaintType 0 def +/FontType 1 def +/FontMatrix [0.00100 0 0 0.00100 0 0] readonly def +/FontBBox {-40 -283 878 946} readonly def +currentdict end +currentfile eexec +ÙÖoc;„j˜›™t°ŸÆÌD[Ï|<3322ãý¿ô9I¸fÈÂÜ"ýÇXXH`ì{¶Ú(Ìb$`á«2 GyµÀ<“»»É\ð&’ÌMê¨ÒêµÂæCté+Ë…B›®JvÀÆ·mo÷Ïš}^ß¼ éYTY½·ÞCÒ]Sü=ÚnðÂt9x¦Ð>ÌíJòêKÌ1¾‹ž'r6iÁ’Xï¯Ü'l±­é Š”6ÑŸ±Ã=ïvª1]Øöüö•mñO ÀCüQC>õ¨iÔÜOLÈU½O³*TšÊcàTÙm(™ß‘À߀ ãº1ƒí˜U +nƒAŽYXÖþUÙ1ñ2¨×èèÒrÕrúE^Oy¤"â;ôDmÒà{O0Ò {2ÆR¿BÊi³¯N´íÌMÃýÙöéufû–L´”o¬ñ¹nõÛâ>å‘Gl»_ëí1¶#<ðW”\’ŽïJºÄaâ½æ„_«òF˜bÁ†ŠÆ~˜ ”M÷PsRƒî“ÛŬK5B|«BuG‰ÓŒ8öer(ßVΫ0¶\òFÌ+£S“hdr=ÊéÊVd^ RUl^›0ŒpEÉTXÛ½‡Ç›í—ë]Æ%’ÁÒ[õ”áT8ݯœû%²Uæ~ë½25ƒì£‡µK÷±;¯Fª<´FS$ÆÕû²9dwY€-u&‡i$ªò¼(O´fvdz¦‹ hº¾Ó¹ +s;¯“Ï‘èB]¢rø„£œ* 3Ü”™ Ãæ_&+Æ ÿªñ‹œÍ4‡ôÇ9¡Ï^WJ òÐlÆ.ņä»Y¹~^à³¾¨“òXq¹=ÂË;agâ £CgïÏÃÄ _%‰Æ˜¾JéD­í>‘ÚYÎ> Ø‹’¢á½næׯ ,G#¢Ä²›‹5À.êFŒE)a"™¬²ó p‘˜ßqÖo-Ù4Ï`7µò}§Óº¤WÌt_ceuÅÐóJŒ—h>ËÌÿÍú¸µ>Wþ¾ªÎ! ôA5j¾-±v`òç-Š?R ïtl.ƒe)öµ1=Ìï–kdSã¤iõä!5Ëþ_lŒœ<Ÿ ô—²ß-^ó­ª+éÍš‘ŽôùbøގçÞ©ÈÜ–™nWÿ%yïg§õ͚™ý÷3¡N>âVpä9ÿeŠ.•¯oÀÖÛv’£ˆóý“€hÓ; ÂÍ‹ ÀfW´\¹ü^yÑ_øÜ“±š¢'Úî1︖œø]c¨*Îç–" Õ磺û©@A.Øê’æXïècǽD&Ê<¼r”—kö¢ ¾Õ|lØ\†Ù·[ne_@úÄ_TkϺ©åhô´.5G´ ô­­†ï[î‚\?… nä½;göƒà¦ŸÌ’\]%»ÐAí¥ÝVY 3ã”è×?¼ ³ßMhš•L”I•*nÁFçÕqÄ™ñu §Šeñ÷~ÈÀA‚õE Q€P+%[sÖÐVÔÓ$—G… YRœ}aâÕÉèüHງ´×÷õÃPœ  Ð1t'yˆwBî`©Ö~›× +‹å$óe1cww9üBî6$Ê—TĶ¨³;ÀZæ (4¤9g»ð6Èv¡¡f«´§æ4J Ÿ¿¨]5(§™Wð ø„šs¹Ô˜tàaȳÅ<[EŽ¬p­üT˧Uü¥!µ”JtОÄò ©ýª?,–ÎL?!w ˜×ÃçsŸðg0Môh¾BÅ1êä³Í$sÉyѼÏåÐÛç8G•º“Š(ûûbÏí>z¨uݬ£ÿ­·¾‡Do­eHÐòñz)Ž^0 +9JžÒW –©fÅ'ÈUâ€ÀIõ•c‡‡eiÒä+¡5¦¤ÄH,ârø73g0Ÿ3{#Y°«ò_‰E^n2ÖÝ¥»„ þš®Á¿xdR$;‡ž9ÎÒÌ(ìúû·ŒfôñÂ÷n*±Ï×eHýc¸«AóìI…#†y`©û÷µ%,tÅ¢\µ ì)ÿ_ãV£+µó7À\öMþ7AÙ4R‚ã"З…@ðOÉ!1í$¥¯*ϤóÐŒsp%>*¿È–, QÄíŸÉ¥H;ºEÖ×Ùº~b@Ž—näÿ¦ sשÌXZðÛ–…ì+ ”¥ì, +ƒy¡¯#ö/–!A¨§L‹9¦Ú8üøž¢¼üT»a/±äe£T´,‰ut/u¼¶ÀÌî6ü~¯® áº1ûQAÔ‚\ñSøâWä{N?2¤±â/ïóG½?ÚÆE£æ« 8¼Ó®chbK3¿qí$£þ;+^ÇB(ƒñ¬¼¨xšà¸‰k‹:wMá…XÕ,³Óƒ~õ‚Rçÿþ[&¡òh¬8AµS #L'ɧÑT‘p¤®ú#{Ûª½¹>É;+â­¼’MÞuU§”‰[¡ä˜%Vû0ºEÄ—‚Ù²ˆC9Ø…ÀS©×Èu^sIÓ2\ØþÄï’¢ÒÊÍ. +>Íj5ÃxBÄ’Úª]˸fóŠâ*Á÷‡g|Üšx*c·fr+µB!ÔÃH9Râ€2Hî4^ÁEìdöãöÄ>>ž›J1öu£¬•z¹&¿ÐMLLœ÷i¸Úo™Jè$!¸ótîFoµÜ¸§_µO>yù,ÞªôÎ+ø±#îD‰ Ùqµø¾ýWô2³û  tE"Q×IŸÕ…À§Ø.e]‚Çùl wÙú,•Ó*¡"ŒœMf]­—ÍÆàiðñ/FSóê‚’.÷ìsãç_.nÁ1 àÕ/€ßâ8,‰}̨ó ÍÊçðW˜“‹‡ HFPšsµWkn3‰áa¸9`6I¼åŒùÿ݉Wi¦>|ä =ÔÙÙ1u¬Åª4ÿ¤ÅÔ…X·’HAH +ƒ$ü-UÒÛÊ3¤³# +endstream +endobj +2220 0 obj +<< +/Length1 1188 +/Length2 2740 +/Length3 0 +/Length 3928 +>> +stream +%!PS-AdobeFont-1.0: PazoMath-Italic 001.003 +%%CreationDate: Fri May 17 11:17:28 2002 +%%VMusage: 120000 150000 +11 dict begin +/FontInfo 14 dict dup begin +/version (001.003) readonly def +/Copyright ((c) Diego Puga, 2000, 2002.) readonly def +/Notice (Copyright (c) Diego Puga, 2000, 2002. Distributed under the GNU General Public License (http://www.gnu.org/copyleft/gpl.txt). As a special exception, permission is granted to include this font program in a PostScript or PDF file that consists of a document that contains text to be displayed or printed using this font, regardless of the conditions or license applying to the document itself.) readonly def +/FullName (Pazo Math Italic) readonly def +/FamilyName (PazoMath) readonly def +/ItalicAngle -9.50 def /isFixedPitch false def /UnderlinePosition -100 def /UnderlineThickness 50 def +/Weight (Regular) readonly def end readonly def +/FontName /DUJUUF+PazoMath-Italic def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for -dup 61 /equal put -dup 40 /parenleft put -dup 41 /parenright put -dup 43 /plus put +dup 97 /alpha put +dup 98 /beta put readonly def +/PaintType 0 def +/FontType 1 def +/FontMatrix [0.00100 0 0 0.00100 0 0] readonly def +/FontBBox {-70 -277 902 733} readonly def currentdict end currentfile eexec -ÙÖoc;„j²„¼ø°Aw-åÎ=Ó%åW˜)-{Ùr½uú•)¯œ‚ßröA•ÉÂÜãE(õ@Úý{ë¹´‡º“Q»û|ü_‘RÑå» -ØÐÆϤëA³Å -T@æ|ýq|Vk¹¿J% qu8P¢øwÄGx³ÅªÛ̆ÖåQæ¯6K üªÒ-UŒ\§Ô%¡bÕ"t-*ðxÔõöÓŸÏÿJ‘+ -}ì3¥{Zà2ŽùÕzݬT2sÀ$ZõÌÑ&{Bè–J×{¨¥ÜÆØx¹<¥Ÿ ,—Ü-.è2›¯iR‹n·Ã±vÌÙ¾1ä • „'Æ䓱©·_r"O¯»_‹t±3keè¾fBݼöVÁfj—Ú9Ò³ÿ Ô Yh뀸Á{û´qÝÉÊÆ-÷†—ºøÉ·ÊãÁ}W§?Å?gw1*Eh[ŠÜÛ:›—ªtÍÀWeI,Úv•8üa…<{‚ñBú1â¤00Z8ÃÌî5 |ñŽ}­µðN¾àÔ×lw]ƒÿƒ6Nÿb¿—ðð¤h3E`œ†(¡›EÁ‰¡Þ'Q;²eµÔƒª/ðà­.D©yL’â5ØñÇ$¥4IGÃå׮Ɋ\‚yjù9£.îV­U»5£Q!äì‹-ÍèµÈBƒ…Ä9ödèâö««B^éoVùð­wˆBÉŽáT7„¡‰¾R€ŸÀsO™GA‹¦÷ãÕà b#íTBv!m·WT=úãÍÀ¦üw,ªID%'¥ÙMÅKé<‡|Ù]ØD¥Ã°€@‰õ·€2²½Oø( œÝŽ%œæÛ#_½É·VñþBSTø¢#zàÃ×ÒÍã!ËÑã${l­\§4ĸ ;5Œ™l‘&!×〚óß=(ŒŠþfŠ¸ÞUwA´Ú±›ÔÆeßìÈÌB#½30>ÎìH ¤©'œÞL¥+ÿÖ“žjâz@ú¸=®s\²Þõ<…«=\ÀY$ßD?ˆUê Ðæü6•ŸÒxG¡QnÇ«šŠäi¯%Ö”ž!×Ñ -L2±[žšôã¥)!Œf…X‚cè‹ã<Õ'Ȳ–ñRP3E¡!„-:±'àþÐAJ¯.ÙIJCJÎJtª&Ã{76± -#VÐÃÐÀëÕÖHÕ“ó‹/©ÊF×gw"F5”wñSˆ$|¯ÿoQ<ÎçΗ Ñ`~¶<10©|™ÿ<»émï´·ùØœÞÉhðÞ „߭逵µ°°›?É`?ó7–ë _Ì [•¾ó8xP<41Õó=tÏøHÚ ;6™¹¸}rKu1é3ûšUCiš´Ht­²ÑÓ}öyÖþ~c%\ßqï€\ººæ4O4v![›_÷Iº¢°ï³¯üÆæc‹0ß«òMJùMŸY0¬Ìv{6ß”Ö\øà rŸqÉš"3=÷”TÒYK.ÜKêÉ)Á,GÊK{aˆCZBGž½ -yÛ(ŠK'†Ã2O0ÓV—®6IP$$lûéú¬¸-“à[ÀëÔeÍÉ‹øÇKÌ×&2%g”b¯z‡÷JÌ™,uo~M¬2ÄzÞu’™ ‡Ô6*š»*¦ Å «Ñÿ9¿Î¨ß?Ä£K”c)ËÏ÷á\ðgŸt­&?©ãŽÏÄôûnTëw½“æ¼$ dN\C¸›¤ÎÚOöÖÏ@¥W2xè8q!LdÒ®ãoë~ä1V/ŒÈB=@±ôúí!0’ÁY¡ozù$íùÐ÷Ç€%Ä­¿ÑÉwáÞá2]oªÃß ®_ý?IhËuè¡BmîF:µ1Ø–d©Èy6÷‚ÀÚÞ F ¨„Ln5Sòw&©-ëÓ°¯žGzø¨9ê=È'’ä(Ìý¼IühNú´Ö{äO ˜1LvcÕmiµ¦À&È"MìêmÄ@·WÕ†­¤­mÜb%0Ù˜*ú`šëÄ¡“ß“/fõÃl½%PE2¡:VA)SBL/æ<ªî5ªCžA£ß÷l;®{ËÀ (#ºØXnF’"‹˜„8{RØo’ÁÄT(ä_WðbAþK|oM _ÖÓ—Wßþ‘½ yäMèÿ6€« ±^½÷atž@O1ë¡/Fñ”¶3y#™ãcMM×ÐGÅZýÝ[m¬ÿºzÏŒF lªù«"ÂJ>®eØVBWB}çV)ÜSÈÛi,¡ïÞ§ïR¸¥½»FǪ½P.è½LÑ€xø¦_[ÎXVˆÇóôQ ¯ TPÿõvƒ ù -eÊ,’pþÚƒrh“ú¸“uñ„Œ`+l{÷eQ÷Ö_@f63HßPavÑzUÍ_°ŸÍÆZ«1*ªäǯM+Çbp€†;{ÝªÌ ­Q3èVV;Ø¡¼ÍˆÓ ®O.Ožài,¼88«ÖƒìÆ…gŸ=\=à@"FàúKFøT¡bû˜‘¾VŽl(lg®ÁÚbÒ­Ý!¸èªÕ–ÕË\áýùa4Q1°Ì~ «9âqV¥¸§Ì¥ ’s^kí¢sdŽ›nz‘Á­£ç¤:ÖLžz]Žðªr Š¥¢ªƒvND¾‹MG79$ILiþ&“AØgánl6y‡J FOµò;ð,zÉ´Ñ% -o»¯ß xÈ0˜s‚ÖÈW1FxÏîÔè—ß gþm‘µxVyõµÚÑ"]#z~—§pvòÁ †‘™öØŨ.rB9‰$n¸YC­C1úÄtªd@#÷¨8ÐkN‡Ô‹K° Ó;£ÀêÔ“lFú( ØŒNóŠ¦;àá¥dþ¬Âûí’­üß*¼Š<Ʀ¡•ãÅ„ªsÀ¦9<µG CE`…Vé¢hà½ÉÄ%Q“ÎF¥oP½Äâу$ª½"Ñ33¼÷P9×9ìZs9‡'`ã­ ìrÚWù:N”¥ìÈsÓõÇ”·@fƒ œ^WÝ…&24w©lz æ$Ô2Wï²wÝ…ÆìO9bÿ£å"O¦ŸÊÍWÃ*F]Ý/¦`yªŒQ± ˆÉ”\Q•ÂF*ªÜiTñè^N öd_£M`ƒ¤†S%T³NºK7¯+R™Öì[r¥›üÈYõÅíYqe}¼±Ø ¥‹÷ØY‡Púô -ÎFUE¢ìóþŒðÓKƒÀÕHŒ'²µ>ZQ@ÎIÙàt%£äÿ–œÀ[¨)7Ä&¾ü hÎn ±éY'ÔÛa¶TÃñ'XrÔß™/n_n•JL¡ (–@ -²ç ËК -­µN{KFµßÛð^k›bÞÂnê~í`L¯£]Ûa¤U¡¹Å{üˆvâïLßܺæ¢4“ô1˜$rmAÿîs•ÂÆóònZß*™»=©÷Êsõðo›b@{‘yÖ !CC‹{c.WZ¶ìïæ0n àbp…gsP˜d$-Ñ\ö¯ÎÔ›&L27J»Býóµs/g(ô©xQ,t>[®â8Í“GsŸ:R$9]6&ˆúmy®)iµŠvÉéjÂöW¨‹LÄRB^RBal¿ëx³_ ×Y:²5M§¯ßæîÛ»­µ¨û/j¤…àÅÙ¡!}Þ`”îëD&W€ÈªäÖQX¹$HߧçÌtI¤­¼ ÈÕ.ÙYñ@Øn«r–_ôìnÜ ¾OüãtAN-L n–‹—?_Þ^_…Ûg’µ~žÆ;`°Ðí¨KB‡Þð6eĶÁܯ‘¸ ->”èl¿Qp‘†«¢Ó)À†¶ùµ¥ò/;S' ^¹õ:ätJ±j@êx«³ƒE(0GÊ;κÛQqŽ©Ç”kT'¹ðž;ž ÷ÉDHj‡¢ãsr‚xÜ– -æ.Â>ÚÐÖ¯¯¡Ïk—®ÚlÄ;&–¯#<çÚš°òe)fÑäFz›`·Ç·?¢ãIc@SH&–<ÿ¼’îevn8F‚\ãV±ª3Åóa­A!yy¼ã:ñÓ+žc!£xð–‚bÏMþÖ‰á/f}3ß–›ë˦Çã"G©–­ç'fMsÑÔXWUý}üöµ·¸Yĵ^ €Ô×%jÍÇ!H¾ï‹OHÿÉÍŽ_ù¶‹;臲Îò‘âJ¸v¡²]Ü# -$ÙªÛ'Ch?5™<„Sý»íË°Áñ@“­ ša¬´€-™AcŠ†éloW´M(òr·§#Û`×ÐnY{jÛ`3|lv®`º'Il ýà±!å´ü¡õÓPÂÊì{”¢E¯˜ï=Îpaè^ ØhëOk`·î›ãpøú ­8uyÜ$rò–Ýó•u -f}cqÃ+5Áy~!ýÝ]ÕúøÀìU,­¾t )æi:Hò ¾O…ÄÛ(®_eOéFV¾þqÐ>¬uèàÄ-G:ˆáñÄzƒž…›HZ\%UívØÀ Ðzã^àËBy%€NT⸶3=®Xëà?¼mJ’ø-Ó3„÷iã×H@Üfσýk¸R.ë›õýv'€Çm.'ÍSãÂD»ÌÙlyü7“¢ YõÖC³­Åméµ^¶¯Gu5_G›øµÙ1¦ût/ŒÔ þàÌâð -à4‰±1­nˆ_ûD>}p+æfªg9h$ú¨Å]@ïMù`Ø^Ò¹GKö;5«¤wÇë»äIrqïA¨3fA‚ßYiu¬€ÂÝäÃò½ÑïìÒYréDõö:ÚdR¥Ý “At ë-ª,)âºaª¤ÉºhâÏ9[Ô§uÿSç9K’„ÄTK©aX$äwgÉ“¶Oƒ[âx‹qǃ£yI(–ÿ¼j5L§ªIÀcµ}|hÀ$‘zDâUÏÂw»Y+½ý¬Z:ÎïçÄϤŜ1¿òµlÙeÀÇ`uø@|¬ìÖ܈贚Æ^Ú6î/žyÑû`að¡¦È Š[¯ÎŠ`Ò¾ÌEž£ÂwÁìa ¡zæiòLa„}jÊ°çÉ@û£Šå}ÅxæïW¶™ån‡sŽ˜0n*{¯l?Z©zó‹•Ê‹>¸ç8íp-LÑ&È£ûïK`> `´¨%Â`–¢îÝ¿Ésª?ºvË»Æ^\ü‰*’ÖýQÊá¨Nbf…+Û¼¾Þ„žo" Ù§U ¼zÑógêgÍaäŽu´F Ñ™<ÃXúyݸª¬Ðú*¼Ègöÿ4?X2]Â4¨HšÍ¢›×òš^ÁÐqLeo²œ˜Uíó~ ÷¥’’Ð7>ì)Ƥó™Hnžº¨­A7"é{D†Rj)ÿ×¢Éÿ~áeàíVDJÕ|—ººX}ѧCrŠÈÆTÈAÈè fý˜£’”P};Š"Ó¼KöÓ#ΑT ·F÷ú\çý÷^Ô“ ;™ À|êR D\Ä‚ HuÔ6ÿQÅýИ'Q3Èb;x#¸Ž3óSÌ:ªjbíÛ%¸tJ‘/>Ê¡OÿîœrVÞÂx]°¹¼>)âïŸÁµ]€lù5o¾[ç¡ÐF;,N&¨'i©Š\±NQÁIòHÔÍôÐÀ?T:í@ØFïçNÿ SXÁÝ0±<¬)»hàWÖ½4f¹FD·hÛ.سT)^SÑ-…nw}ý, ˆZN¸EÐCô¯÷ Y ReµzÂÔöð3¼Ò\ -¶ØtíþþYá5`F~_›xÁƒUž|‹'X’µÇ‡„G@UÖ6I \™„çÜö>ü:ÉV_ûÉ>Õ¦9*I0}Óµ\"{Øï3UÚzÆñÉuäFœfÌÙ¤…õPߨÊ)Eˆ8‚üU¥œúw)®ðgŠËqÔ:àdj#¥¹¯ÜˆhqÇ ¥œ¼so -¹@’ -KG7ã"­|QЇæªúå Zòzs|…‚}£ÄLÕ´SŠ¥:€fªÕm¼§ÂA3ïÖðS 2Ä=‘Ö[³d·Ø“üJœ\ܳ­òcìÚ€üù|Ët¾j1ÑÆ›AçrþŠ®ÀÑ®þÌJvÂ,K| ò´…Ï•VÞœ -˜ü›Q„´[ÜÞ<×nrWso}7÷œxrpn„mõãHpF%·EÛä¦ä!$š«ˆºâÎRŒJuçñíˆ:OgŠ7Ç)¢B›@OûdfzîÙG0ž%άá†u¦cs4²ï…C¢[+‡Ö ­cù½¬pÑ v¯sÛpwb»“ xr{¬"Op½ñ q!ëtXœÑ¯}ú´Ú­›%àjé:Ò_;¨´Œ¾[ ;*vU¶þ8´Cé@³e®]FÌíJòêKÌ1¾‹ž'r6iÁ’Xï¯Ü'l±­é Š”6ÑŸ±Ã=ïvª1]Û +…Ä‘gåŠ@ìFÌ7žªàpqûÓÇíõ')øjjOh%SÂÊösq™·=ÓðŠ{ãÑ.] ˆZ§O7Ñ”î‘éEÕ„|Ûª´u±BwLà Ñ`'ª´&ÏÄ0C¶žg†)X0F?nÂY¼›«&‰Ñ8 ˹r¼]lpK¹GC°ÓpŒŒoÄŸ³g\´t/íþ–bj©ê}t SòEî š²£¾cûqR7¡VÀ“L%ìŽY'1³¼b¦êÇî ˆJ71Ù®SU3ÿ=ºg¡OŒä>b¸H”t ÈkK‚¤Ëc:ƒhíý׳A‹:FÁ.ÞxW˜(úäÿáüLÆ^œÓN1K °Ø@_»Pÿ̹>ýnéæn¹ïÂoùf³µ7-Uš EøßØÙlÇ#©+¢ÞöA5FÔ”¼kèyêî¾tçÚ:÷âR¶R¼ª2\þ¤3©Ìøí’ l ªvÕ"‘Úª> VÓûœ³ ù”íb¤…/¼¬þ™÷“&’¥íµæÝ"@Ñ4 +¹nD5¤tvÀñ_\…ž$ÎS„|Kp°ÔCOÂÈ1vF^É(âByÒ@ˆ‰`gˆFú—szVŽÖ‘‡ûŒ0‡ôÎQnÌýÙ .ßÔÒÄT=>>‡rÕYA,{ÓøÉ ÉQðƒ rçFä!Þs߸ߟ0×€ߎ5¥Ë¢Vô¼<æ4d(íênL¤Ø³[PCŠ©ì²;üŠ+qÞ¥œK V²:¾d£§&>J3)éÅ›ôQ6Põš|ÞB5 e í£ñ)\£@ˆ~7mÛknË‹ª$Ô¶úÜ–P±+‡‰™C}èòèo×*Ú(¨ã ·-ßè.4‰ÓÆ7§ŽWá?ø8"½N[.½^7/ÆŸéÌ3§þ/3±j(ĦwÿNÿË º×þ¸ wLjŽC; ìf&ŠŒcv^%€:ÆÃS,ƒ’yr8ºN@K}m>KW¿÷Kn­9×è}ê%F¢S–GhpŠx³‘„CŒå(¹€@òr XÕ#?FäÓØ0éʃº¾’#«w^øcuÖ»¢Î€…y’þ‡"¨-ÚlEƒ£#»~»Áõë¸%UŠôŠ²Í1b[ ^¨Â•1€›€@‹¹¶*°ßr«¹ÖEŒPâW‰ðS/õÆÛ?ÉûN!™õö< eâÙ* ‡ðWxaÏx"(äZ„°ÿ5Å…Á‹ÆQÎu?ÅéÊ +UªgHß{@^‘Κ2¤]Ʀyý±Î1°QÕÇ'Ç,êX§L káx2 ´ß]mÅÈŒÊ0Ó`픈âpº³:ì*:_k£Ãp +‚º“ÏŠÁ÷×%¼þ,‹•™véÈVíb]2q2嶖ćVas ¾|HzÊNê@=Ü«ˆ7ëik¬K‘\É}ŒpYöNsÏS)#â èôŠ„¡cÏì,áE¿×€ÝçZ†üí=D=0»Måaú‰‘fü~nÄľ»<ë“vž­QI¼q¿Š[ ‚]¹@²\ù‚$‹C{ª¸ÇŽžÏÝÞBŽ®0ôh”u‰9íp?{U¶ ~sU>í4ߧšïô2º¸ËB †o´b‘ ÜÎêbþObww»wªÏ>Â|ÎåGkh´6°–´„p rÎ[#õPŽ±"7à7vôÑÍEá8Ê–óõ›Á$;E£þ6×zóдR]}îâ¤#¤#ì+µëè0É"i%uCjíÀ,±|¯÷ÁšaÖSVMŒx›ËßÉ5àê‹Š ÿýp‰åÌÔ6‹ïUj(mEä*Üè~†mø™YÕdÇ¡æ—µàÁ¤w#ö`–9­Ôk?ÏÁ)€Â픥…Z©ÑÜçËW|Mü%H*'¢¤ýÝa>˜"VÑ›ÿ¸w5t¯’0QËPG“R%‘à®"ò*¸½¸“ý„B…u+Ö8®Î~®³j Xí< @œ±cêNÂnâøg¬YGй34ª¥ VÐ])¹Œ-l ÉŠ÷5þ¢•‘iþmÕ™UV#YÌ‚€¹{Û­õls»1å¶weØË(\‹OL‚õ EŸ¶É»Rþó¦Ô~–—[шP»så1\èŠzLê¦ÊòþDÿžg•SÂS[OÝu’p6æ(@1…Àâ£R›sP1í ?õ=ç¨úEìØœÆneM6N5‚†åw<3dÏq^cF‹‹òðž)ÿUÏIÂ2M õØFÅÖÆ¿äë~ãq +1йmدNIà B}ýfÈ·/ZºåMYöŽE(Y´Ö‹'á²¹¢ ú +T;U·Åì‡÷F7®<·{¬ÈÆLN-^×(¥£U'UAf0GivÄOð0P$Ý 4ÛªZ@º/³ˆ¯¤Ž.â}·L‡© ŠdlI/_Øfá°>k[عW‰¢SʪÇ1<" Ì£Çó—UÇÞ3 ±ÃØlÕûÆP.ý+\ÐõBÀl{½É‚ÞÄH%Õ¹éú'%œ’@-ÜƺüŒ˜¼žRÛSöÄëÊÂð覟yNuE—Ê|ž÷Ïù£ÝÝvæ‰^Ó+¾‹ÖåZúûR·kÚœ[ÙÈȈRÆÄýûÔ¶0ŠZHç: +wÆFe@g¬`!$à ûp±eš„pdwíé,Õ,#ã µ„Œž<€öx4¬¦«=u^‹Ur± ^‰ ÷´º×hIo Ç–mÝÐΧõ +AÖª^§Î±:™µd¸5âDѺL•‚:ÊÔU¨Ôœú-¾L::V +6» f žÑrº^‚7£ö1ë5aj³à¡Ašº£¨nWÉ©!µp© cílov®>ÕªI8©3!ÕÜ/ª˜ JÝÚ?|iî$dØ_ìñ(°/;Åb÷üÒ49Ãï•Û%X¾eÊal¥Øw픋v_Õ̹ûœ@dr®Ëp.álä§<çêÉJt~½ŸM«÷[£†yœ ä™û3¹î[Î{ +endstream +endobj +2222 0 obj +<< +/Length1 1756 +/Length2 19614 /Length3 0 -/Length 9743 +/Length 21370 >> stream -%!PS-AdobeFont-1.0: CMSY10 003.002 -%%Title: CMSY10 -%Version: 003.002 -%%CreationDate: Mon Jul 13 16:17:00 2009 -%%Creator: David M. Jones -%Copyright: Copyright (c) 1997, 2009 American Mathematical Society -%Copyright: (), with Reserved Font Name CMSY10. -% This Font Software is licensed under the SIL Open Font License, Version 1.1. -% This license is in the accompanying file OFL.txt, and is also -% available with a FAQ at: http://scripts.sil.org/OFL. +%!PS-AdobeFont-1.0: LMRoman10-Regular 2.004 +%%CreationDate: 7th October 2009 +% Generated by MetaType1 (a MetaPost-based engine) +% Copyright 2003--2009 by B. Jackowski and J.M. Nowacki (on behalf of TeX USERS GROUPS). +% Supported by CSTUG, DANTE eV, GUST, GUTenberg, NTG, and TUG. +% METATYPE1/Type 1 version by B. Jackowski & J. M. Nowacki +% from GUST (http://www.gust.org.pl). +% This work is released under the GUST Font License. +% For the most recent version of this license see +% This work has the LPPL maintenance status `maintained'. +% The Current Maintainer of this work is Bogus\l{}aw Jackowski and Janusz M. Nowacki. +% This work consists of the files listed in the MANIFEST-Latin-Modern.txt file. +% ADL: 806 194 0 %%EndComments -FontDirectory/CMSY10 known{/CMSY10 findfont dup/UniqueID known{dup -/UniqueID get 5096651 eq exch/FontType get 1 eq and}{pop false}ifelse +FontDirectory/LMRoman10-Regular known{/LMRoman10-Regular findfont dup/UniqueID known{dup +/UniqueID get 0 eq exch/FontType get 1 eq and}{pop false}ifelse {save true}{false}ifelse}{false}ifelse -11 dict begin -/FontType 1 def -/FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /DMJGRR+CMSY10 def -/FontBBox {-29 -960 1116 775 }readonly def -/PaintType 0 def +17 dict begin /FontInfo 9 dict dup begin -/version (003.002) readonly def -/Notice (Copyright \050c\051 1997, 2009 American Mathematical Society \050\051, with Reserved Font Name CMSY10.) readonly def -/FullName (CMSY10) readonly def -/FamilyName (Computer Modern) readonly def -/Weight (Medium) readonly def -/ItalicAngle -14.04 def +/version(2.004)readonly def +/Notice(Copyright 2003--2009 by B. Jackowski and J.M. Nowacki (on behalf of TeX USERS GROUPS).)readonly def +/FullName(LMRoman10-Regular)readonly def +/FamilyName(LMRoman10)readonly def +/Weight(Normal)readonly def /isFixedPitch false def -/UnderlinePosition -100 def -/UnderlineThickness 50 def +/ItalicAngle 0 def +/UnderlinePosition -146 def +/UnderlineThickness 40 def end readonly def +/FontName /NCCVYE+LMRoman10-Regular def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for -dup 66 /B put -dup 72 /H put -dup 73 /I put -dup 32 /arrowleft put -dup 106 /bar put -dup 107 /bardbl put -dup 102 /braceleft put -dup 103 /braceright put -dup 50 /element put -dup 21 /greaterequal put -dup 20 /lessequal put -dup 0 /minus put -dup 54 /negationslash put -dup 112 /radical put +dup 91 /bracketleft put +dup 93 /bracketright put +dup 61 /equal put +dup 40 /parenleft put +dup 41 /parenright put +dup 43 /plus put readonly def +/PaintType 0 def +/FontType 1 def +/StrokeWidth 0 def +/FontMatrix[0.001 0 0 0.001 0 0]readonly def +%/UniqueID 0 def +/FontBBox{-430 -290 1417 1127}readonly def currentdict end currentfile eexec -ÙÖoc;„j²„¼ø°Aw-åÍßᾉYň5t&× {hLšGÒqB`d­Ë—PØ©†ÑÖ|*îøÎx\ÁœÞ–HŸtEÅãBð-¡ÉùóÁgedogÏ7—‰ãï‘Q`[['“WÖü…7Â3ç®æ¤ý¾sçZ9ë m ¦ö!–tAž¾ë‹Y!$átÊY\Žr[˜uTIUÏý‹iŽ÷B¼Œùyã[Ž™ÊÝÝÈœÆÅ—3ò¢Kï6­†zJ!žË’Ç‘œJ·s&B5öC©•"¥m†&Þ`7ÞûӢƻ‘Ç3uµgNC O~̶u³o5Ö=žÂÞò_ä›Tù/ ¸’‰h,°—$B%¯döʘï¯3lŸÃ ³¾ÛÿMÓo§¨÷YLÚ¶Œ~ƒ3^æÐø`š‘12]•=l8Çà£B6PlpËfWÚü<• %PIN!jáuË#,.õ£Åi«X“nôè¸È¼Ê‡ðo$îÒX½šëÄê‘·I5öÔV*E6t±K×o¿_)žÓRŸžP»¾G;Z@9ØÐ"V3¡T×TLç˜N Y:…A€Ókî©®Ú¬6Ì =‚¸=•8€0~ÛÐðDQì±ã QÂù^brð/2^KŸ¾Zh`§N'Jt4i¶ëüèBYÒð7ÖÞŸBþU_¨~\ŸfûŽõçôµïŸXA³ˆZl‰”¿Òã_£Ì½ZÆŽ˜ÀÐìý'•çxHµúöðbÊG:Ç"„¥l«¶5ºCÝƉU[ÆaL¼ô¸rÂÌf¶ôù 1[÷;4äp^èµNïpûªqBD 'Ø…93ã­M€&9{ ˆVD õ8 aе‚2Ön.hf¶ -äl?KÚ¢ë[$‹¸Š³úGpðöÃݺ±/JÇzc÷¥‚F8ûvP3Üà/XO6ÈyAo¿±î~ëç]WqD‚M¹ˆY4ߧ󆸢YÕÌ¢XPEðLý2ã_2¹ ­¹©oH•{ -1xÒÉê'¿¼u|»o øöúWNRšE”ò¦í™¹Óôû&”ã˜lFÿ\LRh1®Fžbñ¦­Ï}+‡l Cø] ¦¥ÛÂ(„vfÕoƒ+fÏœMëí³}÷l?c*Þˆ"î­^R¬æ]ªm†äÔi§„KªOÉÒŽ!I Œ²Ó²ûçõRþ_tÓW;™¿Ìñ˜Çu@(#ªt*̧Ó U œ{|ãõõQ}a3åF¨l•¾ó8x¬¤Òt’HWA¨Â­â;³!ÚVÞÐþ CºÊƒVo³—Ûvºžì’?³”;”œ±=ËÜ>,„Æ㧫¾\"«ù¶•šÑRívRC•Ø¥œQDh -Ó@_,žÇËœ½kbflçAIøP^ª³™wÙš¦dIÉHzk(c÷3ƒxûŠÉìSÊUPI³f{KΩÍs… îÍY¯¼¡í/ìvÁÏ[¡¹ûظL%O¡@ë™Hƒ†“<ÞP'ŽJ£ËÛv‘Õ,Æ$´"hU§M?ô³ë?á“p*Ö„7v×=ÛP1s}ãG“@¤N’5^ð3•‰Tå³8fº† yQ¦‡ƒ¹O)„´ ¬07ÓæÒ%… „G ¤ú’Rz£óófé{‡Ð^DhÜô<ä¢{™™âB•ËêÖ}úÀÆÙžs2f'CóyÞâ°_ǮӮ@]c8“±n7q'ƒæKˆü4kO=íþ«@ÖU ’¨Ül,ÝPoE‹ÞækÔ™*7q[Ú!ì÷ú·ÎØèÓ\o»‚^×$·&Œåóò]8l•ä·s¹&„„Á/ââ^)š!TåÄ€a óá¬îÙгá½S"¡;è•üu^S‰ -A5ò™6B=Û¢vnÛ™Tã­™±Ï¼"…Ñ÷©]/Í-}{ˆÑÆÉ@ŸÓ–+‹šiý©caÎpnÉÛã´ÓàÕ{ª N©‚ïh%sùªéðž ¹×áN¤‚äå¬VÚäÎǃ¿a©š]ôè?Õ, íòbt¡l“˜h6‘ÿOˆvÂ_§Ré̳9S ^Pí!Qp÷ Á÷¹r6÷¼Á;¥³Z–è¿ÆävØ9k yŽµH’yp;DÉ×ã “jí1œÐ:ðR„]¬Ý›bzGC?"%‚|eÝ¥w!è±–Í6Ë¥RPè$æ·¹:ÿ½Ô)ɽŽE#Øè¥d'¬Ãå¿-¹¶ È2/Å %ñŽ}‡Ù¿ŒØÜ mË…¯ZüÄ¢L8ÏÀôÙcËóµÏoÙ¬Š›LH¥ÔºKÜôó¶ž)˜õqž+×ÛcYy•ÅÍ»¥Ÿ› -_MÌŒý`-@³0Vý'ôœÏ±Ðö©tS¨É¤d?ݱX(ƒYƒ…Ä÷kNWµYÀíFØ<èB}³–ék³ÛÃßRí(ÚÕÏ^2Ø%0Aüýìm„»ŒÎél|mƒoÓÈì©4Q庭„¡y|“Úvï+~¤ŸÄ”EµcŽÔ²Ù’MÆë×/­öëtØ…üHeš†ÙPC´évNîç"G6} ¦ì ]µ1¡AÀŒ”+|ç°(ä¹VÕñߊG¨¬l7‚KfWzÞrŸ_í=ûG"{'ª4ˆXM ¦(¿Õ±†ïz/ܳ¶ÙzÒLÃZ„|¹‰DÆ4-ùá0Eíp¶ŠZSú¸óAÇᇬˆ³ÈáظA䔹Á½tl¾±ÎHý¡[TxsÎM¢…P‰*´ ee©¦f¦¾wkÚ‡øH1)7¨i"ÐÃï‚$0Øþ´«ÃV?i¿‰zóç8ÆÅ|¿¥>dÿjyTšÃµVmÇæ—áq§ÌgCÊ‘ó‘ïØÀºš9z»åTˆCÞ/Ät~º‘؆§o*í—9ŒÆrÏÕI‹¡oj¯UíKöxj¡º. ,Íø+b1°Ö²ñ ÃI›lDLïQZ3÷¶P-noô¼ò½'<ÐY½ße-ì1/øŒŸ7Œ*E5#—d‡ñ¤oŽ–{]Cª>$þF zg!Ј*£oºÓjŠÔ-Nûíͥͭoò8ÅKwQgÝpœ*1^‚.ÖŠˆÎÂë± LV‰~ôÈÿüøm laÎ ]%ì."¦`»©_®Q·iäpÀöÅŽÊs’5y.XƒÒÍ®)ñæ¢T¡í¦öd1KŸ¢k««ÌŠjÝ«¢n0zªG®áq‰·ëa©WQé%z„ÓJ²¹ØX¤¢Â3tIy0ľ£.Ó#‰Å[“#¤ƒD#EÔ‚’pp¯F*«¨õ±Þ˜vïrOÓdÎn7n ¤Òf9ƒ*¯ìX:õ¾ç>Ǽš:*ÍÞLfÍÃ9’&a’j;+{»0‡ )ÐÚA’gðK*êXKÂÛV¶'zÙZóÌAÚ) nîöÏ ³ÕT¼”ÙÛLÎßJëïòé^Di¨ý[¦ð:s<ܸ2Â!ñÞU‡ú|›–#ù5V„ëfÑUŠêqP}÷üÒ|=ÿŒš»¾GÂ5O|PÃèsš9ñ§Ž}äå“* -+gpA€,°,Á=|j«25CÉ‚7›õÕ ’ï–¯µ—Ø.é/F¨ɹË0¢–tRœáºŠ."we h¿P€ eäÖmpòîJ‡o´@°±â–ÿ_ðêAÒ3§âQé¿BVô~$ð9E¢ºj= -‹]? «è{w¯_53÷¥FîgÓ¿³4B™èä›ìA¨«á½q²ÏðËßÀÞÑ4µŠ‡¤Ðî1®PfÔ ¬ó,ÖSÅŸ6ôã¼ r¤§[‘ÝÁv`à ¼µ±ûô¿x÷G$ÿŸ„¥êÏ.}¡¹Î Ì”·¨ÜϾFÍ™”c°±š‘‚=­Áf!*˽ª./çwĉR8º(@ÙÙ‹šz c¸½@ÃN&Im—žÝ£>X!Èm•eñÊ@ÎlWÿ"ÒVCHèø8ÔkÕ‘ŨÍÀj]´nó3X>J¡z?ÙDí€=Nø…½bn {qbY(>"Ù8*苾ÉÍk¨y3øoâŠøÜ €ó‰Hã Dwâ¹ø]¤€ ½ëd {E’ʥËwÙá™.$Váçhtœözk$kk$]4ë¯ðcf¸¡ú¯»CW–@òËóû3ö—plQ¯ “Q晉bá¨/póµó T“q¾FÑ8_þÕA’¿nëQÔÿ¢mKŽ{&z;<Õ¿žmòã[i7Ìϴ뜣ñ'*ýqX€Tnòt<ÕŒkò/?©2m)žËÏfô¹¯í•áâ¢øy#(ã¯`%Ú{ª§!¼Qœ/0&sßx¹ôfçB«+¬Õr‹ïEßï[tÑÚgOKQ£$ú$²=à‚ל?êd@3c)]3ª–êÕÕ%*M}HÙ饞;Cd£á˜Érp½°1jýŸF¿ˆc…st•íG'÷âC\–ºp)­)öþœ_š¢)±Õ(¹ú›¥µ MMº -µ³„ qÆv&¶¯Ê÷Cßå3N¶LZs³wTPu{‘s¼ºÏ°øPž‹+-­©¡U‹—±FõUøP"»KΆ†+«¼ÒY¾e7?0«(•öidšL´;gk9Á(YI-Œl`®õî;`ÕçæAÐH:´® áYHb1“µâ»HÇ{¸wƒÇR^YÑš, - $F¨Ô–HDÙòV?pòyÑ—¹PÞ%F=»‚ÂÇÆB‡öÙö¡GKØPh³~N´[¸IÊ_ꈀCðTzÞÒf ç <Ó–”Œü`4|mƒE1ÖJ'¼ýáÝ$`}R ‚Ú\¨€âN2kf¢aóouJ73ê´òváê¿ôKÛW{œMÓÞ3?שMò]ù:W;v…˜Xâ;„B’ ‰§X.PáI;?Ÿ!T¤ˆ . ·sìöU]䮃’=ÑXŠþ ý’5´ÀŠ€r€Mt>y=¯†*ãbC¾~^ ½tÅAr±¡l'¶øŦ•üó\ô÷ØŸÉL냡P“&7ttöuGz;Kg4ÚóÑ;·u)"é³;úÜS•–Âv½ËðüõC~³?¿Jƒ»/’F"6U.°0>çô+ÜKQÓ„0"ÊÓ«Ñ=ëñséÞíƒxoJ]áת!Ìwü6OÝ.}‹ž€tëË5ð¢VâË©³+¬¥·¬­Àü7Š³Us‚š®Ö©ÆyçåÊÄ“TŸŒC5üGrgâUÄ5Ì$‡—Œ&|°µ¿Š~•ÝGªäØ8ž—ê ¡¯Mvåß(jt§ƒãß ñÏ¢nñí›]Î]ÅQÌW…OØ‘ˆm.Œ£Ž­ð R[¾ИoNÐngô IFç:?Ât”ØSUOÎcÆj2Cm[¥à«O7>kWúk¥¢©½ËX¯'ƒ²î¦œ ™#ñ–Ñt'X•d9+E7±ðNä™dŒ8{Å]òŒm¹/‡ Æñž»×Ž¥dv‹Y†Y–öþwopôé™!™EÄP7F?oÏÄjP9âò­"›Ág“BŠŒÎÅÙPîŽô;B^QO©“3ù¿*B 3³ƒun»$·ßIÆêÿŸ_Oeþ`Ô -˜$Àß`‚}-9Q/Ä¥åM³e€ž?ØÁÇÉêÒ°µìhä]Š¦bð¿ ýé»ê«_ê2<ÑøÍ–Æ+ìtª°[Ïœ±ˆJ¡þ©O@¤%M‘I‚Þé t¦µØvƒ†Õ\¹/bãÑÃòMVFÊ05ÿÎ+kIå2W9>f–}©· ÐøĶô3t‡¶«ÿº¡jª†¦Ã|"§´æôîÖ -ŸÇÉ[ÓLŠhŸŽY³)Øo57kßþ“#%H’\,¬xÎ|ôs¯sxí¥HžÍ†|¶;`Å vû%øÚ¯T[ícÖ]ï‚eŠ"G—Ujß«„yŠðtvxÕ:udQ‘uZy_²Td"“ª£b+çÁO†]Øl¯xà'6wòã6X²C¿DKOÁ‡f›(g@w]f¦V™ËÂ[ê~ëŒ1Fùé^A3v¬ ÇâO›vÒ¯¶2v -zúåT¸ÞúË0ÙÝb#ö›[›zyÏ’Õ$e1¬ÛkÕ5—dRAì`(Å…@{=yW>¿ðˆä>ú‘¯çy@æÅÿ¹Uáú;”œ±=ÃH;Çc{–ðÛ3û0´ü£^Õ®”)Õ™-{·O¹°OÖú2R¨Â´xÈèëˆQÅôwBgБâ¼ýSý»ž›"תl›bðöz;¹±˜IyÕ\E×ÁËê‰pr¨kM ·@@Âe&À 3•˜lÃʉtS÷æÄ%í)è/pRÇøÞÙÀª"2µu -˜Ee’5ø+ÄÙ°s§Zò'±¹§õ˜àçbˆQ²ôð°SlŸjÅö zvÞ€â~%.Kz\N¡Š„çüã÷Eê,ãeV2Ò=Mo)-JcÇVà¤:ÒUÙ2Ѹ>É1ˆ×È“"¿QWº ƒOýõÑ‘4*Ozûº.c¸ójc”«Y&üßÜØØÏìê=ë7”Y™´öøÈðê,A5nÊIßFŠ•/œ?üÌŠ™á ]as.´N/þCÕ»tÜýH…G50x}eb9—S&ùœ¬é0år¾à«J§H€‘ôN­\¿?^4'îœèb©É»ÏÀ':KÒ`wpcÏT–!ÜÛ_ÿ öH˜°øŠ¹ÝŠ†‘óþ{–ÅÚ}þH¿mZto”àÞê7Ê«¾ß‘ŸQžÇ·¾Ùbi>ÜœZO¤§ „Š“dºdFÕ¿cc -$"«˜ |ðhJQvB'\³Í%Ì6Âä²3è¯J)A‚g²+TÂò‰ªÀî§ø¼†ÊrnßãLì“O™ÁN”@µEzçHr$©d(|º÷Áµ*zˆÜtiF¾ Õˆ›º&«Öþ«Œ{Ó5rT[°)C÷ =t†<¢na‹´€ï3å—³b«m ~4¢@±Š¶xký‚surGàE½_à D#D¯°%Z‘ÿúR·ƒ”sqbª®ÝÆ:sÅBÛŸö‚šB6Õ©3bÝó7:GÆÛ mšÄ*‡^Ö¬ÇW B#?-Áö`]díNgåe§É ”â=Ñäž+J#Ÿñ|zµ +²”"¥^(}tÝŒ%ê…RdøŽ Ç?É̳sûßõhÓžPaLäUÍ ºs ç™ÈÏEAª«4c[ùŒÞþ‚墶Ûë§Üm%êq³ñ ]/`3"<fùµÕ²øÕ6hó :%~üÃ>ÓàCŠ¥³Çà™ÑÜÃåú"Ê dò$L8AðŒ€x„úÝqÐ0ø-ÈELÌæ’ßuÀ&yÅ2ÁrÛ2¸È‹z–^{4µžXúQ†¾Ö­Þs5\Ë¡ #Üç~Ç"eQ|7Åt)Ÿ½qv[_ÄQÀS‚5­‚Ü-1ߦ$F;‰Ã*yøXL™@`ÓƒMuûù¥$HCÑáB’®¼ºÂ3ãáU„phÒÌåZQ¬ é;H×vQva;—ϲE–c&µ098ì”)vC¶pzílpª@¡L|Né4fÒæÍS¾õ'/V9WŠØ -ƒŠš|N´”Áü¥À'IrÉYfŽ,¶YÍ:§ò|êûA£C­fªMúw ì6:9xõ'õøŒ&KkçOªšÀCqó:[ÚŠ¦¼3(Ðr™äÖ1Åà?ü™GÑ Õo˃ÜôÓ×;‘ÖxÿN˜.uœáŠYùÄN®¦~¹“qdx ‘£N’oÖ†Ð̦îúˆÖTÃb£ÿ>Þë󡤶¬]2Àr×dëµ0D>šìµ¾‹¼2ÝØ$1u2Àkdï·²±Í¡£W8Ô æZβšI8ÜQ²ÕñÒ»î¢=)K82d 3tcÆÕÃT¶‹ÄÉN°UÞ'KMC¬*Ïû7BGrJ›!˜-2àJÃùœ™¸ÅåFØÕ*øõ• °«tÔß¾Ï\Ö˲ɑBÝðHé~¹Ò¥ÏVØÒä#½“Œ&Ôû|Ob°˜ðÆ2Zìšà$ç̘ÞRLµø0ü)²²´ªG|'é.äðšùÑu‚âc©ß=w¼;é—¯9`ëVÛ Þ_>}G²ï´>i„ã»:æm",&e1à\jÀ'ܹ ¥-Â*]Á™l€Ë ŸIcBÁœ0G)‘³¡¸Jõ!ŸDùžgr€AÊXÔj×dýCÂ۹uÀB˜„-Ç&n,Ñ @¼ô¥‰ë-Y†µ½,Yê“–ø ü8°¡á¼U>ç -¢wÏ–ÜZ÷ÅBû¡ìÍšÁ«e/s@Ê–v`œ6K¿J F-㌱БÑä åfä·‰× -äME7vÑIÉ—„Ý9KúCP^Ï9Ú³:`õO˦Šq» ³ «‡l›1p¸…ù*gs4¡ýPצ‹>±ÉO›ÿßÒI*ôXkW±ôžÃ}™bËó`ŸÔý^ Ò˜‘ø·rؽٷBŽ‰6IA(q -¢£²È1=tåR´&×6‘𨑶~ͤúTªáQ¬PŽ!Øõüì¿Éeîìj kâîŒ>á}ÙËyFê.ßâ–Û2"`µÐ‡˜ÕHÄQ«Ê÷Šæ©æ™QT–ÁBÔ„¥¼Äê-¶î’×9ðf4¸±è×¹.ôÌC—AH¬+ûûîiuc=Ý!%Éæ²è‚ÖGÈx&ßـ߿㠻@à·ÏŽïmÏH:Ýᳯp‹zÂAsÉLG† -˜R£ïÞ,fúM#›†|¹ÿÕ’–k×–>‰yé1¨#póÅC^ÛÕr=éxaK_7mG¾È¿{·Ç=y¢¤­Ë㼟ªD‹ÄG§ãiSVWÞ4{,i"È“]üeÉr×4ŸxSôHó ô™ Ó…‹ßD…¡f‚†øHº|eÝ/Ó3Nýƒ?g™¹id“CžYd£ê€Ÿêš—²’†éMš5ä O8à*­_è÷0Ú,ØÙwæÛ=õÍ#®ö†šJ²†ÿ˜ÀÝqV+Çhô÷Eª÷ÐéBŠÍy;ˉð˜í}àº-Ä%i¿Øˆ9­ÒD¶A,’N­ô %E‹oËŒw»\”ó7Pm̵z¶²íµf8ËÙÂÕÀ¤ûÈ"ô"¹ÈHg»!ýŸ¸zbª¢ËŸ_d[ KXÉ;Ú -¨Ÿ§Õ‹HD¨ÞÔmÅCT>—,Ù§Ùò“ûS?þ..KΡ˜¤¡åˆ+ù¶HšÁ“Žä3›ßÁ -ŒdýgD§æ§`(C5¹¯Ž‡€Ÿ¶W·jokÙ˜*„Èóç)º9Âë J ±XÈÊHLAÒ±, ñéûŽÅ¢l’+Ô³Aø÷Íêʦ—8‚‡|éV¨%r½ÁÆ¢¶wéSVЃìÏö{'J×™N7ÔJ£éM6¦¼~4ªÕTLìœDX¤ÄKäÕÅr÷6çHÕÁ9¶o›=åùØ«ºL$5ç^«ow¡ùiØÝÊ?© o)¤QÅÑ|Ž*‹Ïç¶Å·ìît…±":JíÛ ÜSôc6Zsº~éã øCEÝñ¯ZF Ø'W±³ZÝÍ#¨KãEÄ%¨ÙÐw^ jÔ!|è»öà W*Œm9sC©¦=[Uõl¡œÁÌV W;Ú:Ë$º×†/z{œŽïj4ºJ}üì7Õø²:]ôD‚¯µî’ÀÕgVyUßRo–'É)ƒ ¼ ’úÿ¾,;-]÷jŠœ5ǯëÚn»@ñˆÞqŒ©Ó½[ªxì…ŽÛG[Gßv®ÖQ³XËÉ`s}«“¾¥Å^LgÎ’áæՆݯÈ|X{`£‰Ä(+?wåNwÀ>y>M€ZvA  BÀ’&çaVd9»¾¹ÐÌ -^(w-Âè ÏìÔ +ÙÖoc;„j—¶†©~E£Ðª%9.ì¬>XJ‘ٚмD â"e?¤¸€›&¤oLH:]~•n¦X%„lþÞ¹”­ÏôdQ@ãa~M~EAËŸV.U‚›MØ€ª¾")éJŸ¢Y§4Ò›º‘º UËêC9¼¿ù2Îÿò–"\ªºÜ¡w¥Ô1·rlxؽØнtÿùŒaÂAÙ*Ó`¢†ËJpÁ¿äŒ¥?ü‹(ä›:øE(ùqÞÚUé[‰s½ $ªgG '“\ +wšÃM•ˆÏ‘¤s¾u¢IÐC'žƒêµÑØú93ó|Y@TÛ‚—EìÛÂè—k$:™mGzR›†g»?·x= w;•*E ›¡ëJ²úM8Í›,õuøàý²èZmYË +þâøµÙ`Š”n›ë¿.Ë£†ýî/J +E:– €ÉB³Oà÷/g^Mj9Z6ñ¬ïF脨0 B‰KBÕˆ[—°GÓO¿rEÞ÷µå符õY8ÁÉà6ñ¹¹n¹XÕ”°*w„€FoÜt8CF :ÿg6}dPâ§Ék»©™]«!­ð$ú¼3h+Û†^]â;J˜ØRÛ*Ç-uCí4È⚈uŽðµ6Lj=!;Í`‰,7TàÈåìØÍZX Vʬ]w)ߧ±ÏÌɸ)Á²ê%NµLß±‹VÙŠüÓ’ôÚIþŸœü”|5ú…sBŽÏMedO&ÿNg°’$q&äàYשA Øuþ­(†@HÞV¦qûÃQyåï?>&_Ø@GKf¸0ÊZ E\¡¯¾±$ÿ3e­‚LÛá +°Ž:G(,æ fÔë‚Ë[MÆÜ]l¾n™ ±ô¨uEäÖÓì~0¿Rø÷š‡tö›»^¤S Pêþ–¿jÀòw¨£Wv6ä™ò!¦Ýé¬ +­Ðï‘1²nC22OÕœQµS ê³tyóžn”åG + óýµ¾Œ}=_÷Ž¨ÊóÎnŽW?^è‚2I·=>œÄÃ]ëL£¢‚‹-‹Í¬Iô®úq^ ͘´ EM +,|K93¼¬ðQÒbÖ¸Z×¹pu ÐÖòPÛ«%ÃFùj`(¡m-¦>¥ĸºtGT‹ïf• +ëb0UAã¤xð“>±¬|ïºz40 +—BŽèCGÒ¡ó Œº%"èânHè¯â¨#Ü•· diKs¿öýx‘²cî³"ÕÖHc öAå2Ÿ·¦áskÝ6¢.ab?ìÀCGZրݣêÞ~côc>U¤Ú”!«U ·ð…Îìæø‚ب ®Zl¡p;ÿ@6CñÖþá‘ÑžØOå.©a‰ÞˆRÙ&‹UÕ÷ìÕtºA’ó•²_Kã·Éfq +á(CŠÿ¿$gUà^½†«/¨—è"z ×RkÌf ÿž¦vPGÃyQ”}ƒiòî0Ïñ}ãÄ*‡Ïí+»z>³šnÓ ˜ˆy!2XÁŒô{”Z6’”ã0Ŭá‡Wë ZîxVêŽ*ü2'PÔ§ˆxh}·C}—«Bºû—²â•.†ÈŽ”ºp&…îj…Žÿ©×·ù—dãqŒõª×â'gº¶0ˆéù·!gzŒ”$Í…AÊ\/=Çj‘ÂëU˜EB·ÎÈÏm–/ºöðÄ §ÎÖy²]Â"Q²5Q.Žš_xÕ(çæº]ïnõ´R©Å{æj„Fæ.6Á‹SµÇå0&‹M…P!>æÞÉô>¾ú¾òÇ’Ü¡Q°Øá±A&%_¢ÆÍ¡¢°•¯ß ~æ›eài6œSËܯIƒ_1pº×$€$~á<À-’.Í/¦pY7jU‡Ëcd适™ÊM“Tj)9Z[æaì +ƒIjðŽÁàQЖÌ@é1¾°tªž2»ú6“Îh½e>}[ˆ’E»fagŸ~ùg¯$íP81lÎÑ_ÿŽh8é*îðµ5îCÑÕ,ý áUwF`‘àeүͿýå;;1"c%¢|PüPh\CMNñ½‹ Qn1:ü .«hÑ+±ÁUe‚w€5­¿Ï6e–õãÜKì0k‹º(@çs¤ìQ-rñ¹­DÉ%úÝ}òzz®®RüãÚÉ7ùušÌÑKfÉÒ ¶1óú(±Æ¾Hµ¹^PÁ(ÿó ôB'vhuŠ fgÀ›ó!ÓÆá±› ¬š>¡Ôgªã¦‰ZLºyå’ûM«SŒ({º’÷ÔCªE§‡]éûæ;ïþÇ&m±ûP†4 }:FkR‹{˜÷"¯KÅ ;\Çc¡‘/¤[ápUÝ)6)gÔsô‡ÔÁµ +„.Œ +wçøʘXóÔ¼Ï68…|%Ç;xõÃ_ðÝêÿò¼€jA÷ñw(¿šdŒÿ—BsÑŒð¼é’>„èú~è>‹­bÛ þûµa™‰5s8«Û÷èݯÿÃÐ>ןqˆ§ î!/‡ßÑÇjWKŸpDzÂ¥›Á™ùN=0óCÈ72K™¿¯0Ôër¦|]´*LÛí­Åø†Æxå\þÒ¨’ëü +üG:Æü—c{7&Lš3ÛÊP‡ÙàDúîfº$FV‰—ÒeŽ‘ü=…X`5 :cZ+ˆ2a ]ê.…D`©“KÊÇk^%÷ÂIh´MHžŸ—/F? A¸]bu¨\Ék)¹šT¾Ÿ¸@ú,³¡ÈØö®d[‹ñÏ¿ÐBí®‚, Hh«¤ÁŒª^Û¯ÏI±±*ˆ™qjT‘qûbx`J@̤HIæ{…¾€Û£;§qOÒ]ßûTe„7”xwy÷i”CåoQ.<èÈV¼ÓYx}½ +kLÞ‡1‡Ú†ÒÁµÒ'mȾ­ZzË\0©çž…Ë‹õ8FôXqàŽÕÝêTòt-1À¶ÝAºˆrz€†–&‚ +ß"iå/3 KÅKÓ¨÷å ÙŸ7bA-ÞÄ +œ¾Ë÷y“²#ĹÀWMN«ë¬]Jþ—ו¦ÀU *É;.Ï ‘ %wB©¡åy;h¢"1ӡ환%NÑ©p1ú ‚Œ‰ð@+VJ–ÅSƒ€).—¾&µ˜yš‘õ+¿p<l)Ùlw†ý¥?Ê̹”FfG•òµ ~r®#ßýÍ]%?£ÓöLh0R"1ƒY,”1ªÌì´¦ˆ`H+blb„›¶Uû¢—“Š<˜nÑ[s {åyA$ºÁÙNwtú•Œ&Bºˆ!L‹3e,DD12–r¿T5ˆ ÿ ¢Hù¬í{bLÔåëe/wyBQ~ªæíÉâÁÿOžQk6¥ÐðàÊüºñÂ$+æåB ^<‚HZ°~“ÓNp¼þ¬å#³j*µô¯gqWkM6z¥±½Ã·Ø2‰w¢õÙÖ8©”¡ùèýTèÚ|JÄÊÆ,ܱJÖŸUVzÅ%C/•Tü­R(x÷j‡‹Ø q?táÐhSÜõL陼æB\j äò¥Ei¥cB ßgŸb £š=e¨ÎáX2)ƒm ØÊ9 ý˜odšB°a%\“ìÙ¸Ü *sr“ê„7ìzóE±ë;~‡€ðPØÈÙ¨ê ѽ„Ð •Lù%*DCî̯Væªpdmô™›öÿo­Põû›B¬­6¨é9OsÉE‡Ríœ"(¶$@A.6'y[)GG¬˜`Ùšqè!ØùfGŒ—õÓ(OZEü2u[\îÓ澶-ËXu[é]’¼ËÉÄ…üe¡[þ£@×$@ù´¸zýJ&„ûäàŒíU, ͸ +Û«+û|äï¥íØø½ +¹ËF– +‘Ž6ždnú¨å‘§Ô[°E-x•þÔ»‘ïÙR¹,*[c—ŽPÇLCã 6'«ç»µûøÍ~ô8q!÷.ë:š¿÷©ÄkS´kúªn98@Ì?-±×Tùt¡B€Ô¬™Æ×Eš!3`yl& WAÁÑvïËØ8æª8 +˜Wç2¢4åCqMAÌa©Â·½qqYqfìžžœâÓö“õŸ¯„bۘDŽ5Ÿ(‰ZÊ$hÃߦ‘·m±­ÜÞ,Å·ð,ï]ªx¡óNõ8Û4 öøtè»G—= ØXv´Ògkñgc q¼ÜdfÜ%Mž› SG—øPïm©«É,,ÆÚÈî¯ÆåL ²åŽAvŠ¨9l1ð¤Hë€`™Cø⃗Æ!Tà¹!2RÅ9QN›õIuV;¯D\õ½ÖGCöhtAÀÚ†‡eS6ã ¤À[2ÆÑV$¨<¸oÄÚº®~ Òro1U>&=éþÓ³ö¸‡Ñ%<ÅìÊhh +Ð*8‰‚ƒeõ”NåakU±u%ãvH²EDÔÐÒŠ58ê‰võ"ȉª?H‚}Ù( 6Ö˜s[1O=xZ1ÁÏ[$Ñ'ÏÐ!Ö„¯„«W$†ê?ÒãضdsÇHˆ» \éfÖB–Ë6ÁEÌ%øXãKm'á:Nw;b%Ì÷šA5Q•¹ÑœyacP»)tLq=͘F.’ %O -¼Iˆhke¸ÏÌ!òw†™¬«ï(Lö<tÉÏl%"Én…Q´)ã4a +iw+ 2bgr8%#ÃnxTËG&yû«ñß1Ño¢•È0LäúewÕÈz¶ó3áÂî«Â +i„¡e9Y½o¥“÷ò÷'ºâ}zf *ê8!é¯ï6¹¶Ôï8S´("úÈ,7MXJ‰ªALìÎÃî©…ÎI1sëÚ6$)ϺÑøjbø¶ŽcÞZÉ͔զIG™'`þ#áÜ|ˆ5+…DÝ÷×bm6›iæ¹Û>‘]ÇÁ,2¹ÿ“Jó ÖàŠo5@|JK6!CõÉL&dxXñBÝ8À‚p9™j4¾ [ú™Ù‡èGtö´bú.Ô/ï _9"Å:Nªx ðèýùV¯ª)$›®ÕúÎÍÕ¥þ»tùV5è©çû4jô·°ðCYÈ­§ü·†Uçï-ÁÃi73)‚IF27¦øñsÓx¤ƒ:¤­ùqdAÃÙºXHzõÏlaV/Úõ·üDïû8š4&MÞ¬–ög¦V®1¡±Rü­¯„˜<éÍ8„Îí ì?Ðb¥ 6¸[êèZøŠ$ŸÛR ¨ÖvƒT-Á|âî0àV¿ ¿|Ð?Dµ£…7ê•Fê<Ǽ”i;zHèè­8Ùc,o“H_š‹³žK" oo%££&T€»ðT{ݱߎZ¼ï\Ù=øŽ`ì㬀:ŽÛzXÀî Ú ©›QDWn®*Ry£Ê£çW­^kDÑ!&™Ÿ§ÏÝp`|Oú*¾—W^¿k¸ˆWn\Ÿa^È0§AzÓk­@ñÚß66¢‹zOÈçË¢^ƒcb-!„zp$‰²Œö'he–÷WÅÁ}Êþ?'Ï˹Yp˜•2þ¡íÎü¸xþÌtÚÐßïD;9\”Äwè{d\’w¯ŒŒþª€*§VÒ•ä²³1Í }¥½÷¬FTîÂÑ/¿oôs€G‹Sú ÉþˆyI‚ˆzüÏÞxÄÙP¬OZãMŸAôÝ'FÚ·jf×zM†`7”U»˜µ‚ë*÷…0š†»@áW·ØòBÛßX€3Êo¹Ê•:c6˜‰Úø£÷R‘ôEÝÄ!Pÿܨ^F£θej]Ó¬¤!mð®=Ü¡ÙPàñ:4®ZIqõêvknåZUêûÓËšË]P$ÏT¶ËýÌØ%.‹þÔ3¡Óÿ‰8‰ƒå`®1ã!¡MxéRîíŠ ‰½š=ŸåN‹­ó#xPtâð#Ï&Tt·+i~®u“$Ž­zÝY(ê=ìÅÏ>T–q–ZÈ„›;ˆ[}ªp­ßa€~ô¿ Ë[pî‡ÖsŸ!c yÞ;àôÅ.e\éNhôéî]"óXˆRñ€‹‘’þ3å+w^Yë$àÍ­’÷a³a{›ÍÙcçFäSŽu1\ŽÁƒ,*ÛFD­ý6, ËeYÃÚÉ VOªñ~ÿ<‡Þ¹•5å0„Þ´Sc†YNr¹t¨E{óÖaœ°Šç,Å»:4 W ¿¦\…eXµÝ¿+ÆÖßWiæôÝ6çÇ :ÅŠ²ß£íÔ–ÁŒ‰tì˜ \1O|å³”åÄh”<“¢"­eøuÎ ß"¼¬¬k(Fî 9õ?ƒ?'y•Š‹xI“xCf&Ëó,»·èfö0ÆGû°Ã£¡^Ül‡5\_|‚*‰æõÑm 4ËIžÈd²3$)Éf˜ÕΦÔeÌžÁd<‰ùp3W¬KàNîmÊB˜‘' KÞ×<2†‘–§ÁxÓÑ3œJÜ‘j¦ôõþŠd-¢åOB$ìA9ÙáVtËÍÁ!- €¾/EJ}†ÐÏd¬[ÌyÚ{öÐÚH/ã8ÿqb³Žpí¦w?…àjù§¹Y¨Þ¨+?¯=Œ¡—ë²ôþÞˆ‡Fò›#¯N˜:îáznmy.!Ö‡¸ýA6q#Ïîà,Ænc+âžo +rCR²ß` +"h³ZSÏC×f'Öƒ|ë‰OÅJÑ¢Þ4qmպă0¼á×¹­‰ kÆFÅÏ›$y´çöýT÷ùÓJAê$碬¶e•’˜˜L||gBø|ÿ{Ý$·W“CÚ^UGè€!±ßÚ·‰¬ÐþZ/e‡ºj†³Î‰jž‡$,£:»©¼ê–CŒk¿)µÏ8SJc¸ ®¬ñãêŒzžÅÃëÓUM‘ùNðT§þû¬f»{¹·ÄK‰×ÏS‚?O~Ý¡8RÃR¾}Íó¦™Ô@‹“\aI:^·Àlââ†ÐyÛkH—@—¤Ý".,Ôÿ˜ÜyɉÌÀ†Ê©ûo!TïÚiâSPz8çC3öF.áÛÓJ¼·tÃà ^ ¾žâƒ3xv%¶£ ªn`ñ=Õ 7ˆ"¥^>hC2)+›qLvW¯ +g–ëZÏÙ­㦨̰ÀôÅBäƒ$é£@®Š‚ñvÉ…º&ÝvP²Š2o‹’ôát›8ã~v8ÙÍžc»d_MNö.¿lßÑvSÒÖ¥Û(ëﻺtÉW:HèëµàcÚ'l‰[“)ßUžä÷4–ýÒµ‹ G[^@Á^r\Åò‹¯ÑéLý:)®zpÌjm4ÕÝnx¡÷Ñ¢TúÙñ%²Y›Y¯COè…ĤșŠ ­b‚Øܤ!Ic=ô±¿Ä!Ä€ÎÉ?3¤—ÇÍZuv£ÞîàDI6ž!u±Ü¶ ÔM±?Çb5‰£zùp$EöÏìÜE¨Ùm{ƒ<燑 ˿ÆÔ摪‰œžoš¾.'W@/•|au‰û¨t’ ¯¸˜‰)muP#4ü«Ñ¹©Òß¬¤Á2ñðP|ˆZŸ9#^†.p×é40ïîôÊAÆ5&Íj|$mGô.1ˆ`Šò8Z@$9¨Õõ²Hà!Rá÷iŒ²Áƒ6©'ÎÎïÒ‰G¼»É1“tânkõnÌaÎô$¥Nb}*Ú@[œ´Ã“xB#Sí.ÿL?ôÀ˜öü¤'Ò¤S`©×‚vªŒz¹×Ö·#dº¯Øëf0Ì‹Káǯ—ʳc%ýÙñ¸ <á€dà!šl:ÀXKW0®m¦V—)-L%€½¦kRYMX Ùœ|,˜Àyž©.™B8ó¶ꄉï™çpî׊–Æ*ܾÛæô?ÞG.Q¡{åáQ&³;õ­ÕùÌ¿ 'öç!}·ˆƒî ¬ôo¹£/—܈í‚D-¯;)ø+lÊ”ÍÚ lT${Ñ‹zO‹Å”a¡NÚx¥¦¸R‹Ÿ+€G¤^»óá$:àÅÛ¿®ãÐå²Ñ‡ûj¶»}`˦zRÃa0› È—rmÆFbP¯4'ã#í´•š'0ç¨âô ™ ˆß¡š>Î:!C穤¯ýqG PjÑ]ŽÎ6yâROFË©éX¨m¶GºUß­tK®u‹GŽþ\»«Q8Öä«©:(vüIªe41“ù=ò9¡)’è`È™ÈxÂ|`ZÕxüŽ0«ÖϾöÉH©&û¹Éqáá å¾ü*@˜çp  7© Yy¦Bï͹xÝ¿¢™¶qß°Œ`ÁHšÂ»pÈÓHÇýÉj¶ÓÄX »DÕÿ±ÚÖ×µ`1’åéØE+yÿ88#¼‚Rõ}Å– •  û~ LÈ…"ðÿ¯´*¿nD<ÁNXºBŒØr%Çÿ(ýÍO¸˜ R ±¤ýd’úókðpEÖ+Úë€ÛHDòÆgæ$²MDí=?v(Í-Œ§¡±3¦‘¡=N½•­Ë÷rrËuH³aÒÎ ©¬!¨s¤¬»ä¾™Ww`'K´¢Ubúó“6‰¬Ò^ê*a%†~õ#ÂÖÏ'®(Q³ªp…AÄg8¡àÌÌ@@[k‹ÿ,2¢£ŽS“EôµkŽ>7 Ù¥\BŠîUIÓ¼£©”H0³L‡©.G³ ñ )œEæÛ<ŽžíMÝí ,£raá0‘ +]á¯XÊÃðmÑø?¥ FꛢcR9|=ÅZÍ•'ex‹WÃ4«-¼›rÃ×Å›o2•ÃkúƒÌ¸á[,÷þøÉœ)½1ᶪ¸Ÿ ³›¨ozqw‚÷¹¶à{]ü%ö ,pëˆÍç< ‰.>eë(ļydgÄQ‚£ý2=\bÙÉ8Z .xÙç««7­%M}ÈïÕ¯jÿãWvD ˜dçG¬¬…|D[ÜÕmh—ó|ø×çxðgúúàîjŽæ£»ùÁRLO+H©oâêå%ZF&Ÿ÷'ã`}ýŽ5+…DÝ÷×bm6›iæ¹Û>^­ÿWÂú¿5ÙðtÉŒm˜b°©‚ež†“D{ùýM§ë/µ‰ec@MÈH¸{q¸8‡B<ŽÀ•ïžB•hFÜ­ÓÔ÷½\‰÷éXµ„)ƃ1â…Ùªv‡(å£@—õßlä|÷Eä/ò9L{'\ðã}̳¦­=ßòõ|pI:ÿx8l¦ºsÛÔ>B¥ÇrÔÍ5)ٱģ Y—‡q¾]óÄ$&eT31ÚEŸÚ‘§ˆhÚñÈV+/YƤ(ã¨X¢–~E>¦¬š¦<&rïy|kVqÕ6Ò—í ¼uÍœ0éPÓÁÕ}g¼½ÄX´.ˆåHÒ.¢{aýl‰Ö[óNf4å0w kihxàŽ'…³œ¼_3bßÿ'QJC”z‰a â?¢]Ð$!:€‡ Žþ<ilì›­áÙxeøÂÙµ\KØ'{Ý.†‡¦¸Ô?ˆ>yK>鯊\Ñ +4~Ç9£Ÿ¿ðf+“² cUÝ$d¹íšïMejÏ¢jÃÅS]©6!W#u:$Ñ ÛN“ÆÔ•\Z?äžëoò‚[¶§oûÔöä½Ý‚AöŠ{ÊßË=$v>–òÃ…¹ùÃN‚ð.¸È¿Ÿ˜ÿß<.­ Õ­—å\¬ +åcVOÁìðåÈðš& üçÀ@ÑuKÀ‚·\ß•fšA•[,¼fp˜–ÖŸJìÒ~J%÷e¼µyTÄËZšBÆÏn Ø ly4D¥¢"ôš»#¼aï;³=Ä\d¤Qü§PsJo§]»„’yHN‰Le(“3Fªszˆ©É, ±K²Øz£•ïnŠj–pSäë—´€(^8Æ.þø–ox6®FWÌÉ Tú^|ÈܦBÏ天Jª¡SÐõõë<¼­èË +YXux›Ï—‹,#8 +4’ø~FšJ |Y&Ñ°låç iòB€EÍ`ž#Î7yb•·B O€»O÷y¶¤e²V²Ä;'âŽ%ãN6HDð¿¾PB`‰ìpq?QqLƒ\œ•Ö-€‰ä®{Fó;:‡=3󣬦‡û³ô*w»Ö‚;°‡í®ùZRE— &*TUrüÙöM**=`D“±ÂMYâ¦õ…XÊ'N@qxäæ¥?ê”Eûá*˜Ô›‰]¬yʃ?Üçv¬ŠõÓ©ÞˆÇÖ¸B7XmÔš)U¦tÆ Wtç~ÒNbõ¨]ËL¯•¾‚Ϫdà!ËSì2 +­lìÐdüõ¹Š ýóÞÈ­I3òš:ORB>¢EVp;'vnNŽ>6ùØK‹\qàëñ_4ýÙ»N’U–þMÚ“lÆ%¡[.ªê_1ؘ—‘%Ò¬4ŽÊH±Ñjw¤yeŸvhºD¶j„c®0…]8ªÐuɯü`b¬Žû9£ÅMÇ&Õ¯Š±}Ñ :ª~¼z¬é®•Ÿ{Âî#É£yý©÷¿¨Þ,ZíQ‰I>š`ÄÕ>ëD 6.V¿S³o0†ìr:x¡kÿ£°•|4ŠÎ¢G´ÊY +]wcÞ©u]k„ Áv©ôV—î‡úW¹ÍíÎ[åìò=°Oétw*"$ü;Êw–ß{Œ©´ÿŸçpK†é€–#ó㣮55RÆÖÒ¼ º·Ú–ž’™ó[&Þç>6 ¢F¦´#˜‰_•ÕöŒê -‹æÜÔž{b=i7fÞÞ‡5Öz®“ÃÔ˜§Ó¤- óy#tbci4Ýñ0-Î^èTH¥ÃÏYm}€%x¦¶ÙͺhŒ0A±:]øÄéT¶9[‡þXe7¢Jð„çTMŠvIB¬ÎÐ:w…؉5Júàþ€@¦À7à ¹`p»¢º„upÝ&œê9adœ ÏS(…¤•™ +œb1¡%™,hh2ÚzQ+$2%l@izɚϓ­g%sAâ8¯óÆM)¸Õ= §A‚þâ8ËÜT:XJâ+·;GýÑ5t‰…oå D+ëŠwù)Šu;C`øÞ~ºL%ƒ—Ím§bG T\æ¸ÜJŒ~7‘atðê© Ÿ„@ƒÏ•œ›œÇy‡½—š™qe™~Á-n‹ÜÒ?¬®%ÞÖéütÞñ*¯ã7š®†b{¡‚e(ƒ—‰¸äzaX ,Û[²‡®m¥×Y˜!»ªd ]’7ŽÚµiÃG^‚‡àQvĽž@Zÿã:¥3ú&Á*@šñ~nò1ìzÁ• A¸±Þ2>­¬GûÔ‘bl§q⶜åÊ«ªÓ§vvI™m²joX†¤6%Ʀw£}]ô>àþ6Þ_'¯VŒÁô,Û5ߨyO‘ÓTTÄ-æÿø¬†ƒÝbírq&ò6Ç]&…òý3N&`<vØÓ¼êì’XÕ«G“Ä M}Bg"Mu|ìõQ­ô³DÆ’dÌü.ŸE’b®±=“èD$h‘&Ÿ0þÖkÙ)‘Š)Pw†¦ÑdÂ:6›ûp¢ +J.Èã>rW•úrìi8(±ˆ¸¸P›š||”ŽnnJ mTpB”G‹LfJNŸ¡Uø®ÅBé„-yɘ—JÕ™Y~_§¬Ìã²Ð£”f€ßòá s›ÄÅlE'Þ‹1àõSþC†:T·ìFÁ Ç÷‘RÞÙW :py†¹s³ÍœZ[å‹YÀ ¸@==g~6ëˆôRõa“ÉÖÓW*ªÅZ@[ðô‚nid¡³É²Ø(D£šÿ_æ†UàÃv<Åøîaÿ$]á1êf° I“à©”L ´q‰¢°ÐD Ë„FߋПś•GB½²t+( +ï•ûI-_ï‘>xb!òijHèÞ¿9M’`챓öOîóœòßôž:š%Ig8ê:!zPÀ›QK~IÎf¡¶¹ rq›5It{²éãþ¼â‡“ÄD©‰†ž¸JHSuN‹Çf.>À›ÒíS©²Òv.P6)ÈÞø*CÖOFŒs¢N¢c|€ï É‘ÅÝÅqü³‹j‹i{‰tÎqÆY…¼0dbè­YÜãÿÇü–3BšÉYÜô%P"?6 +¾û}FrĘOð-Øe流Pz˜Aþ;,ÍÛdòÆ-†$áü¥Kµwô´Ò2¨ºj +eX¡è\kE*ÿ2(P†_?f Yä÷Æõž=ü}LO¯UáQßï‹Gä] J×ÒÊñwP—5zBuÜ9C$9<Ÿ–Šü3Úsu´ð´Òª-K°æ¦–Ô¢Ií×ó¶võ#Er¡q (àYܽ6Œ^!b’4˜„Þ.UÿòÖ×1Ä}`H‡ö½‚ͨžò=œ£þ:C…ëþyI¸CÕÜôœ³ÈÝ oXÛ¹›r"CPÉ]r¢vw2òž+6èÏÉ2¯-Ú(Ö1ÅY»Ìô(_ÔíùÃ2³Qiµà:m\5ëp = î‘YU&…xZòù^£°ˆªP²æãþQ‡Þ$ÝEèTÆd®S=”PºÛ†Lþ½|æÅïWké…¬xaÝW|œe×VÇSlaòU| …4ÐÊ’äT~ìïBÚxlb4û˜Û†˜ò”·Ú%+K«ï,aY3à$rçPq×ùÅ/æU2Û&p,Ü‚òÐÆç ðP€øz›`IMô ó”” N=Æ"(¡wÿ¾qŸmÒôBHÞf¶êÜÊj ÷+h²ÈM´A`¨+zx¶m×0% hRZ³œ‘Ö¡4A”Š¢ùÖ HÙ0`BžÜb=Ð,2=pd”M‰ë.l 0/¦àmµÆ‰ÛãsºŒœ ?ólˆª©½X_5aüX²X ƒÜbi«jMõq‰¤JWïügÏäß`£K°óÔoä/˜[Ï=”“­Ì³q}bÝG`ÚMÝ^°—³o¾¬©gLxê~!ÛqWS¡ÉñÍÆ7rÂú(‡Ro= +½((›F5‘|l.2@ÈgP#€'¹¸á)ã“—Yš­NåS“žîaïK7–öòWŸß}&gÅ +9ÐÆÎiîÞ% ÅŒ›5¶°"r2æþäaKÞ ù¨GI ‚î.ÚZ󆛾ßØ!cl§¿yµ¸À’^ȹp( >¬b …´ÀÆ7î)uv–Ãñ5©‰Õj*³Ä΃n™Þó*RPvZKI@.w»h!®9>xq<½QÈ©ÞáYkh£ÚÜþ _‡ûœScßjÞî©+±LkÅ)–6„ÜYåá?nWS yTHÄ& MÈ«*²=$OÛíà˜a5ê™™h¦Æ_¯¨@ô£“Ù¿Û›åÒM6îëC)¸Ú³SÉ"C.’9J•m© ,v"³e]*Õ×*X6ÊDŽžøù?®Û˜ÛÇþ¦ü L†ò˜M£¬D =¡Rnó]Y»—I€øÐG‰5ÅwªÎ&âU½Ÿ}9ôŽåØotÏéä<Âd2õ²=ˆë6¼.išìâŽo+87Î’»d<‡Ý”Ê+±…¼Ó|x ÷ž¯ «­~9oËÀZ mÅBûS-£k9àÄ•iºý¾d,–4p~AÚW+&T{¤t&+åÎ}(ç+ž”zÕ“´‹=Ð] +„¾oìæ +ÀÜéŒyãà5EÃÙX’UxR ðÂ]é¯Éò׃ ƒ°¸Ü§gl{ßâ4Å]ÓúŽkØ·™ShÍÐN⻦1¦\Ã(“TÑCÍžÜÑr8‘Ø´½Ç˳bíeqç¾1F…¡¶Êƈä]e;ÂŽ.{ñ:Êk;µNXrlè0Ž­y¦ù“š)³œÖ»$¦?ŠÓá»ì +bñøjù²¥³â¨ç­‰0ÉwzЫËtSK‰e«‡’{µì¡š·é9GB'süKb×f²WÛ.IG÷PP‹vßD0Ÿ=›¡mQÿîðKzõó‹!šÌ—Cãè€auLªcå&˜ +o•Á\tÚÆä(s% üDÜÊu/W€mpê» 5‡u„ŽŒ±¥ï_)CÚ¦'Ü‚ÇÏVé“rzË9ÈÕÇ ×ÁªáECápÑCyN/ãk¥¿éìˆç€ú•‡ÝÖ`9üßÎø»C‚1SæÂ=11!£ÇÆçš~Ęۺ‹4“ŸeãDù_‹(¾'Ëbß"* þ³‘¡x£Z%ˆ>ߊx¨a.]?a}°˜¸‰ŸË3Ø\”W2ë¹À·ŸhYÒçŸKOµ¸Ý$A‡U³@¶VÙä™d×ÁXV?ù¼À´Â­ãتI)4^NŪÂ×f¶pÚq+”s–Î&s}6ÿ1{H‚NC¢1&˜=~ ¡aÑĬ…ÁŠZ®é·$ð?1†]wô×OzR–?î~ f€X"Ëû2¨O«¼‰Õ/¹ê¡É•·Ñ ð÷;ƒÈ„>š0¹;n`ð!§ÒZæ?g¹±À.ÉhQ³³TºiÀ'Y.!í¤O$8²…·,ÈètßÞ,~õ³ÅB(ë¤ÒßT=š_ò5½Ý_$”œë‰©9Èk*çh¿osØãÈQÜ?1aLZ³õáŸ:9ìCæ‘ ÖN«çÃÆ+S8VöWWE^ŽH¦;ˆ†¾&R‚^ˆoJø¸‹¼ÙèssËnZ¢ãx˜e—mqâ-|2Ýj%AÈNݯg“ÚÝU©$…‚QC³êhk5ð-Ê¿R²l°mg®³Ç¡Ã½ß”ÌM¶Â.ÍõºH +èËÓ~©žaÌËÌ 91/#MMSÖL_A0§’Iöt_P–?Ž+4‰§å§lĦöú½:ΊÜE83ÎÑ$OÝ<ˆ3–àÍ/¼ºÇ‡ ‰É:ukYµÐÊø¯âü÷ö×'´û?› +ågšª¬È« „Ë»)žÓQÍÌý2(|?òÀ¸#t+•Gº—²°â>šà›ßŽ«;0ÍO“0Nž µ]ÔØIt8³ªçú—CÝ 5üÛ¢¾ KtúA8ñ«|ºÕC?2à°‡¶˜ÊIâD¨§L‹9¦Ú8üøž¢¼üT»a-Û\¢–~E>¦¬š¦<&rïy|kVq×’z™Œ‹Žû©ök¾Î¡73{0rÄ‹v +¨á̆1ÜëÃ݃ԑ,oóZѨ!¶¾ËÍA*}j“¥|9â¿ +šé °/_Lz­F?òïò þÆ”..›ù£ZÐ +öO1PZ®ÉÓŒ$|!ð}ãLPþI÷Zê°8½0ëvA +ôT.rçdH¶Î*«%­¶¶²¯õ )I»Z½c„qï€C°-¥)—Ô:Úõ©A$«YÅ‚¶¶gn6gö¿a¿XO×ór)îÆRa,á¹tÊ{â@S jÊ"ZéEUåµY¿ g7ѳ钅»çlöó‚kævª9ùíKÎ5×pŸu#ä” ¥.˜>¿ÍDr”±ùnÔ¬[¯{ˆlÙõOïï›T +™¶hØÓη¿ëÞ +d}bJDZîÀ.ÿÔ~X] +ÝìO•Ã­g»A'X·Mt˜Äïå™Rh¯1ØQûJ"¨…(#‚ûTâ1z»aÙò‚g}ÐÙ£Ãx3kt¯‹™1¾ÀÄÏ‹Hu˜5ëbTþáR¸÷‰ú™ö´ñ•P£7äUÜ¿¯Ì„Ñë°«g+ÔZ„â’N䀣Š}Ë¿aíwiõÝ#aË“@ÅxíÛ^ +!ŒybT˜¯Ý¤Ròª<÷Ì("©ñ "kk5tf6H‚†*“ô6°µNáÎ2º’n4¨øÃ|£ŠAšÿ÷ßpe™+MVvEîM/Â1cJ6™†tøÍaSØ7À˜4"`Áœ~À8SÎ^¤Û¦r‰FÁ5¿§Z›d,)™ÏBN;ï­SA¨à¡›ër¼Ítê"æ@7ZœªREf}|•òÊ•™9 jÅ ÜÆÚG^$EÔg&¦ÔQ‹øQ ©òCaêLˆs1…âü[Ór ¯{.¿¸ØÐ5©êÔƉÑÅT¼s!†… >l:«–,¹áj$Í´óõpã±í„Bjø[66Ì࢓ù—J 7 ï4Kn‚:±Ô ²:ŒE[æïG¸å³¢™:µÐõ§Ü;㸅™›øÚ‡3R€·ÀÆrC²žXɸfVÅñȹŽ"PA:U4ø±Ä:‡7Ñ×0^sÒ£˜B—»QŠ‡DE2vœÞš–…obPÖ%þsÂÚ82 £q¥dÍÄàoTO Uö +ÍJ_ +¢yÆIa‹âÙ±á ì© fðŽívF±ò¹w“~p·!@i `Ù‡ÕïC' +_€£’yÅúÝb­ªE®ÖÛÎÆs¶»8ɯY” «V€¢n/`–>Œ³ñVÅ$á\~]ЄÙÃý²$Š­X=K>Ö¾.ð!‚)|Rb¬ªW‹ýƒØž`°½àd8ÎN.:ÈŸ–-"Ñôñ »Ë1‚^?ê;çQÝ8WbI¦‰I”¦ÿ@ì ïCÏäÀ\€Ãâ¹I¹=ÑJÄ +NB&õìÓÚêË&ê^ÉQÂ<ìT«îÉÐ"#H8ÐjJ"D“òò¡°Ñ?Ý÷æG®(² ½ž3^E¯4†à`v‹¥ê"‘SHÔ¸5ÿÐs(VVr¯äÒ`³ÅËÍÒâÁׂ"~‘eµ62p» tõ+”›A}»•ëÜp´3·A1µÉ5× éÞu@zéJÄ);Š}Y^ãùÇé°×‰î؇ôÇåâ`„XÈJ˼ñT*¤­sÔÝek\.ºM‚N{ƒ>ˆÐŸ"‘ú¦þŒLÍW§¨Éõ“â 7:œQÒÅû¥/ÙlVˆlzgRˆ!Œ¤‹Æˆû)œë0¨©•Ÿ7L&‘ÍTp-ò•’ë}4Á°<6¨PæÒò¾¾9#šÛG½+ †·P!n±ÓÙHå_àüFJ ->÷·÷dÂcM s x!…›±‘Õ3˜¢ma S®jZ˜›ÒëO!ËçuA£™F,– CS@#ÝÒZZÝ"À1¢&ˆ¾@ÑËüNÿßuá|¥šWº;r “Ð.>˜ª_ò2šÆAwVªØ š*2èô¯–’qÿRÙ¹žÿn*Ãh•À$”ñÑçýÐ2©9t-µ`As$àÇƨ>^£„tÆš:±Á)ñ+µK>¬œ3%ýô+븜œƸØØP þKø ˆ²Ùy/hû¹§–­ Bÿ‰¬„: @†Îø[$~j¥Œ{:eÙ E˜Þmòªß~@È“ÙBÁE Lü֥ƟïÂ[`äRp.,©QK„Ügj.Þ¶¯¯ôé'ÌцúÝ}z¿‘äyS~…BÅ™Ôæºdøý¾4„êÌ(º“5¥“da’<̓Ê’”3áòPLÜñ'E5šãâWàÕ¤ñù®ŽÆ[q‚½H­1EûjU[Ç&cœ~ý98U:ê¶Æý °â›ûâ5b5-c° æNW@$#¿ZTe¿BÉõÝE;88YÔ ›ÝæRJx7&4 Ÿ"ñn@J6—Ãhäàš![ä‰õâ%Å컿Ë}^Ü÷ßßG¹}‰ÜîG* ]…¶Ûe_dÙ#Ü°3Q–ÄõQw (Çüz3ö-?ä¦=Ið+oûÔãr³¸^Á¶µ9“&̯Núç ʪ´ÿÑ„ÿËîEÒˆ»Ž:²¨0â©t„8þòCx:1›/ÿfO¼á§°eÇÇzŠçöjꮕR¥ö)ÌÅš„ÝåÕÿ™]ëU  SÖ$}zÈ£îoþˆLúH5âÐaà†Ò{AÉQn°ZŽ¨¥½1âýJOǨû"³ÓA½´gð›úÓ^NUo¿ÁtӕйD* Ê TîJá_&§0Jš—°ä—úÜ¢ž¡1Ô‚™ ¶L dÒÑ=wsö­ñ$_àq—™¬1¨»NWrŒÊÞ n ©Íçeæ€GKŸ—“uñÚÖØ1БÀȯ¨ý\@û"„/æF¢Ÿ7|>ÐáR5³²Ä34A—tg 5&†v|§Q@3z Õs9\ËnÏy¤vÇŒŽÁDÞ_”ûºÃæßv¢jŠˆÄ`Fuês„ó9ŽI@Jüá›C]? ‡8¶ê<^A£ææêçf];ÑŒ§[LÀb*µJâ„ïAÿ.\”\=PPxesQŽ‰0û•ˆ3 xc1pŠðØ‚9#Φøzž4%÷A.’ %O -¼Iˆhke¸ÏÌ!÷yâËì¦Âçà¦ýÀÑZi¹›6Bò°T%=÷êñNÔÞ[ˆO§¯?\é¸m¹ùÁˆÿœ;§:Š…ˆz{':`Õe¤ϽgÌ|gu¡·¶¾ª|ù/Ü47\A ûŠÆɆeÍ~ø€ù’)ªÙ˜È‚¹URÅ!QƒåûQ`[ªyHÖå|×à0*?’:ìy/4Å©îJ(ƒ„ž|Ï5^O,MoU]¬šmÂcö=UÖBL^FÃР: +´ØË)Í#>Þƒ¦Ã £1„Ö”‚õuω®ô'Ènþ¦úNUûÍ]€´±%½+Œ?ov ‚E¡­ŸPŽ0BkCî÷UDëv•%ú}€´§Ø §lã û¥ ëâ¾(ýŽmåX"DH'Gs… ùðÍp'@ïÁ;mûÃÒ…¤™2ò\ ÆS7?.9R’¶^ue¬Zð&ïDsŒb»¢Æ­1 ÔKŸÓéèϼ0 +Ú€¼3…nÒ›š´1”Ÿÿ] ïÊÎgÝ‚Ñ¡Ò—Ñ—>a%ÿ€^ÓŒvõµñZÝ%óø¥3\7!o¿Ë{á%Uƒx.…b 8€£ÁŽÄP=?U£7DtñÔ¾rqüôN×çˆíÐC,—·Ï{¯˜3ÍJ™rºê£¬üæ qâùã­°pS:q‹òYº Òá“‚Ìßû%æo·d½ sýÌ{oôÜHÞ„z¤È¥béH\$è›ø |Ú÷Ù9ü}RCú¿|«ÑnBÆ +3ÀãC»D™aó 4Ñ“1wåêÚŠñv‘¯ãÊ{XoŒBjqߨÎÛãß|>²‹A’x$Ô÷Œ9ðDíjð<¤E“þoÆþéŽkųÅ’±Æm1JšJýèz²î]¯@3ð9Æ!M(‚ú•Dƒ˜ŸKójrMAHI¶=AŸKE»‡'ÌR\]•y-¦¿n «UüþŽ²ÖÏk¹5 WÙôÏÜLþøÒ°Ê.({í“ß×Í°ÀÔç¤`û!e0ª†Á 4mŸË,¢x+A…xb.j3Ëy¶tn…nÙTãªÙúA–bÙZWš¾Vfz›uÈKs²ÉõÝE;88YÔ ›ÝæRK9ü=ÛÎ@`hKðbCëI¼F‚[ƒ¡)šÀ¹i­Ò^¿wÛÃø£‹ÀbU:ÚÈuªVW ä¥X« ©0W—2èfm_hy…פœ~éÐH¾¬ +Ì¥-úøYÁ.ùëšÕ„“ZXgivÝÒ¶áý@–ýÊÅ› * §e/í±C‘ >4‘uˆ”¡lSPG‘0¶\ñ@QM}ë©›ñ“Ðb-£Ö%SËž³§GJï|wïý »‹³KQÉ´®z62j[K]²mÕ¶Êö>¶wÂY›ª&–Ýð 'Ùáª-œ‰ ñ¼ü˜ðð58ZI¥ÉÙ ö.6ô± }l1TêÉî$Ü_Ùßßñ¥¾ÎÇÐUªØÈ\a–Ü(lôÄ£º¯ÁËs IN*·wõ˜*öýT¸Ác“˜³Ò¬$À5f]ÙŽ&`l±£¨Åz‚KMP¹ë óÐJ;”i˜‚m“%צ‹ U6åVî!}îuˆNô¹ÀbΣqÄ|Р±4*±ØÍqLJMIÙt± oj^¢ð2 6¯%K:ï:»lå'›úO=»§Õ FN?)z,Â>´—St:™„ï6%M`w*íÊ€ #FšçêoÁÀ+_Ä®øÑ6IÂPGÕ£|Æ™™L§”# +UeêH‚Mž¬ ³Ž;þpn§[ÿ¤¤³QÌt¬¸²³ H(R¦‡‘ÚÿY™¨ ¢jF”@ÇZK¨nK)Kú×%ðÁ»ki'üÔÚ"•$ §ƒØ4¶îÌwÑc„~cÓɪA-T»ì-ݾ®I?íÓûîfgí¦¯$Ïî+˜óÛfD†Så6£Ø“†áCQR6¯êÔÏ6¦ä«V~Aþ¯ª_9&š¾šœÈˆi5ý÷²£±|dV¨oùÇ+Y«û©ê‹Ìdo97$!_S_é‘’†öWLŠwE¹«§Ùøû¢)¸uL“•˜•(~Éçt—3Œã ­áÅ¿íH60¿·1e)f{Û+y¦É{T¸¿è*m?è”æSa% R/Ź¹€×[6Îñò;•Œ%û‚Z\ýÙ¬ˆ<ÐzžÌg^j ¾Í¯óÝЪ«¨¶¾a ÏûÞŒÂE8Ñ"ÇS2Xh÷ßÒ“=_  »E«_vI°2sEÛÕPôÓ¨_ë àŸ@‹o°@€óÞˆÃej}•Zèž ¬”nü-¬¢sÆ®ù¥åðFtî:&jþ-é‡qÚ¦ÈhtsÀFq g”ÖêÑpŠîCñ o÷Û­8Єl%YvZ£{¡ÑÂÙûní0 +^©D0yK¯xYUŒyŒpÈx¢9*„ !^µsÇx[…ñ¶vxØãÈQÜ?1aLZ³õáŸ:9á«56{MØvrv_ÛÀ&àDª’N[ÁÜáú$9󯘛Ù˳¾ð‚eó×gñR!iæ L¿Xnf'µ–‹[ÆÀ׃¦ÈžÔºÀÅ ÝïRñ!™n$ƒéÄò#< @ö¯†¾—øÚ¨2n±¢´Q/j µæ$;(þhP3ÇáØ-Õ(ÍØÅ 3às!H­6=@üuë?”ýó§0u|˜!í5©H€܃L?%ZN:ÙžND‡œ•F§ÇºÞ+Yµº·Æ"ùt£×âžjÿ¤4·wÚqt(*µ]z–óî Ðþü‡Íä{„Øôò×ñu£è‚Áö¶à $¶Á.YÿÿqÛD²€G•£ßô<½$ðÄe{Óp*„ê7U–‚óÓ1v|ê/¸JZÌIOÉ Y!Ó·ì±öAk2WT<ËûUŽù—A‚•ëÇŠÔ@j ³ÖÁÆC* ´\4lCµó%—»ŒÒ¿cÚÚežqñÑеé$ÎoÑ}1ô‡¡øgܤ~önƒ+¶5-ÛuBlÏ<_YªµbXjr3F³ÖñßL0F*)WPg…¸ ­QeoرlHoÑI_–B`mæ]˜zñ ¹cŽ,àn›z;’Eñ;\¡Ñi>0ó$j5Íg¿„¡û©û¾$‹^æ.Hc =`h)e$¹¤—Ÿ¦ÿbE<]`QË&ôŸÎ‘SØc°WV`´ [u‰(³lå»J;Þù Ïó€åwþ´ÆŠD‚Lç,i©+G¦]1 „)ƒ „ßž Ï0‡{à g$*'|gùcjNí ¢ø7¹;Ñ%¯ìµß´±d=¤Ü4¡{cŒgÔzéæâlÁ'à'·ÖÝH3LY,k$ 1Ä~ÞÔ…§¶jOf;`§|^1j*=¥Ý•iÒ_˜›¨$¶‚³õnHͽæò%us1ÀXÚ8‚5^³¨¹ü¹©„R§6¤•>ÀmIeí¦r“îE„ú —ãÅ< ¤@¼­'[Õ}祠Ÿ˜Ô¥ØãIƒ/4>æ(«Ô„:ëo¤Ý”+L§òK  9'ìQܬµ$±bŒ0Øff4{lS²›„GõþˆZ‘od½Gû9Jᚤ¸y2—Њw³Ø=Lã£Òw|G¿¿q| endstream endobj -1972 0 obj +2224 0 obj << -/Length1 2519 -/Length2 17749 +/Length1 2854 +/Length2 29958 /Length3 0 -/Length 20268 +/Length 32812 >> stream -%!PS-AdobeFont-1.0: CMTT10 003.002 -%%Title: CMTT10 -%Version: 003.002 -%%CreationDate: Mon Jul 13 16:17:00 2009 -%%Creator: David M. Jones -%Copyright: Copyright (c) 1997, 2009 American Mathematical Society -%Copyright: (), with Reserved Font Name CMTT10. -% This Font Software is licensed under the SIL Open Font License, Version 1.1. -% This license is in the accompanying file OFL.txt, and is also -% available with a FAQ at: http://scripts.sil.org/OFL. +%!PS-AdobeFont-1.0: LMMono10-Regular 2.004 +%%CreationDate: 7th October 2009 +% Generated by MetaType1 (a MetaPost-based engine) +% Copyright 2003--2009 by B. Jackowski and J.M. Nowacki (on behalf of TeX USERS GROUPS). +% Supported by CSTUG, DANTE eV, GUST, GUTenberg, NTG, and TUG. +% METATYPE1/Type 1 version by B. Jackowski & J. M. Nowacki +% from GUST (http://www.gust.org.pl). +% This work is released under the GUST Font License. +% For the most recent version of this license see +% This work has the LPPL maintenance status `maintained'. +% The Current Maintainer of this work is Bogus\l{}aw Jackowski and Janusz M. Nowacki. +% This work consists of the files listed in the MANIFEST-Latin-Modern.txt file. +% ADL: 778 222 0 %%EndComments -FontDirectory/CMTT10 known{/CMTT10 findfont dup/UniqueID known{dup -/UniqueID get 5000832 eq exch/FontType get 1 eq and}{pop false}ifelse +FontDirectory/LMMono10-Regular known{/LMMono10-Regular findfont dup/UniqueID known{dup +/UniqueID get 0 eq exch/FontType get 1 eq and}{pop false}ifelse {save true}{false}ifelse}{false}ifelse -11 dict begin -/FontType 1 def -/FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /UFPYIQ+CMTT10 def -/FontBBox {-4 -233 537 696 }readonly def -/PaintType 0 def +17 dict begin /FontInfo 9 dict dup begin -/version (003.002) readonly def -/Notice (Copyright \050c\051 1997, 2009 American Mathematical Society \050\051, with Reserved Font Name CMTT10.) readonly def -/FullName (CMTT10) readonly def -/FamilyName (Computer Modern) readonly def -/Weight (Medium) readonly def -/ItalicAngle 0 def +/version(2.004)readonly def +/Notice(Copyright 2003--2009 by B. Jackowski and J.M. Nowacki (on behalf of TeX USERS GROUPS).)readonly def +/FullName(LMMono10-Regular)readonly def +/FamilyName(LMMono10)readonly def +/Weight(Normal)readonly def /isFixedPitch true def -/UnderlinePosition -100 def -/UnderlineThickness 50 def +/ItalicAngle 0 def +/UnderlinePosition -167 def +/UnderlineThickness 69 def end readonly def +/FontName /XKYJEW+LMMono10-Regular def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 65 /A put @@ -26822,7 +31969,9 @@ dup 67 /C put dup 68 /D put dup 69 /E put dup 70 /F put +dup 72 /H put dup 73 /I put +dup 74 /J put dup 75 /K put dup 76 /L put dup 77 /M put @@ -26833,8 +31982,11 @@ dup 82 /R put dup 83 /S put dup 84 /T put dup 85 /U put +dup 86 /V put dup 87 /W put +dup 88 /X put dup 89 /Y put +dup 90 /Z put dup 97 /a put dup 38 /ampersand put dup 126 /asciitilde put @@ -26871,7 +32023,7 @@ dup 37 /percent put dup 46 /period put dup 43 /plus put dup 113 /q put -dup 13 /quotesingle put +dup 39 /quotesingle put dup 114 /r put dup 115 /s put dup 54 /six put @@ -26888,126 +32040,181 @@ dup 121 /y put dup 122 /z put dup 48 /zero put readonly def +/PaintType 0 def +/FontType 1 def +/StrokeWidth 0 def +/FontMatrix[0.001 0 0 0.001 0 0]readonly def +%/UniqueID 0 def +/FontBBox{-451 -316 731 1016}readonly def currentdict end currentfile eexec -ÙÖoc;„j²„¼ø°Aw-åÎ=Ó%åW˜)-{Ùr½uú•)¯œ‚ßröA•ÉÂÜãE(õ@Úý{ë¹´‡º“Q»û|ü_‘RÑå» -ØÐÆϤëA³Å -T@æ|ýq|Vk¹¿J% qu8P¢øwÄGx³ÅªÛ̆ÖåQæ¯6K üªÒ-UŒ\§Ô%¡bÕ"t-*ðxÔõöÓŸÏÿJ‘+ -}ì3¥{Zà2ŽùÕzݬT2sÀ$ZõÌÑ&{Bè–J×{¨¦Œß=מ0).ßÌÅ|Ë¿!í:[LI0lrÙ9GÌ$}»³/Xà œåS`p*²pë×+fwwFG¢O¸¦Ä^æȇ ÷ˆr+dMvNy{æáÙõYëõKëí²Ï[zÊ[fâ -âÜ·BÝDý‡q}Môæ8À|¤j›©$±{ÃjÏìpÓxæxS½Æ«Eîu -¤•™¤š¶7%É‘õ±4\ ¯9»b \Á)[ÉÚFUÛ.qñæþ4í¡ÌZ˜8Kûzwwµyl”ϦI5b·jc‘íZ¬™Š,“7Ÿ¤ÖɪK±:ø#@JÛd®@Ñ»FìÍM‡ç÷Á_ª¡ûd#g+²úë=·è>ëŠÔZ‡a¸·—WŠNõŠH5Y/ÚãìÜ .rå¬pf{0DÀ|6@«çŸé;]ê/¯¾h|ÏÐ.ø§RAÞÉ°KÔmœlö^Ü&WÜ€‹ÝÇ{·¢ðýmFÄÝï«´—¿ÚÕ™É3FK~I†¥7@T7AÜMûOPŠ.;ŽÝŒHÊ+i†X±ž;}²’øžæjÅYXõE×u«¼£5Ç»pÛBI¼¦á¡ŠS ü© 7pQ½40cH™ÔÕ4ðu[ì wY½Ã–šžå2¼[tÐÒÉÊ–Ú£Œ´œ?hô™Š¸bœáþ*Ù$€ -dÆ¥`Š$ P… …›‰:"‚ ñ©)Ž©|UÑïÆ©ëŒfù«Dµ¤tøÔd—§ =¬›œ5Íó¼+¶¤îj7Á%ëi@ÿ—âÈ)шËT/ÐqˆUÜÐFé±÷¹1”tEÏ‚Ò·Ý¢@ Ñ¢€1ã²F^R?–-•ò¦q„^œ!ûŒ*1gw÷ç2À¡4/¦ Ÿ¸3¿@«â\°7zÛazêÑ1é)‰·.à™ðó‹Ex‚Y'ºÈª÷ê5ˆFc=nj¾2â½ÕÖcèw1 üøÌè3€bŸI]‡œŠÁy¸²:ŽÌä fÖ¡©Üê‹“åêñ¦û¨fgì%¿p¡ûÑ^`´(•­Ó'm™ú K'KâÓ '>ŸœEàPN,:Mè{ŠS3G2R@6\Ÿà?-ðÕ^X'€ÛèõF.qh˜Ã÷YŸòûl‚aª¨wÌÙN®Upͧ`­­ôâšM9±ä_JgÒbw!◠̼sÄÊTÒ”­DÀb‚É:Wq!]iÆ¡w»“`ÿ褩Œ'åÂ`àh^ TTI«nH‰el䶢½Í’òŽ÷à`ÝöÖ㘀ö×Ý '¨pˆß(=«Y1¹æö«‰ U¿Ñ¬¨/ìOúT -%`&ì{„(KÅ:.CìiÍK9¢¿xHÅ-vâEüÔ4x_5ˆå ÙºüŠšÄ,ôž§dâl5'bËbÝq,–Hö. O“°è8Åá NNn÷ZÄGòquͷDZÖM”¾ôÁgûéÔd/.oí~*Ľ–ẾÊ¿epø˵ -1î²}o€Kì(–ï{AÐsMã`?  žµË}|òœ¨^ÒÚ§(ûÚ1¿vŸMXüØAj!Vóƒk5=êÆ&›Y¡ìiÙwó†Ë|¨†vC¯‹PezãF±Þé`FƒâJO!À«þzÆ¡ÇãE.)8£Aœt`+—ÎoêÙê”®­ €ƒz`æ!|þ­a*®ÍdçU ).tIhq‘×þˆ*0 q±€¯>ç„NÇU Å3ÆÈà7Ä€9Ot¶[3|ð5r0ó¹pÆØþûØ8ß•W˜qDLŽ¼Ž{Üí`²“Èdw”|ׄ GJÿz/·œèH'¾Z$„‰Ù™ØMΤ8¤àŽÆZ̈Ž¤<7ß{¯LpÞÃ÷V)%áèkÊÜ3ƒ„ì™To¤üCn}4>·ZÿrÈ°{»rÁUZÿ9EªŸÖÔG®áu‘49Ú>à`oq¾Í/‚¤á~éxâ–S…|œf9׬?ÀXñ.uÈBxfGbEü.wW;û…™o,À ¥BLkkKµ{9fggü,lé|«&¼€ÒMÈa*£Ô$ö‰¿v#ܶ>ì7UìcéÍö«'·‡n¸NЙÎ1Œšç¦WÞÆ«þ¨ÙÿVÜ ©˜Nu+9ÿÚ47‹yG(ÑÂþßß ¨ ÂÀŠ"­zÊâoOÂ_”|¬cZþzB¤æƒ@·Z€Aݾùà@a£ex–åb?ç>"ΕÿÉYÀ…ïîœÖ‰éT¦gB´lÙ9UýFêJ¯?"[p] ÚÊpÎhŒØÁ ÎÀ’¨ñ†U8€ÏÕ“z¡çÞ¾L†?³€ú“ü:K‹;®õú¡Wÿ%ÌX”„õ$؇:bÉ9pã´F^dšBÎÖJR(6[-T*í4!œÑyY`'Mµý( ­¯íÊM«PA#Æf‘€T"fŒ"?e÷—þX§&’R¾ÄEÇë ]¨2L€ê†Ä6 ñ?h)8&Ì/Bafƒö–“!º0ˆµðt·M<Å‹X0z½1ß$6$·}ûW<ß1jåiâ76çý$¾4±e°´³ ç©Lå,vΡTuQèo¹:Ýò=ñ}µE´Ë¨(ãSÊËÍ0.“¿¯Ž1Þ) dû‹ƒ#óÊIÍ;»€P]½Éµñ¹R¿þ•ü蕺I,”"È?›“öãbBŸÏÎÈü×8|ÜÛGÏžÞÝï\;ºH -nÆác2ˆß¯xXΠ1!?:àá¹–åt#Ãà?%¹Ñ÷•g„ŠS¿e[zšŸeÍ…} Êßh)—úDì j-ú…„ÄìO„6võzu¤@bƒ ‹Å ix´cÆÝ`rÛ&rÀç IìêÚ¹_> 1SêSdôåY$î…`)¤Â­õÑÄ×:Šî¿•] øR]ÐFI®Tkxð›jgbžH“d.(: -Áâï«œ.ŒÐ—%¤pq¿ªô>¬>^ézx,Ú{^’Íc]r›â6o®·†µ¹õ¡ÈÞ¬tH“TÜCýÏ× •O;Ðqâ]8ù$y¸·BˆoLr €{=fÀ¶¬$' FøÅ}*õƒ%ù*‹úªútds¢›CNzc¶»š§û‚5G~Íظže ê2‹Ôd«Šã]bsz'¿È:åÏaWA´ª™>4+ -ý¦5uß=JTòú­“Hˆ8¨Yá·n¸ÐÏ{ü±-¼øúžtÿÒ}Püì”tÄ¢§Jx³•8×ÈÓRÛá.\{Ê`uÝÿ[ˆµ§ô%é¹3Áˆü…7Â\Õrn‚†NJµêþ‹Ïò™ý F­k¬² ³ò¾Oj¿—½®ßÚæ+´WeÕÐs©MÕV—‹)ªø˜–óÁàªJñYVñË›8´ìÀ1Áýd#JL1?M´È?"Å›b§ÌQÁï¾¹fc·ÐÓŸr´½¯ ar¼4דÕ@ùd¾ª—Á”çv¯5³꺜5eÄDfR+·c=Piÿs«ÛX‘8þšÔÐmm‹€šÛ;E­¯LêÄ]œH|­S“°±‡· #õÈ¿¿§ðøË´§åç°À»Æªlçõ²ù™ã È÷¨ò„S6ùŠæðe¤îÁ¿!3€BYTg˜Ú±}T¡Ta˜Îˆ‚ˆŸ«Ïf›ÔgŸ¦»ÔþðX¦_+t”1¡2)AÖ{ߨøïãcƒ×° ‘«¨zP*QË©—S×Råd*çlNíØbjɇ©w½u_ŧ0ß3¥_¿mÃ=/èßQñô³ufô⢠-ñ'I N¢»â?Éqjªñ™) -ÊŽëšÓ²+ž¶¡AÇÑÐr¢â»©6&HДC9Ömr|Ã7r²óGù%:‡òJrþ¶<Þ(«gG^ÂÄ´³~¯xPé^€KâÞÔÁàYúY…DË“"ø¸”$?f·7—Úšß}¹’XËäoèˆç3üœ'â+T…e|7õ~Ÿxø9F¾’ÖÊ“ •u -Ìiãh -9Nt’…9‚»uicæsÍäJ€w -cZ|¶Á$CÈ[ÑiŠ×eðƒ Ò"÷û²à¾Æ;3|A‘zFáY4öWØSHʘJ᧞é)~ÿ%œ§b[¿O?ÃñÅ;ŸRw‡ß¹ÇE­5£?"žWå|€;Õ®I6î”X'\dŠèKùôs}2Œ{QA)wJêtXàŽi3uŒR3T›q5‘b|~ëú’<é2ûÀ7bA}…Ÿ™¥LXÔO6çû/'`:ÏÔñ½´¿´‘¨ºñ8gŒI$õG,•ÿÌM`y}GꦜÊ?)c’ÜÞó–šÿ7áK !8BʽŽlÉ.Ü oõϵÕuˆv -K·„S?Í0Z<ª¼òM^žoEáÖï” §´"1é¦í\­eάI¶ SsÉBIT·ŒÕÔvòÙe赡 R»kvl$|­i÷ì0qpð½ -QxÑroãHúQˆ³™vzñç›ÅƒðeˆÙñ4`¨ÇÌe©b÷˜º?«V'¯axð Â*áÍ Ç¡»~‹MåCCô¢‰®”õ0Ò"ršs›ÎSC®æ=ÂÒyÊ û[ÔûÔ^þÕ”Iî1‡d#œJ9î&™I…#± ÏÅgŽß¶c*»B+¨­ÜFWnd†M}ÏRYu™bP·‚­všçc?_gMk5ZoÄoÝ䨟³ƒÒf †~ßíGÂ>Ôüصë͈,Ÿ ¬˜ãgkÉ]E8·?˜ŸØ`aˆN/`p§çÎâé22i¨útË (/[Ä@ã|;C¦ß]ar6ãÕXõ „W¸,¼C‹ŠOö‰Qì‰Y`¨ç`J_B¯àh§Ôqñé8 !;É.òÏÇ¿*Ä¿LM÷¬Çïç›b¿”¦MK—a{boK›IænÑU„o¸wÕgò3’ÁÕ|‘R¾q?ð«'ú©æSúÖoIÖV\ÄÛ]iÖTgÈ‹[_ßZµ_ìj†®£þ”5¤¯¨'N8wMˆ%+E:vŒ -ŠÊ"„¸Óªï©á“a¦x;ÏY Ž`³m ÷±ÎÆeòïï©"bsàiq>,ÄZnÊè›3æÂŒeÐÌ(¥±gÆØoû¦¼ =$ìRù·ÿŸµþܬú¯Ÿ'âJ:cjª3¦‚f2 -N’µ:3CC;OÊv"<ȳA?9=¿Ô‡a’ÓÈ{úúMË»Š¶ö&}Lænu¦¥4ÛŸV[Ìà+.¢_…bê¨$tö«1ê.¶}ÉÖÇÓÁcÑü¯{ä«<<›vì÷ܸßÌzÖô‡<ú Íñ–ÈУÝ9ÌrÞµ"œb‚t¶™Ê˜$yéЪ֡Vì ]W–ÂÖÒÔ>£Ýã0žõP¤B’·W*ZCÉÆ›ŠOžêS€ ë0³é€Õaº‚ÎÖÀºåS„±5Ε÷-}7‰‚ÔÆÙ-Á›*¸IC®{1ȹ†AŠ˜ßZųä®rO‘(G n˜6ã¼¢9iã5ßbDýN÷²'wL å,²j"•éWv³yMÎbfv›¹¤ù&,Õ†H®†ƒѶ¼G[‚f…íÄ&“©PÀx¸´&Iš™ÿë¤i=(Ë— èz:‚[} š$êú>ÖÑ]´¡çIlv®yPôÙüdŒÓ[‚tºzÑwä;Ñhc¥9–¯éX S8ì{‘ÕY¬J4ks¹ð'$r+›tšý‡æ7)„)ßm&‹LWÌQÔ ãL7“)­³gö€·†×ó‘Í‘¶".ˆÀ¼ÿf ˆE›ý*â -°MÊö‚:7¯õjm›˜ ª!µ'¦¿3¹xÄ<[r îä«ënÝ^™sºÉ:Ÿ^—M{Ã9E“Å·ÑÌ8ÑÝBãt<ÚW#ë³WsÛ 3’}Âæ~]ÏNAýÑx!¦ íf”ð%þÒ™ÄÇø°wÂÙ €ìˆëÃ˳,Ø¿QÖ㌴W싪ËŸ«D®t2ïó̉BŽ›°¸JD_æ£ 9b ʃ>,Šw©v­¯Ëà0ÍüV\åaµÙ4ŸT2G+¨óÿFä]ôÍ,Ùšå]©z ~aŒ1›æ›CÑÈãÓJU­Ús/ 'Ú«À±ÂìÅ“k‡¿[ÇM#I8ߦò’)¨´óq±U‹ë$ÔràCO>Å°½âŒtMÖý>úIóªŠV­Ì&¥õµ“€Mi`ªo k¾Öà ÊPìÅ^\ âm"¹eð¬]V¯D‘¶ÄÛ7uë\£»»~ò&øbÉìÀýOŽÄñ4˜ÃtÞ¡–KÛLôÔ¢”¸‡N\™-ÎvúaK’í¾D¹­~2W^€"á‰Ã¶¨ 8Y´JBX5Ó"ݵżo¬v›¹Föv$=MÁ ù˜}( ˆ7@L»&5ƒ|®L£.`Aù@‹{åšßhrÖõ£Y´i¸(ƒÀn$›we‡Bp{o‚ÙǔԽxM}TàÛ}Qž¿4ΑÈq’vÛBäfSDžét•Q…ë±¥t»y%שÊÙå &[ßÓ€£¾˜çIB¿|ÂBî8™ÞLMGÊU "ç<ÿ¾B­tbv@¢1~8·„[&Þç>6 ¢F¦´#˜‰_•Õñƒ¹>,òomðq™À;¥Fo£ï½w圠¼ŠGÐÇ€G+WÐ¥á©øQ¿®„‹wÕ]é6ÆÀ®:l¹‡áÀD-K¿ ¨!”c‘z¾íÚsVriÔ)¢˜tå8lÆÑQ€ÒC·+>p—o"Ù•ÎaÛîwÁCN“n“>¾nÊ&ZÛ*ò~µá¬ Bb”„0mòUsQ_#25}!ò¬PršS­Uj®ISDòÜ®ià•edð÷;#ëà…ñÅ>¬PîUþ^2w7©y…vÉÏŒ›±!nÚ{Éa•|¦˃{ºŠöÁ/9L0’€P+%[sÖÖ­ #ƒÝSôÑ_!’—`%J/v8G˜¿X·v@ß;#b!O®­ßs~¿ªa45LÙ×9߈6ÎûuŸÿ»Ø&ÆîÒe˜h_GrèópÉdv´œçöÃÎ<‡™¯„ÿ¢J_?žëqÙ. z¾‡ý„™dyoŽ»¢(áw/EUÁ¼€ýäÊý‰**î-|Ç ýXL$ž—†íÔƒµÀõÔ»ÉÌ`z -†Å#ÃnåÔ y•JüÄÿKÀt7Š7Ó«ŒÂ6bk^1bÝç -Øy:šM_£Á­ìvÿ×BènÂÖÄð¸QVECÉ:‘÷u‚þ®ÙÕ®$PO.’%S¥î™Ô Õè¨kéÜì€ÀE&Hõ=´Œ<Æ­ª¶4i Ú­07L'죉úp;™í;IàªXbÁ[¿-DÞ£CFi¤ÂµÑX°”lhÓli÷,¹ÂãŒæ¼¹öƒº®-†¯S¢£CeÒæ`¦˜EÁ‚ûï…I`ÊÝqv‰˜å¸ÿRMáSÌ@Ñô+¨Ë'Ìí÷>ÿQäe—ÆòYh…™@ —w¡Çuæ¢J@Ù]ÐT¶ƒç„J¼ -WÊ€«N’qžáQiS}Q”§ÜrÖýµ¶2yÁ¨]X’aXÐNý™.ç‘8Õ¹ýÔË×Ãxäpw‹xñ鸵º…ÛÓ4‘ÛûªjO¶Y‡æ´÷žÅ Ô«DùŸwFÔ>¥ß¬ì¾¥Xó„Ü'±4µÕ¯Ö›B‘‹2®’‡ _n€¿™ŽŽÕ#¦.ç¶ÝEœA{Sÿ›…ð*µsßcÜS¯ -ˆ\mãY.n² ±š\B2‚áúûE†¯ðOð<´_½˜ø”&Ó6Š ÇCöN&Y‘Ï2·1h I¾’Ü-Ĭ±ÝDœõ§œ¸&q(ã7q{é?º:9jà„ÝA‡Ná+?>í~Âx­¾rÞlt(+—–ƒÅšÐ ”ˆ3ZÌ2 ÖJ¶ÍÆïv8ÌHÔšx‚ÔQš>w$ô“<èZn:¤Cb¡•à„ËȨx]ªøg¢¢EϸÁ_ןž©Ím„Ä(célšø0/¸µÜü*•âMžÐ|‚PF–ñ¨bK—K•1O¬–Õ.åµbyÔmñ2&Ö.@ µ_S»¨0-hB#Ã+½ýÛ¡f2œ™°=žœ$É‹¢Þ–KŸ_Æb`,¸Y–[å„©´l„(hPú\|ë] -ˆöÄÀ*øcN¤À3kéìÓžÇrzþ-¤µ¾\ÿWÜöh« Xa:-i~O#kyö®Gд•¨ -JØÖ@;hÉÅ-‚ /ý‹F!?*Eô~öh2zû3 -µ…*Oô3£a½Dôu°Y[ò‘ns± ì]CÝ)uVŒJI6U5º0í[ц§îډϙñSÙ}›oÁxv¯²I‘lñG)Í«Ý¥WÆÓúz=UXÅíŠYÎk¹×_žÿ’6]ôäQò^bw8‰SŽægý!rõì¢ÅQRP?B¹}’} -U#Œ*÷í@6,­=£´.7ÔØMß–ö_ƒ¸^bVVÅuÚÆãšC‡¡Š©B¥J9œä¸q'CvÌ•ô$‡ž>?üp¾>ów¿žÂû¯÷\ Šs$röHêv,¡!1 ±ŠŽ$oQðRT¹'µ'½Üñ`WN7ºÛ§˜×8I$ÔLy èÈÚ¶Ûú¨ëTêIbSYâÝÅUÔh»ãô4÷¿‡Kk‰ˆ Ðiº³Æö®/]²"–gxMÑQïXÔpwâpXù4Å͉‡[ -üý|6Æ-’L›0Q?Èœ¦»Mn–<‡=xJKŸ«ŒÆpCð‹Öƒ§ï³=wdÒ‡SÆRU^ÒúG¶PtöÛofÕ48¥—IÓÊXL7ÍŽ™‹þfVãH -;‡½ÎUÄ:G25ËY‘u¼ón”ÓÞG³¢}?æÚƦh=$z~]YIÑGy4!¦WJ Ó6éì™HþzWöƒ°%œl{Ë»79èvSäØ5[ìÆè¿pWˆ+INV§‹Ü¹ €¤ƒ›Ú^”á¼0T*ìa±º”Ÿ³ìàKWÄ7_:þM²Ê&,~3x?O UYnÓÚ-¶Öé^Ÿ¥ Ê$m¹ß‚¿€ÎÓ}7`/ëA"ì„Ø wY¹9¶¤ÄÅ>‡ÀC‰QRëo¢}«ùÉį€ðeNÃ_#ñ l_GAŠíµ Û¨š… úœD…¤Ñš+Q6ôù}²((&¦Ëö–U_]zýWÓûLˆ{úìªv4XW[H¢ÐÜA~m[Ï’«Ë?òkõШ™Èw¡÷õ·¾‹òÎj—µû;]‹Ve§òˆFÕ0ÿ5ˆáqº*kpú–tùCçìz½KÍ/È÷pè ºíÏ q’¶1þ#ºƒ2•kØhA$yå´4>Ò[hÏÿYã8 'ÿ¼ã2Ï•®>×Ó·‹"Ü2(p³!æÊMºÈ˜ë@4ú9'M.2‰¯£˜t:+0 I.û -ï©Ú&5ßq¼|úWÓøF†©¥¼‰+¯º…}sFaØCžTü¢‡ù” ž’xÔÌþo\£›{´`¤—íq=[f0‘‹cb%@ê¾NV!nñô“à·Ç½:jN-{ hî!ÖÀ-E5a?fÆÈ’XåáwJþQZÙaésTþ£:ï9ûõûŠ‹žµ.5Ë`"dè˜p´òŸ3R‹ÇßƵp~HLÀ«ÀõRhëŸvÈ6bQÚ´ö§?v*^¸ù.¸¥«…ÆxþA²Â¾–q‡?øÁ,1ç‹QùùÂóe­Smla·Šëãq‘ÕÈ*Ø¡õD`–쉃cµú•p¡~|- öÃ1}[ªÂ)þeÝë›ëµiI"•4^nÒٞݽ‡ÂŽå ãúSü§}D‘+eÞ³í¶Ü'¨Í`ßòó©Ô8=Ä>‰uL¼é”AÔ ¡:ÙMC4 ~üüZ#×ÓjWi ”æHXê‹nDКnnÑcàÈ;q†aN0ÙÙ¿Û«H†¥MY&4!® -ÕÊù§:’ÕÛïqg¬í4ž E*u"jd›ní {ïD?wCp8ÎØ"'(î Sq¯%IÔò2œÑ7c½þÍøjY€rPaHÇ°±''+½¤¸?gò®Bo< -FˆžúìÐäÔŽÉ—t~V¶aCqÝá˜ö*UOÑ;VLñtv€æHô±‘¾ˆÀ€¾ªŸ[nqðsÑÖi¿x¬8óê1@ûZ¡ÎËwS9.ÀÙ‰¡(á(Jo=Aƒ¢o㬦5 Àó£˜Äÿ´_câlã(©–‹Æh$÷¯ÅUqB'„"VL¡„ϱ‰+¢ZÀ ”ZC šáký¥“òœëô¸ÿÓè*£Ä f2AÌ3°$¶÷––yk& ´4C¤ÓëfÞ×Ù©@2!à]JX»;!ÛM•÷²,§Ý–yêÞÌÁ¿Øâg¼UQw1ÕMÝÉÌw¶ŸŽš#Sþß)vŠ™Ì?=l½úyZŽ -¨W¡!`™+Ô%!³@îUê>W¢©Ê1A7í¦8 –,ïü¬ð÷|âðƒ[œ§í%Û-íX7‘|*ç>åÛ³êØùå Mræ°öÈÖÃ3ÅF?ÀZ>ÃïÚ-ì“xñí76àÜ(Áë=Úµ'N>ü/® -3ÖqŠLT>ß6—Þ\]´ª³é1TÃØ g‰ÂZ—P$æs^îVUkb? ¤=ì€úš"ÀΦ™pü“”^b~ŽÉ¸ÛmwÖMu¸Ldͯâ§Îú:' áƒ5­Þ•+T¡“È6Æoñ;fI­W¼šúu€b=âJ`H‘;á—z–ît€žhþ Ç‹„É,.ñ†èl†‘6¶nÜ$y¦W¥J?"øš&j^d9yÌD¥£TŸ$ß´k\˜fŒ -nÝùGý¶¡&CO—V$ì -ÿªžQÅ”2QF{77`¼W§/o(âàã ¿ÌcfUTLaœðûqÊx!1 .ÀÛ SQ“Uô^È’³å‹d*ÊÝ(u} E&$ºíHŒRÈH[¦ -š̸ì+Û”Ô]Š–=Áï󈮾â¬ü«3d?äéá“„7×&Ÿp/jö¬Ýõ°ÜùÖÄ~– LQ4Å7C“Ïë!¾(^Ð- 8í<(âóSN7¼©Ð1që·`èÂ[ÿ à©Ïo,¸ÜþA -9¿%ãb.RUÙŽÓ„¾MúkéÑÓŠ¾16•FPoÃc|0Kò:E°W†óòÈ`•”ÖA@Âlºõå‹ /ê‡0;¸0B•ýB‚IçÀC©`v0\bÛr,öÁóIÁ›vÅð€”’h¥ð•Ýœ•äé‚žùê%U¾Yؘz󣟜3w[Ë驪Ï?%s}ôï”SËbÇù­Ø\W‡²,°OÚJ'cÍ'ýÞ©šƒú¼Aý -§Y³‡5*Òì”Ô„t·v”fè†MQÅZ”Ë:Hï%Ý:ÐÁx)F -^¸·1Fk5%´&ª›••Ü˜ÇDX“µÒR½x{ù9å‰çÅá&¿^X2»P¨÷·hí¯÷cm„l%r?cõYV9íêBÊ”áÊdû¸×yËÔ r¦ËÎ ̯ýû7¾ —Â××XB¥Io«g™Þpí}"µ— ®ÍÈD⎻ÀÄ„1^ä+ȼÂe*åw; Æ¿§ƒ¡ÑsªüºçÀÁù›'Òæ#fúò‹@5>#Aû 9 -D+îÄÿžÞ|×ZñxjIo»ÞtÇå+ªMƒVBŒ7‡ý®‡N1j©P?ð”NV^èú»Dé³ã,ï~ú#¨ã<;:y~æÌ6¾â™-©aŽ¼ÞœÏ¢Bˆ¤œ¢;¨…}öèݶ?ä1î•Ö®$‰2äx')Üøm{»¶ƒ_ÍíûùDT&›%—óCÔž÷”neÇÐïöBÂÂOÆ7Ú3µ¦wðå^rg{©±³QÎç³Ðç=SD®Ùå0T=DQñBoÊæWí›"Í--ˆg¿ù·™í\ê Å ƒ4 £È»¿Å‡@¦röT«—ÄÁø[4ÿs™7_-yÍYŒ+&eäe¡ýÆ3“ôuØÈ|TfІÈÒ+%‹#mC t<¦™m¨ªš)Œß‘SuôF?Emc Cµ> jœ¡˜£ªuê1Eõj·£*)º¾Ö ɧ뗂::°Ë…oN¥ÅðÌL_±+bmõÈsÐ8iŸŒ¶yjOë»_þ¼_r(wæqË¥º”tÌh.#¾rPí2ù§^¹t5Ù­s”¶9{“¦ÿžŽ±¿ðA<1Ól~ë;̹ À¸<`ë¼ýI:Âìb"èê³LKâiÑ_Úïy²–_@³²»œ¾Å–^$n Ç@m;š®k2ÏØGY/Üh|°gås{\u½m[*¢¦å=œÛ[´:â…é ¡cc©¨<G'´ úÛ5¢KèO±§m kkí„sYdðsU±9ŠH Ø“’’Ÿ‡?D÷Ño¥Ç“S:;10ÿø™ÖxÑ(n¸Uœ¨úÔUƒY«S¹É${¢]”ÎõÅxàWó?_Àò»mrl>%J¢D‰€IßÖ3ˆáÝÔŠÓ9@¦‚½±TŸ[U_4O ¼ B»ÅWOÜæ, ³‹ÙCüé¹äÝ=\MÜÅ ðâï:"5±k­©ôö·'úšõãÁ"éñ/ÑÓW5sWxsvZ_fLQ Wbr¤“²(£ñ#¡¸‰Ñ%’õ úåRÒ” -+û ¶‰évß'ÒwHÀ$ƒQ)¼—4dHç¶ÊïöR4C_±|‘×έ1ðÁ®W¹à?“)ì3}iWô¹¡FÓY@:‚©k„“Ä{_ÏSƒéŽä{C–A«XzïÍ+Ö<ÿ„àÌ$ÀæLù1^émZûm¸ ¸‰frÒ0¨ o{Ã:>¾±¢Î`LáŽPøñZˆðH»z‰cˆ”$¤OƒÃ5Á¯ä¼BW“?˜o¥Dø‹3)è0 ¾"WVñ˜‡ƒÚ¥çå§oÝ«ý÷¿"Ùð®xnð4â•‹&Ðñêו/€d;Îꯆf3Õßó·­¥™A¹í¹ÚnI§H·¦O•¥‰‰K°ê áP1šŠ…-Ë2J[p™]îO¯µõüwÉR¨HŒ,]§¹‡2“* €ú>”î*{ ”¶Š­‡w¦zBç2=QN (;û"Á.·;Qð^?¹Q7… -‘í&5k¾¶š:_NŽqëqr4›óðŽ;0D 7eŒÒÁÒÕ¨*gjLÏ»éÁ#€%šfó0Z$1¦;!N" ‘¢sL‚úpÉñ&¡¬›_ƒ{P¡¼FUÕ`=´¶ù$üØ §Óñ±mé¾&Þ‰×ãåüVtmL÷ã©®¼VBÓåiÛOêÎ?†é¢[ZPFMv'ÿ›€ÚVZÎñœÑ•Ö¥™ÙÕÍaàIà,$i*øn|ÈE+N0VˆWük—„]âtwhp°Á´òç–íd[Žíš§hpSô–4¦ø_zW†ÏŽšaØÀZ{\·¯¢ž%tNVÊ€“Qüx¬W°I*i?ß­µÁýßÛÛL5W;“Ùò~6oVeÁ€Ù¡gdãcY†ùˆ¶—Úß`¥ý«)^]7ò·üžN$JCBÂu:³¥><ؾP(á-–«œqƒk2º àQ†Ñ‹ÎFzi…RòaN^¶#jŽ…äÄÔ9aÆ\ÿ,ÙªÍójåè#Zºÿ7Nø¥á„;¾7‘ó´Sm§G;œ©tËO¯Û¼ñ@§j.½R[ ³n4S6WXõ4æá„U7Û_€mW¹°žáм_óŒðÕtk¢Çt²Q0p‚‘+äöÔA·BÕhŽÑ÷ËL›âaÆÆïñ1ÌÏ4P}ÎnkÓ!ù³Ó´ZZåÊ‘sTÍ{Xž…¹Öâ…f†ê\ òÆȺm4;½D=8ôî¸~¢K0çúfîv±Gù{”ˆQrC½ôbF¥ÁýZî4Ùìs&¨ú@L%Åp…ÒlÜSä`&;87ëz>6櫧›T÷)F/[ŒÑÊÐP°Tv£(ë§ þ`¹¨¢49g@Åjö©òßñŠu>M&AØ«ùðqK1ŒÛ7¾f‰B 4#ïÓ4÷°ª€íÖ‚g±‘uÖgðs×=·#É'Â&WÛ^2,Ÿjªb$³¬ë› ùõ„v‹L¼ó”ÇÿêÐb’!¬-½]ù -ÏÅt¯œÞ^ÊKöuBvp ƒ`fácݪõô\9!¾{„þL ëIƒ14A€ð ÿ -6q!ܾö5Ï\Ž,éŠkÿš­ÿ~mWùGk±}!Ó¡¥ƒn-}6 Ð.z""XCð<ŸÔ±¹‹HÉq¦Ô4ÍÛ‰ÚÍ¿’ÿœÇñùñ:æ&d°±‚øUýŸ;æ .‰wv•%a¿Ø Å­¯»Š"ýÆ©‘ù”¿Êoᕹ‰ÝûÝ­Ò’R0âƒ{\Qf)òF<Žt7@²p™j c.UPXñuÏÓuMe/–¤žáÒÛ‡âlä²½üø½2cïòPÊcíb!&&˜¹?z;ËZt®°ëŒªíÅèØáMÜ„÷ø/UO$>‘«ªhÜãŠuλ<ïÊù‰óà}a¶”§š6Éø‚-‡Ž‚b£{O²TÄV¬à+í噿äDMõ_ܯ¡Ó݈àm’ € ëþÙö ‰¨ÑLlýÁòÕª™ [0/l|µDà…Ìò(¹ìáœï¥AtZ’ÑVÓÔgÁJüM‡ˆjk·7ñõøÚ˜¹Ñ=¹ÚKÈÛ®çÙ˜øl‚žš²­Î‹E‹=;aÇ}+£jX;ûô*´…Š0aÐ¥ââÂÄÃß~¾YDŠ ›x³A…S¼¸ÑÝò‰‡»4áúkôï­Ø¬K$¨Ý„â -4ðbú“»é].ÝÏ7ãÆc„H–Wòíbe =ÅíÁ ™1}û~£âgb÷œ·Ö5Bãcn"-£¨ymƒíðA“Ù¼¤j³óp@¯M¤Ü`ü/[ÃTÊIBŒæ’üøÇæ½Ì5x²áòr•‚:–;¥p -¡ËÄVmž?ÝöÀÆ^å-wЖÁ_SŒ‰#IÛé.œ›Ý½Î‘ð‡ ˆ™¦øZνÒë°Š·JOÄ­Ê8,¦ž„ö~Æ‚Gò˨µjŠË×£þ¨Eç”dvà1ûœŒ!Oè20̨ޥIÂ+L¾NŸxê‡:È ÍÝ Sübó‰8žk…Ä9g4&mXQíG lÅF+÷ÜýdÅ“C¡CÎVL÷Iv ƒ.rÝ÷¾ê/1d—Vv—¡‘£‚æaDp sòà®R_ ON1ãsb;¬ æýÛ7}N¹fÃÐ4J¦Dkg‚DÝGaaÆ1üù¦ˆµ$DY£ ÍÖè–Re»ËׂL.½Mýìsê{´eHÅ^g>(+1~žþÈ÷½(¿»­Ñ¾P¯¦êP†Ùpœmäw ã®ù¸(*ÏÅW9XÝmÅknr•æI9‰òõ 0º”mš»¦ÏLtû€×:ý=PSÌß³tʆæ/CÜÃ#1ÓÓÄ: -$äÕ›Ä(ŽÀ‹EW” ¬Äƒ\^ó½|`__ú*™±h¹j¬ \©¬/f©;&$'™h—,5gÁ4 GïzçiVWS#ó9ñhP¿a Y¬ «ìÍÈ.…¤ˆÆŽ –#eÂh¢öPqd('>öš±æ,}ä/h=NÔj°›æÿs­¶]Ÿ¸€DŽ¶Ð¶¨h–†p¹ðízk@hO°.䶃7-¹‘TAÐá&´:Å´fÌá ¯¹5‘ŸTþJÜÍ.îÏ1ôFB”ÌgdD¤$‡Ðt,ƒSÊŒ'®>«``ó|¨ß˜ŠÞÄ(ì /Æ~=wpsç¿å“¶qd«EiNüíŒg@å)5_¬Û 4Ú¥ÄN õ `ó*)†1ÐdÉvþ™}SÓ©TŒ·O!ßZ8¢ï €¸È¤OPý¼'~êSté~%>™¥§:Ó¥=wè}¼ S9l„VùñZÐâžå”¶HËQ,¼©Šˆš«·Aœ ©å^tú¡/ìÀÓn‰wsµN 9ûaÞIŸÿ™‚¢ ®Œg¢~Ú£$9pµì¡›ö 6QCñ—­J”Aá…ÓÉ¿ -ìÍëγÃE¾6=“?ÚÛÚ%È!U ƒhø;‹)`“t¤q|â§(Ë=M)Y»®FŸi¥ºü´ñ»¹ i{^™È[Å£N|.$@Ú9\P‹}¼š/¬¸DâEû°T é šŠ7&y˜0x›éÌÕÕ×½ p~D`[ -u€/3Ðå9:¡º”ü|z—€(i­ÅI"ÄQZ…—‚á•X"E¸bæÅlr¸†=qX¤HüP*¿UNäÇTA¯b¾YÃF[, -yæ ¬€Ê?0-š7oßPÿ°ßÞLá Á8º‘Ûh¥}ažÚ›ÏðÚUü¥£s¢"Ñ8FW_´â‰LÞ(®Mù1®ý4ãe6D¤:¡žÞþ]h¡tó,èò @`ˆ¶úœ'Î2³f‡‹¸Ç?²ùvn½º—¡Á6–½@P^˜Å@mÙßÂnê¦ôÖX$µ8I-ûKŽ·A6B–4ävg`“ØGlÇÚÛÚ½g;ÿcû€«¿<­k–'ÂQb#U, ŽÈI2ÆWg] õùé$ýâ½’DKa°‘lX:úZ¸àþ8yÔ¯þ„˜«–È-îKÔ$s؈B˜Ðks¡ù¯ è[ ß™xŽÙ4ðÝå%Ÿlyé LåLD -@¦Ÿ¬&†/ú–@3†%׶9•£éÇì˜çûÙ Eÿ‚Œr’—ߨÆßKÍoÁU4 kó¿JŇ ’h-xh¯Õ?ZB1g&눬‰áÌ]‡óXÇØd0LÊÉ>v¿rËŽ¢¥É@b¾¦â¹].`xD$âYÇ_kµ,KÄaȶUŸó¯•¨Î[á£Áúb2¤¥©·ñjÁVUà£'b±Úûv~^zÃ|óqÆÓÿŸõeò½A)!Ü3= ÎGe{!£}ÔB÷J@vA_<6]LÓÂHÚ ^¹¥ÝJŽ¼Bø‡S[û`ƒ§4I<˜ÍÜQ×Ã;-ã7àh k h>6F”·OClå±nÄ/‚Ï3‡„è–Ó{Û~ßš®9lûìû\*o„ÃÇ‘ õ#Q¤© -õ`÷}ßy$êˆñ7Ÿ±Ã7Êì1Ž™­Ë׉H‚*)§0„ü8;¨ºñyé€è -HÓ/ƒþIÜ,%ÚY);Ÿøõ=ÇGñOOž#€p1);»‘ -Ø£®O_LJ»%²¥ûêg’åvKãý­t…Šfõ¬Ýj‡}殩Ã;TΠ½4§ÀËö~eP÷Æz«üÈ­} ; ¥È“j¬R£Z£¡)÷'¾uZÑœÅ3a€ÝövÚ¼˜c8'éãâV·*£á=‚³o”Ðõ&l)*ÿ­b¶S_ó“ÔaxçR·Ò~Á€D‰ñ•8ÔmX¹GEnßöëÄüv¾ -—ÎT‡¸ê[9Š4m®X’;³é!xŒ¨»cŸ^«}›Š™°·.™ -*/'%¨9„s…kÓËogž‚û»€DA(.†/9 %:Ø(P¼árÈP2‰(õå[Š5é°þ Òl¯<r2 –yË'/«¹Š=ä<Ä81Õ{ïƒWA‰À­Ãµ)žTBÌãç}ÒÎŽLÝ_cN?¯ku‘pcG<Èúbvt¾£`¦ ¤Ÿez_Ê5½§³?Û§í;)áa·SÛjÎÆý¢G_,“åÅx([v €’P’¶iw|ÒiaIRRâݳR©Xu%iò¿cFHˆGT€?.=TyØ"+$Wög½ŽB\ÇH™YP~æ¶!UØO–¥qä<ÄÂãhiI$È!ø½~h_® “ºÁs†Ó`r !ãÏQKÉ­cÿ4µºòáÐãvÙx?ο'0Øý4×t@£‡DXÎJÍ›‚ãüÊ‚uKyÞQZUTïêŸO‡Í¶Âf¾{k(ànL™2Íøàž‹ËŠµëqéÚô\ùc°T.ë TÂOP€ÓEfT×ò÷g>9•ÌÅí9÷%š;Y|ÔÉ>ÓQ¯\!ðc¨èæëSý®H1ë1ú4çÜ00 ö!·=ê$R'ø¦U?®p~†™m¡§ñ±'´v‹¦‡Ñ( ? ]þÛÄѽHwp Y¯¦ÁGƒÅüÜ謮ëâ²Óôü -endstream -endobj -1974 0 obj -<< -/Length1 1494 -/Length2 2555 +ÙÖoc;„j—¶†©~E£Ðª%9.ì¬>XJ‘ٚмD â"e?¤¸€›&¤oLH:]~•n¦X%„lþÞ¹”­ÏôdQ@ãa~M~EAËŸV.U‚›MØ€ª¾")éJŸ¢Y§4Ò›º‘º UËêC9¼¿ù2Îÿò–"\ªºÜ¡w¥Ô1·rlxؽØнtÿùŒaÂAÙ*Ó`¢†ËJpÁ¿Z+—¢TXêÀ{˜¸ñ«ÉÔ‰ßZ4 +œ3<¥¯@žRî¥bš¨[‹ž£s¢Šilk˜Ð`LØ®_/É{ªS”µ&ÚQµR`øSZC^鶃?í\j!m,©Á€jêk3\Ð<‡Ë5](¯R¿•_4šÉÂQŸ¿†j|À«64Cÿ¤ƒŸ¯AÆt7Ò1[ CŒ-#‡Î×€°È—(¿PüÔNžƒú³GF¸©a|‚säpa¶³¨m6&bê» ß4NaŸaéמY÷ñ.Í!Õ +ª>\•¿rénóÜÙ±M0/¬ÞT*fáv×XòA္í Ý-wúÙrÒËOœ± +BÊæg(JCÅèIøÛÄeP•ð9Ûçúæz»‘ÖÿêÿgþVƒ¬"ÒÊzow$ïá¼È1¨¡Ç=ÄVèXqmû¤°­XNþ…¦kžMv!:´ÐäP3Œw¦€V?¤Zõd dCBß»ø1sŠ ¨VžÃA‹µ¥rÛXUÉ«l݈“œ>©ÐÚáç>è;¥…‡íÞFý4ǤàõΪÉÔMYŒ©]±ºôW‚ &R`92.:ªÄBàBä:ÏÇ? Ò‰ªs%D7ùΚð’Õ©vã(Oá+ÝÛÊ}šÓ8Ÿ©<Ð:ŽÖUy±ky€ +ºïeýJÍ&^VÂ-ó/gvDgPgç Õ²,ÍA·¬*-äˆ7püS˜¦+zZxU×)\<äQ˜AÆÙ6Z›*G@†„áó ç²n‰Z·' pÓEøŠT¶ Úô”pܹÍÅ9´Hò]Ycð}’vÃefP×m›£m¦˜õ¶ ¾q|?TC[OÝzÏ¡;^3¦Øщ!'ÔXÒ³S‹|cª"¢2ÑóÛÇ¢ +ó|!èf±´Lý¦8QR«Ž•j¿’Ï'òÌj([ûI’Š¡¯ŒÿMÃæ¤TY[Òèyg˜½Ÿ±gÈGq—yÞ 6¸_Pa²Ì™MAïêªZ*Aãú¾$ÎIòs-{¥ÍŠ‘ÀhI6ž—;‡tf€Mj©€A{7bfÅ…¶ó±{#ÂkByÃ{Šiˆih>¯ÈóA»DŸf}Ù…¶Ûe_dÙ#Þ=Ÿ"ƒRõþ£X¿LØv¡Lëë3ÑBòæjªŽ@ûÏÉâ=$edèKpëÔÈèwwFü b|3 e™òà\`æRït?!ËðdøkbÖ;À^m;·p¯›Zt~¿ –ÈUhçm4­]M1OÌS‰„íÇ ›n$£âzIÎŒé“ÿ5 ô 6Ȥ—šˆZ +J¢oø4ŽÆ!‰´"—†ŠÒĈkâ:¡ƒ=*RÁŸ†•©~z¸ ú˜Ù·ÐÛýéBÊp`³¶,€6ñ`CÜÆ»·¦ç®øËÇÿ›jÓ¼œ€â˜ød’³Aƒýãw¨è¿‚í¢/Ê'NŬWÀ#—¶?¢"b€º-DHÔ­K‰¶}ôÒÊVš^þ…âM”tàOÒÂe/ ž y_Šêòû4ïs z\¢ù×ÌgÏðPníƒî@ÍÝww˜yý7u ãçQ‡ÎÓÚ•6è W¦ð.ßCÅë *šIŠ;²…%E–E§Þ|±oš$AÑ¡6¬ÑÉx + ò_©iš”hA%£„ø¿fóGîm¹TaY\™8X!f=;ìbÊÓÀ'’ËD¢ %ŽÖq”«éÛ-yÜ<¼þ4šígþ¶Y>ì¼™q“¾À·•Zsr뇙ž”»Rø{MWíJ¶@®t¸Ù$䎎ê{«~ùÒ>Ä.KsM\Z §pæ5™B’_à„ã:Yg‚`ÌŒ ’ÎùÒ«qă.S+F – B¼¶³… ©,דÀcYhwRÙ>·ŒO,˜“+ŸfúûK}Ä|Р±4*1:½Ïç/´ +¯ôÔ¦sºI,ìPá>¤ ž¨+‚ðÀ¸p1#û!  º'¹ 4ÐwäîíPÁ9Ë1HGPA,;²€âÿûGºûù' Å N\T4×21Šç‘íÇGò«9£þf&ÑK/‘}ß#XF»ôù¦ðDÌÄ–«qÌD߬ïIA¸åYŽý”%A[OuDT¬¿DÜô–hÛ°dL„·æ•|*SSv\x›æé„î±tù-ÊP<ÿüåÌðA (w·`P°wJbÞ?o¯Ì·<T§®C#oèÂIäÁm¤?ÌÖ§µ"qöŒnáDUÿ¼WXÿ ‘» ¶âBÍ벑‹“é¹Z™ÇÄ÷óL°ºªù:ÙC[S"~ÐVïæˆ3*5÷Ž˜#«TŒ+îÏù äÿVVqÏ®¬Ä>c ¶×•Ð™•Ú2h©ƒ Ÿ›¨©¶Ôk‘cþµ¬KÄ@›š—Ì«¥ÇaHˆ0®Çì:핤>ðÉÃ’½ +RS”.+¸szàÌ%^ëdsOÓînË?‹YSy šê¶n÷ZÍ!_¡^)LB†[ÏT“PË£ºì¿xþÌtÚÐßïD;9\”Äwè{d\—z(f„D>¦ýlS²ÏïW¤ ¨à­¡å"©Ÿº™”+Ølä´oõÆ›«Ê|‘MD}ºæD¬Z›ö +xÍš;nâæg§žK4"ƒÇ3=Ï÷©ˆnóížáÓ´¤~¦ãfׄ§z‚6²û?«&«»¦¾ÝƒZü,XB%ZmËÜ•»2䨉 ss[¸`†:OˆSÔÎOb_WAÚ7âš•„ìv´kïÃÉ€ù¶…ªØw<™Ü@Å;°ð¦Œ|IÁ(Pd‚!·íŠ*V?hÁÂðo0¯¥Wy¦€.&-ðúÀíÜqú~G2—ž&KmàÎòÿS€€hÝÑÁJ~ܵ„ýxMœßÿ‚m£—ÇÐßmˆ­Äÿi|\Êí'] A׿ц؊¸}¾Ÿ)® Ÿ¹œ$2Ù`%ä–¬úTŸ  QÔ¾¶ Ÿµó‰h‡ðÛ ” Õá +_çNdŸP!É¥*„GžôìÎ9µfñá—4y¼Ê×+çø¨ý3Nb¦ª¨|K‹ 1z™hxºy„¼ðV©Ó‹:ï±Y,P¦ãÖ#„ùìÁg›eÁ׳¦ 'éû‹ 6³³E¯6) +—;ÚusWÕ`v'Ü“©oaÓVM Dé¸ßÚ‚ÓxL‚Ö…‡ÿá­ž³€ÅÂïæȃ†á„fÆ ê ÃÄÝs'3h«‰ jýäXòŒˆbK|ÑþÌäФò5ÙbV8ˆ¿¦·¿ùêµæè^{Ç5¾²ñ{.çz%qù;LÙëX<uZ1AHƒÂœhÃ&5ƒ\¤¨ŒÉà]{$úÜ+-2d`&º9.êð•Ï¾Ë ÿ/f=B’ »UmÕºJŽCεĽ¥ “ô“õRx‹E%¥ oMs®‚ª›HMo‹fÑPYü¢k½zŽPAÿzæ}¿Ô­}|EtððQW]=1C/]L¼1 ̶ŸPbn«ªé?UªAÙ‡ nÒŽ&Gm×úlI,ĨûS›}Û„A7«5èbÎË—aÔý5ÇaR¬2DÓ¼¡ÉÑP྄á”CXz8ÀRº ÝlT£jI§}©!'83Jýÿ• +í1æ]€H³ƒì$ +â×léĈz`DÞy÷;Àð¼¢mWÛ/ZP³ØâÏÿïf_ŽÉÅw†m’‡š¯/ðSÁ7Ÿ@?œ*É™êõѧƒ‡Ã,#‚ [öq j°—íå¾›—oÊÿL½†1@rW.•Scj‘5/o8cÄ(k/”ÜݶPÂ÷ƒYMæ9÷‡Àc†féE‹*¤`È»™²6ž’2íÏ¥* +VÙckb8x»¡¿ fª €R”ZÄ1ïÃáA' .ÃͳÈ~™-æÞKÉO£\Ái¯Öœ„¡é[;¤î.Õ@ÎÔÆ⯂«¿Yçටû¤÷âË'Užœ7.u¯äêùÍWY uñÕ?þÜKWwÎþ“ª¾kÄž°aô* ÙÕ›pÏ e`öG°~iJò Ÿž|n_GüÎœ>sÜän‚¿¬5iY¸2ÿ9c$ôÖdL.É pµs1³}yll×'c&ÝWLU +£†L­/äÀ9÷A·ÔücðeòxÛ!t’H݉Œ càÖµpeW¸1“J¾m=Ì6·b™PežPµW[.¸~þ ðƒ0†ØÕŸ`ëå€&ý§€– l ¸yÔ\åIs5Å=ö°Š(J)£r„J¹—U…/¼n›!4-æµãã„g¢©5 Òo.¸©_KŽÉØÐÙh.­3⦞ðYuLÌûíuÁ5}N£Þo¼Ðüõ;L@{ü=x0 ZåçÍLPªd–÷¹²èJÞ$—+ˆB–ª·ú2weU±`ê+Aûw3¼ÑÓë¿‚ÄOå.W¯HÖtÏ‹ž¶“ùËÇ–C€®óš³—AëvGŒ &Ã’"2ûmÕÅáó ³¾uénqÙ»¢½ÇZf˜73¶Ò±¨S[½=*9¼qôùl<­Rã üh–âô¸@Ôû—†ç‚²((Í_É™,¹ÇÎTåþа¾òµÆF1†qB°¥y”IHWÊšN´ RR¹Z_L[–ÚaÅ_m|ZßiL8ƒ»)”‚j”É +D-ï ¶œÐÜÕDd5Ï"¨¬‹Æàxijt0¯ˆ£øQ2Ú%ZN:ÙžGã•dw8† JÚöñƒ¼V“–OA‡þΪϡ‘‡ ¬®Œý6<á<¨™ ÍÝtÖ5R¦¼—‹e̶è¼ýÛ«ª KsË.N¼÷„ÅáÙÉãßm +·÷Ñú!(0:¿eG]½ò%|оn=k¶¶ÁN¾ñÒ™¡gÛrD,Ç]+eóî~Š¡ùF§{ê!Xßj@yÙ+P~Ýíí Mgv”:2HW/¨pF[pƒâè؆Ù??Ec6¼Ãø­ÑbÞ¢g}FÔx®+k­z­K³½+´ýÈç©Ôj …&¾ß&ñqÅôÈq¬×Í2|²E/fÜÁ@>©L›–Ì¢ ¶õ˜Ú×ÈöÓ¤Û÷bIëG³Ü©øÞ~(ø~69Б’/ÉдÜqÜ §€Ð3y~åt¡[š$+Õã4 N ÷í«)(C,‰œYÆß×à‰¦ÃðÁ‚>xYÖŒß Ü~†áºb¾]H/¿ËVŠC¯ìŸóZÿÈá5ð¾ÿ7n‚VýG®kòý4¡À÷æO›ï|[}@u6Ï[Ö‚¾`æŽT–”—oÞRuÜÌÎ.ŸÍ~»v¶)-Í*ïTßFa ’ö,\)»mðê˜Üg?¸v4ºû‘:‡2ÕϯpáÖCxLÊ_÷ƒó)Ä3¹]Æ’…£&Y-c}É÷ ½ÉÄ¥Œ+ì™èéf¯3ãûɨʴªv½ŒÚð¸c›ÕÒÇU*õA[6lÜú‘8çÆä±Ìf_K†7f¢A@ǯŒ=Ä”EÅþ¦÷ +@õ<@œbmnÒV›<´ý²§%ÍsÎ,fa‡ÝÎÑ:7: BµpxãotÐ0(”IZµŸE®ýÔmËŒû–àÔ8ŠÈOk&ŒŒ/Ân|/6Âû²0>_Á• í…ÑÍÁ"M³-¥Z¶my +eC,]OF¨è‘èÅ­0?ß´Xõ;Â-ŽÁÄ×ÇÂí…´föLRð² Ú»…ÒDRà†9L·{eO¨Æúá7« +êEêÀÞ•¢$$ž/‚(ìÔ4ìÍÚÓG²cfM}—Ï&ZNÖ¥¹·+·ÇÂ;{ƒÝ%.Iv_7|Q¨ÿ9*‚"DõU Ýâú”Š·ó¥=ݼ®0¶)k–LV¡»Òœ¶b˜œ®ßcàÖµpeW¸1“J¾m=Ì6·g†Ñ!C èz¦Á ƒ²š®©–ú’®ú€·†×ó‘Í‘¶".ˆÀ¼ÿf ˆBy®+k­z­K³½+´ýÈç©×Šè,(PL¡ì ?F{3©¡ÞûsF'ÔÅ´%실Y ÓU\.K¶—û²2H{žf›ÃÜ,«—ôvÌ|ÜãÛ]ß“½l¯ª¥c¶¥›dx'¼o¼,<×Íïñ—ÒêÉÈB>û?:-,®y:‚ +%»-“ê7#¾Øܦ-vÅ;x¢‹Ä4¯L“¤”k°Mɺ¯¢2ï×ä=ê_°Nu3Ô°ëisèº[·6S… ì-í°û¦‰o ºæ‡¯îsñÓií?ŒNî¡&¡ žò.O% Ò4€€E¢44ƒ¯Uò  ©1Œ Èj<¶õH¯6ô!›Ï.¢)çb-Ò·ý&yЄqkFGìÍó£èËJ¹‰òÝÄcÝo„Ú¹cõ!1OR+ÏM—;n¥ÿDü%Ûè§ÛÑŠ‹ž‚loøA•Gß/7DPî~&ôcýfúÕaf¨øUaßHÚÄÅÛZ*¦°ЮMfä³D‘L¿ö„ßa%$øV¤ÃU8Ά͛²2V˜´8àdT¢ (‘{Ä–Öª*©Ÿ/ÙÄ•|z¼,Ëá-©Pp:ÃR˜=oˆ†‚ï;i0>#vNš]¾d ©……Ú*JùÞZPF²_XZÂÁ˲Ø{ýÿvÝ`i5À™RdXµo‹LÌj¸yf- -/Åx^ìç iòB€EÍ`ž#Î7yb•±«yuûa3X•ôb&´Úðšþ']Y`‚ê;5™ƒ®øcf8oàœSïuôù7Ë3¥q“ t§\Ø»sM- WÞÕ CU5<6 † _p8D ºE?tÐîŒ V™ëÂHºªø_‚€âÛmHÆ/È"ötø{Œd>â·ëòèð\4ZOø(?qëÆt¶¾X>14FO^mD.}‹X +` +ÖÍô#\’˜ï¨Q¹ZOÍhá9¡S Ò æu¯ /ÖQÁu§èKÿ±Ù>\¡Xí|›ùaXFî6n1ð÷MinU°‹g*Ú§,6ª9x‚ŸÝ÷‡X' wTw²:¶/KÇŸoš•½Û .µs+·t½:wðŸ¯\ ¶¤>òY‹s½S-ôÙ¯¼@‘x®b€“ƒl| ‰¡º¢Dg²í°£CpqÔ…½òùÑàviübu¥´‚íÆÛ… yÇ•°Ç£ žØn™ì½P€õ‡÷ˆÓ‹v;›6ʪÒÿ­ÞÛ#Èó#Ybã(‡ŠR˜©¬Ëß8=JУü¨4Ä.fxňCjiudZh½@'œ”ûûçV ¡ÁAù)Òž¯D ¨ µGDå¡¡\¦vï@ð±ý“kcWµdùŒ°‘·Q¥D©I¹æèËÉêªÝÎ6yd·±iªg>·‚âuí¿àξw 1Õ[¿Ú ‹ 9êðÛè׶bÌRt òø„„áIJþ!¡– §ÔÒ슎üÈOX0–ÖVÉy,áGÂéÕ½4•íR*7±MwCÙuèt¥Ð5_ ÝJ¥Y) +Ó;¥Uâ/´hœ˜ž´œ{ϼ¤( Ê{ +v6^m„á¾Þ{»,²qN7¯ËvÁã»aÛŠiALnܽuTZs.€'mÇzþý¬Ý„3†šÏ7[U¸lš +Æ_š¸tìþà™³?Ä€v–7JJH¬¢ô,›–<#ZY¼„¹¶¢Ø{…ò9𤻔ôBn]'5¦Ò„DDØ “ûH+$ôñi€U€ôëÿ‡¹*@@ùõ¦UˆÔ›¡­M@è%‚‹Û¦|©ˆ›1ª‹b?9þ–jÁ!NM£-*ÅSEø50⹶Oü]}¸_TJGV-‡“‰mFCœtÇæoÆþ(|Çoïý:XDý©¢Ù.ËÌ—pÐ&(×Êá#MM®‰ ¿$š!à‹’ éúŽãea:ÿŒ2¸"LHå½À"”Íщn>-ƒA‘+‘âÈp³:Y¶WÙ +·ŒiYñ®Tâ§GÔTU bŒ~ë]An#GŠÕl†\ô‡9—jµ¬/¡*oäEx'ëc[ûm\ŒfÃÑ„pL†4:É%‚C˜Â“zò±î¥ˆ2Ý|õÁâ‘g:ã Ú·6)¡Ü×ÀX­ M[æÊüÇÐOCAˆºBÓiz>(Q{ HRÅ3úT(’!cëìÖ¥jØ;Nŧ&6ÔitÞWp›M­5WÓä8ñ’¯Ê*ïÙ—­!ÅV7m!ûpm&%+‚Ê— ©ˆ€Â±‹²*wáÕêÁzŒ.Ó{ŸK⨚×-Ò¿‹Æ³Jܳ‡ÞpÇ(ò<Ú1akÿû³©¶*w0lT1H¯³.Ü×Hͺ¯ãァØúQ ZÞˆûòÖ¹Oäê:›Û([íƒ RPo—®åüw‡ 埭Qú°­ZÁ¦´Á–Lëƒ ¥&Mkd7w® T‚‰yH°V¨fyr’¬p#ÌÉ´uø’øh‚<Å]HYÀ‘HRŸºW£‘c1ˆ€{W›¶ ÈöœÙ㶭鷘Ö[ÄvâJO²Ž«n/„7³„â=iË1+B(%®—æ8å9Ñâ--F0Cwà–!!g­â+W(õù±æ‰Ù¬,riaŒ¹§,F=Ô')Ã|AÛsGÑkœBV˜_æÛuüï›°÷Fe¤_ ¯!4á’bUýF¬hx„£T} 8•ߧr$7H÷ —R›œD•¡öq5B˜ÍÃD“f-s6¦ÊìÛ]a—˜”Uy*dRwv>b¢x”µ¹7ú­4:aÿ7|_]pƒˆ³EÌÛéée,qª2qÕq£ñ:iª£9ã‚ë/¥å°UæbbX¢BÍq§öá4¬ácÅQ ›Yqà><ßÆÓË^Ø3â¢ì¢ÀÂ꿱VÜ…®©DA¯ƒ\…¼0dbè­YÜãÿÇü–3BšÎF•miII³µáÜ!~Š‘Ñ7ŒAº¸´umõP’»ÍÏy£‰}>ÃfÞú¤ v=Y½u(r`àúà“x\:Æᛎ«Pä]îOÍ,¸A^N8Uô™[Š%{>¹[ýJ’VLßß·>í‰7‘Z°Wí,Y›B:Å$ò1?3˜´’8pÑ6k]Ò:ñ; @}âàÆ»y‘ËneM6N5‚†åw<3dÏq^cFŒ°,=1Þ^g·ouuüµÖSÚÑJ‚0ÖÏú7ÿbGV¯øñ +,`1 ý×ØÍs²²¨ +·¹$ñ¾«Ÿ`Ú,@ï—æ“Ûñïš+4m™ÝµDµ•JØÌ„×Ò^åý¹¥_óŽåV²³ÅGo°övÕ'⾧uKôLì°9¸o´t×.ŸÞC¤¯x´!Æ~Q®jDÑ´½Wɇòb‹±ÊLXljôøÒ²Ò¸;óÏRvFûÐVÝÐÂœ’ÃɃ,ƒi‰ÈðÑU¬&Ë™­ãïð!1“éeÇÚqc°– +2Ö’d©œýºê8¸: #ÃmrëÙ” ¼êu1köHÃÔeá,ÓaÝH’\¢&¾aø†–fÅ8&®H€QãkžWk˜3nWÃy9@ñ~y{!‰«o¢ë$êAßd>Á¤"$Ë燡¯óà„<³2J¢œ+Rº(íØ»'w{H)Ìa,öwñíÇ9e†Ö¨!,`µÁ)×ÆBùœÖF8k;Ðe× ó¶tz4µ,M6@Ê:6#8£²ú­ã6‰¹QŽ;ã? ucMö%{vµá©®å›ëÃáÐÕ©2±¬ÂWBKùèõÕÛ×»2ùvâ®FŠ4ø'‚‘œÍ°îocc¾ïŠIåÀB©5È>"‡Þ¡Û( ·œ´[0Œ¹ê¾ÑMâ2œ;'µú»¬Hð6dêÎ×:ìʾ¢*0CUD‰ž›Àælu]p½^+ðÝ޲̿šæœ–¨¾§næ²VjöT©ÇÃKìôŒÙrlò¤Cà}=/æë3 +Sœ©]îuk#„rZ‡ÌÂ_¹!.ÖÖr¯•]½ic¿B{y!J¨çï8ÅýÅ>·fʈÁ‰(ûͨ,‹ñX–§±ƒFÍ~€í2²Š*f‚ŒL«~os¬ Ȭòïja`¼”zêÖÙg Ù–ªŸäÔ&Š?Ûøñ­Ó"l_}å‘j^Eq#l°^%À¶î(UÓ3Ì5‹£…s¹š«é)u-ZŠÁÍR³7GÒ…ó„‹¹ýÑ/1Àd£Ä°Fe $ ›^˜ÊZR pzŠ+.õ1¢øo«”ëÖµ%·XÀæ¡Ð”(lŒ ë KmbÚÿ”ŸWjV!Û³V"¼®§1ËD}ý`ùF¦`ÚÏrºqTgVnþ(ßç)Šßw‰C1í%¿iUá= ´ÙÊÊ„g% ñÚ—¨ƒÛî÷]š˜iÑa-X'm|óÎüà—{h»ï´¼Äss6ê×3Í'DRëàv¸ú¬õÚ0µ©s>ºÑQ$ÄÍ¿sEu#·!²¹ÀÆÈ@^Õ³1 ¦0Gè‚êørÈ}5 •ŸÖåŇØduKwM¹ÍÂÛÞ¯4dsÄñš ™¹@׳±Ñ¢îüz‰øîgü†¨jMåÅ‘˜õGÊàÀ¥qŽÆÙ‚Âl–C“’Wq”¾ÙqÇ##’$§UMwÊÓÿŠôøÍ;'|(œ†HWÁBÉÔý*.è¶=f9oŠéß ø4?àÅì].¹Z7Ôlƒ˜£t¼BŹ<Ũ0µÅÕísò÷ÑYÄ[(hwDzjáeŠUä+üJžüq“]í\WÏ‘ª/w`uMŽÎ£4î&rVr lä±ÿÊd¾L:ÉQñ"½ÕÇ$Ë€#5ø0¡ŒS¾Ó’äA™gàÕi[Ž/Së=§ò9:ÐQÖ€…­!É‚ÇÞT—°šZePÚìs_Žü@—6¦´yÓ_¥‘£Ì»é{Pü"–Fe¼&ïdù| TzÒX¬wͼ–cnª¶vç$ÈD¿¥ÿήž—fG2kcÔ7èNÜapr’åŦêüì»á6`Õ69òÕ™”.:&ÆtdæÒÎ? ~#Å,ç½V}§ñìdø…^€öÑŠ½Eÿg4pÝÁö9íx·ASt­W[*äÓáVÖy~WÆ ±æÕ¸(Œ§çú,i JF9vY…o“OSB`÷½Yj&uÁý¸äKþ î[vî>“*p- $rÉ…w­m`‰\(’`èx¬äöU6$ §ÖïYwî±…Ôâ®æ<¢WãÀ͖҆óbÅ«¾„×¥¸):RNB(2—äó–F ÜÈqE¹ÃÃÅÎ"ÔµÞ¡x†²ÏÞÍ|›æ·Ål˜­…ûv$õäŒö¤áDû3mó[Ô³"‰÷þŒf¦¨-}u`ÑÎ)a vi?¼b¥LKðbì_Jƒ®0¥Tê ÀâüèPaœÚK¬úJ•‰‘çŸS¢4óÜ.Ò«ú?aGxë¶)SµoŽýO¢ ¦&kš•,ªîýªÜâÔ/¯Nkc“4:ÝÈN…#ì^s®Îcxxx!„‹C¤Ö±ºìÃÚŽ¢Wck_A»©ÒT”žX…/Á,m┵›á5¨•úe«cd6® å§sS‚<Æ~6rjð¯Ùë«swn{&Š†^;¡Ì?ZÃyƒ_\¼£¡9ö '§âA/BÔƒx“Ò¿ ÏMm7À.©ÆàýÉ© +Iß,S—U +£2³4ƒ%Ân ôt ýuö¥:¥ ˆF¾arâ^|¸*VY/'ü'ñÔ_ô»Àáb1€~&‚ÝÒ‰ksÁdÃ-»½ Îó=Ô/‘#)âËã÷ÁDžOlŠçÖzî›éÍEì4$ +jËùÂÞf™,ôÚ³6ŠdÉ«Zv…Äfè—–rúÉjÈâFº†q½Þƒ¼bKàÿÓ‰"Í¥Lx’ùL1»õZlC‡'™F³­ªDèª}|\ýò…šty.á° à7åKn¾JrQDØ”–v”`œ“ ªö„Ž:rßк„D)½;Yv:ã9!Ú]?.D„ØŒï@Î1<‡T¸XÐZ¡³­r5…Dk×9 +°ã9'΄T2WLš[fÚ­ +” ù©ceº¦.% QáÏ&!)dÛ.ÑÏóé'…h„ÃÄÂþàP©“Ù«Âe£žä¤”¼ÞàuùJçÓ’à’wûN?ÐöÊà+¢9‹öâOmÌò×"xB1êlò‰‚¤G +qVþJ>pËd7ÁŠoæ3AЙ³‘Õ†b–¥drIˆxcb&¬4­¡Y{+#Ag¹ë=¯5ç•¢Ó!“Ü.åªìmþ,†t÷Øÿeܶ»ß—Ò»w@§Pžs“•Ð€ßŠx–ŸóÀ…bÞ@ºï3Üq÷G{†:ÇæÌÓÒrÅ€!3©Åáhè¶ HÁ "’;ÞöAˆ¿–(EßòÁÆÁIÁsÇ(-¼€rñ^˜Ë¯’{Â8"„ŽÅõÕ\¡“Õ<5I“¥U ÙÞ(B* ¼ÅÿŒb¸”àw Té•~dƒ=f¯‹]r]!õ1!€ÁØcòFÂý¾n(ùTòž°þww}¿þøb̔΀OªÈiŠçFpVÌò Ü×ZCüóftÿÚ¤âNx¡Ž(QÓ®ŒÿwÀÈÂþÿAèá¿~œ°/.F—kQNIk´ìŒpƒMÈ‘ðiÕ((UŸ¯ ©0Ô`ÀåV94¤ ‘…FV¯¦Ãê†&~žD³KõŒ‚ +ËýÇåóõÅÕvm‰ÒÚ µ»3M[2÷NpÛ1,)‹ë ñ2©©sÊR©¾…¸L¸2x/Å°²+pÎúW6a¹ªƒ®+‘ÀÆå‡æ2G˜lò™!âzŽe¹ör/™ žÑtÛÑ LV€ìJ‰z>b¢x”µEñãx[ÿâ޻㨬…4¿l´K&CïddL_ۢʪN&çBU7?J[Îþ/ çÆPt©Ÿ¡5ìÇAS:\Æâ5¶<ïL‹¢’“·êhr¥Â£«piÉÆ'Ä£ ‚3N|v‡éo—Œk+%™Ã°lyq謇È5ðžË¯Þ(z2áùj7Í̓ˆUÞíP˜¬Y0T[cªžÒo&kѲ=pci ž1Æ‚àÙ£»¯ÚY*o6a ñÀ‹õÆÈ·0$}‰4]QÅ=";s{~-z…ë‘QJ€øç"og ‰pX+“ŽØÚ1ºî¾sŸú2TP‰…nT³C3ßI 1(+úåŠëX±$r¯"ùÉ™îÔr²Â¸[`¦±>:9X»ÊQå°6m¿mY+{ ÉÌ2¢·¼ËrÖâÝxŒE±q›€04-GûÉÇ‘|qé›xºÁ†Þ.øHD³ß꙳DYç¿‚­Yý6nw_+߯‰ãpWÞ Å²y›Jkb©É»ÏÀ'<>–¡rc¹ð6›fºläA–í“Vç%ücUÕÝœÿ!ºégñ8›¬ùï§IahP²wP—>QaÝÕ¥Çhx˜ +¸HÂØ 'r°]¦=ÁZZk¯íÙÊô^%Ù£s§ú."ãy%®ÆÈv¨´ÀˆCkåx +95]!JYe«¿™[À¯B–¢7åpÖ뻺é†×“Ê@=ÓépŠL3\A| 3ëT·¢µ%'Ö?=„Ïn¹×aÖ”áGÇÑþpñËÆGD"FË'@V¯j!Ϫ;Ù?çhŠôeþ|(D¿ƒ.MTˆ ¯&JD1ˆCíh·õù¸+Ë-‚äëkÁÒc£÷žÈAëg–n›oOÝÿbè+ËãÔªÿJe;¥HLÆÂ߇ðñǘ‚Ö—…à€tô`'¸4é°9l•ÀE’z™ªAdSÞ|fÕÓGz•ÂI€>cÝœ–ø[.¯š@R±£‚ª.¢Dð Ýθe M.ö¢Z'HR¿E0à» ÆÏ[s +¦eèúåÏ–ìñH¨¡è»!/":ÙÖÙ-ÞK ¦¿C`þ¤dýéà¶öô’øÓª·â’Ž‡D=oàipŠ‚O³ N;7q v'—g®#1—ç'ÜEé(p…¢«qx;¤ß§{t«÷Z¬ ì‡ìxòº[~e¨Ë¯$ó+N1*GYGÅ~›6äÕrß²ò…;(DÃóÀÀû1XkELÏÞ0c&Áyëòd»ÕŒš¿œÜåDTøë2¦š"ÌžC9.K^ô:’Ž«´gGHø3O—%ôìÆñS3¦y+K¿•Ñß8]¹Ï€šÐ“]m,±€=†­‰-™0v}æÊVÆYn7¨} Åh•K.ý.¿•Ù¢¤HÙæV\½™EÚË0 +QÊaözÊ"±ã‡ó6óPú]ÑÎ6ÎUa-5f~­r[- Pò^Ïô3[Å. ˜Ãjy¤±XeTc˜9lÇåNëqó²ÖÇÝþýæC˜翴r晪?çé+xýk²*¼àÏáÃ1¯pß.‡"« Þ ˆU 0˜Åo³+ËÀ¿ Ž3WÀðXëÓ}®g¾âÄ+r•Ÿ{©Yò}Z;llÀü-’X”ù½™qŠœ³sþ>I ú\ÞŒÚàÏñ2ÃÏ3©¿Â…‡`XufI¤è°"c•ŒØzqðÈç„a-¼ð¼×šÉ2šÙ?HWÁ‰„Š;áú×)›.Õbª•LªmæÆS#?žÍº~îw³Ñ~íëŒeë|<±pNLO?ÁJ©éˆ«º²0Îøø¾ÌÃ_â‹~ÎnÑÐRþL¼ˆTTjÀå×Ód¾TærWÉîùyŒYØ1uô CjlfÓ_Û³•lF_E¹¯Ÿ”(ãi^rÆK<.ò;á¨axEçëCÔˆW…ÔÑð}Ý7<µzB&p¢• í˜ÉS(?£ËÖºY¡¬+¬ñWê=j›ì-™ö0…Ùh(ô!UöÈÁÈP¼COrŸy4Ë¢;Z¥¶Üõ±m% ÛžÒñ]<ë º!D¶îgÁLÜgç°³aŽìñ~ãzbg­ËFÕó Ê5Eºägë´eP2ÿ;6¥Ê÷™j. ÍÀ7XuÒ¢ (ÈÎÍ2„Ș #Æ$cèçWwÖ#1 ¸wo ±  Óo•·˜|1oòçaüº­d/5\ÀŽù²°96#lôÈ£W+÷¬|ýr…u`-ëþé>㤻8ÝÒî”g¼ÈÝnæâcÆ!ÜÓíÞPns´ã–úìcur?´ b]t(ƒnAI²×hÎRiÄD›¶L^äÎÏÒúõwA|5+: ±Ž+­Å§"(Û‰Es¤rã!Õ  +ˆ)î›ÖÖ}fÀî–²fÎ*o¤]‚y:«ÆC”LvRÀ|jÕ€Ý@õFÌ¥ù8DU›?QfÛP >EäoúßÒʤU›Œ˜x¹cDPï?æ8ÌKŽi6nC:‰˜S #ˆ®oÿPý$Ø~†½IØ»~E»gdøˆLÀ»ûû¶üå—Ãò)Õp+mÆ©Ô85¡/BDë2Ê+˜­¢¿‚Òà¨`šŠ™ýÔSWy`+Žé; ø·ÚÇ]‰ŒP~ùæ•ëyQçqËçßQÔfŽH)Hê­x•xm™åuÅ,Š],CLÝ»¡ƒUí>nOdv—IƒZëåˆhtòÖ#r8ãò»¥ò*ã.W/fBê•CÝo@4ƶR Nž_pI–¯;rÏ_²áË8áRh>gÒÏœWŸ^pºLUÀ{‹•¡ùVñLJ” ð‡mÿ^zÈCµÍ‘¶{î~\â‹åIy&Ÿsð>‘£kÚèër&Uç$ýå8Ëž•FD©9yáýº4ÅakÜ:byàéÆ "ú…é¥+Vî~'7{y‹.òÅ)j¿ÿ‹%¨Un5d¡ÍìñO\¥ºûÓúŸm4¥œ†ðVœörÝÓZ§¢ÊKUqA?üî)0%lR$íå"d°ÉÜbîc4¹‘šÖï/ÓŠ ¿NíÖߺºv#*iòär€œv’]UÆÇIDí!ôî!Â10>J\Ý•Åiœìw}9ç W¹„ìR=ä¾Âí¼}ÁÑ—ƒ}ÇɤEÜž§y°ˆú â횦)'íO²Ôxw.¦æn8nyBšI)·Ä*"¢&Œ¿Dë›ë_øTåláuÕÙ!­ìL1¥È¹ÕÈ rkÔñ }Á"„å¡=á­c"°Ž‚æ½òŸ%BÂ6ãnËÝóÆÜ’oäÀëþ“l¥Y¼,Žê¶õ-ò2ÊùÄ{d´pÆ·ZÌhdã=¿cGU´.ãŽi¡Ù(ù²Îþ ¨|tù.`!ÁמÍšLnSë£ÙŽÐä”fÞè‡ç£¨¾ì„¹’ÔZ&€ºÆ4—Å“É(±+xÒ¿h/è7—K¤¨UƨÍóQýi~”®õm7ÔŽiû>˜5—NT9“è€$‘x‰]"ºFà¤Ôm[îU#ßû`&ÅÔŽå@C3ZBþ'ñ£ÅŒDŠ¥S…•XpfÐcˆ.™‹ÜCQR®Üü˜o1£KÆ„£!VCKÌ*'óCv6i›_HYÑ=$§‚ÀÔ«„ff3Hº‰ñ«»8Ä„ÅN¶¨;Ĉõø†ãÅ}‡OåÇÄõlup¼ÜÓ_³"ëÌrƤ}Ä+T–MÅ0N{%9³A]_F|2n3$Œµ&1©Ã¹¨Ø|9fuewã“Ã}ÍÃѱ‚¬Øg} +z¤¨ð7Wü|ë}–Z°O|¨4¶?nï+QæƒT|ùÊwU9åòœ¼„‘=è×~SÚÎݬì +©„~ÌX¼ r1ˆ˜E'³¶Í¦Ž"žË†onÃ÷X±%Åu¦ÕUö ±‹XgØ~ǯÛSODGÀ5«zS†Xèú5™A¶lÐ0HÀ5÷ÀÅÇ34's+)boC¼(s +°×ÕSò"£°Ö½ÄrÓi3—O¬Ñ`×Î'¶§M^TßL¢/Ór³ØYƒÕ¯ g¡GP«„„µ, †Óåihë^²Òðüo SÐWèH߇…µí ×t°i7l~–¦‘$ñ"e—±LêïP|–“öf£ïG˜Tpi0Þ¸"ê˪ZÆx L‹/8CèŠà nZKäÜHcI Ïãì#ÿÇÁ_ÚWy;l´iÜqÃH‡QéÚ–”rCéq¬w7ôs*ƒ®´ðÓ/ÅÞ~Œ~2žR__L™•,@Ž(qØš03¡1øb*vŽ©YdblÚ},%§‹‰ˆošÜNã³O6IXØÞYæ{£¶#) ¶¡uùh¨u=:ƒTiV—>t£önq%³Õ¯DšÞYžÖ.O_(reÄbÅø@7"›÷8Ó›Œ`_¬ƒÉ%ˆ*FË{𕽙YÚµ¡¶r<àô­”Êó0iÖ( 5Ý¿íä9Üðň»PIÊ´Ñ‹ù³óà„/ßγWbz¾uc“i*c!jµj†îä†æ×ÝÐåŒø+ °Ê´&<·SÏ/í°€•k½ªŒSu«i±wH ¯D24@þ¹KùQ¹ðy"=²m¡ ¹+UR6Èy¥ò²ù/9+öúã„2ú Déµyœ$äNAÿ$ÍÓ;ÊDdÿ 6-A{ú¥ä1þƒ³µMjôm ‘a‡>šý4ÚòÚ±gØ}Ëê.‰>æž‚<~K=‰.,2z.àˆˆ1Ÿ!Nb_ ÍBH±]>¢7éÍ w°ZÁØ”½ì‡»–ÔìµL¹ç…À‡—«Eyщ> vS…Á">ö×ôÜñe(va߬ÏGŠ¶qLF*K(îã›÷b,0%u îN¹Ó±âɈÂ|xÖ­ËÅ8Ï·¨ÕlÞÏfýà½eªÓÕLRN6i96O@ßI)Ž5#©$ŠŽººï‰è,¦YYÇ3û%Ÿ'öŸz2ét„j¿¯ÒF½ÝóÃ^MìE—Y×ÓW_oüƲ”–ZŒ,:|ŽE¯€±~ .ärÎÒ’ôí bGAQ]”¢È)r”3¬ÝjYjkýCoQWï ¯7>œìªÿNMkÉH²wÒ YÐý—\ÒK©­Ù¿L¼6QN¼CX¬Û‹ó.Kð\nº£»ùÁRLO-úü6 Ïɺ*!; prц؆Û`¬^¨9>QµXíÚÜQ|e,þ$·/Òó¯ g}&w§½…ž%Ý}»\ïü|‚ɘÑ6¡;ÂWu^¤Öv®Oj0ºS¯‘åe÷ǯC{ŠÔòõ©^Í»Jõ6b!߈ó!¾Ý<á|Å%KTËé ãÐt·çTÄw¾t5²´•y¤ÑV–T®gŠ]ÜìDÌuyèɳÉtº={Å·ÙÛ " ®7ZñK7îðËM·œäsû€A¨s‰ƒ2¿¥hü˜é¤Å[¨® RŠuUaDë0¹ Žà +ÔÆM¡_èkL51ÿ%Šgµ  Js‘?³3,e‘œ…S/Goƒ®\í´e©ÕuðÖ¨GVh'e¯D$NÜyê +Êà@—mY\}±)À¨:{ÔÈ‘p3~X´Ÿ |5Ƶ´{±g6: +JMuÇ^” J&Üše†Ü¡Uzı42Ôm„ÑqNÆÿ+Aá +ÕuÝ,ë´iÔ 9§!•¹±—:}ÙSR¿2ë‡ÜsFý%ÚPŽ‘²î>€Ôà’رTøNmô=¾ø²#q&+E w lËßÁî÷¡ swGjGtYØ‹uH‹ï„áŽÁ|2ëá˧¿'–?ø#—¯ƒ(†IŸ™´Š’ó|4¥]£[÷„Ú 7Ç@ƒ›Wz!ÝÐ÷1³!TŽ)V~è•!¤ ´˜—8øuåAåõñðùÉÎH·zÑK¼(?%Ø¡ˆB+^Œo'Æ4 Í ?ìðúH¤NýÍ;t5°j½Š!uð2a†]ħۭ Ö¢gŸŠ7de×)I[œ-HÚj2¢wÁÀ~dþ¨.êP4¸á á΃y20·¶§ÕõØÕÐUƒD’¦]œdBc3ÈA{‹ÒìÅÐÛ®È_PÕÍ2aS"G¨W ‚6¬ƒ  ‰žáÑYì[xEv-þ7¾ñþò2ȤoïËõ‰¼ ôº¤ÕÙeVjôB?âM&%æFÎrX„x¿”p•Õ'ó¸Ö4qëõs–‰7)y ¸‘€ciý@#×q`’|ÿ•MêØäëÍo˜§ç>¦•Ø] 5£¯§ ÿ¸=\´y-n4¿ç„ ¯qåcVåh¬Êº~å6 ‰g»uêzÚ‘L +t.¨m¶,?.?3’z[79ê€Ýe¦?Ò 8à,qÈsŠÿDsm¸wIѶó—A=ùl²!àþ¸Ð*]VMY²?#˜®\/d„‹+á«e÷mˆœÔ{^š¿¶¬1åMظs÷eYÇaЫ1ÞØ™~*)Gg±B(2—äõ 8GÏŒ²1(aq7ݘâjsº”—DYÔ)kÃ¥„ac¯/ðSÁ7ŸD€#¬v¡óC»>aÏMÏÏØ}pj0‚|§BcnN/“± +o%¦]à¿/¶Û€ äùÇã늒îŸrê¿ÁÜ4É÷H4Ü4Î{!XéyOö‘ÏéGÎ\šQZÀ®*T©œ¥: E·31Ooá›dù¡É¾ªøÈÔVvjN=”>“çhâ!¾¥*2BC¶åÂØLůώ·yL€øÈêîŽÁy¾‡cüß RC‰›ô—KªJê2~óÚªé2¯qHm€¹ÔârÑU­TÞê‚b¶¨bÓGSŒKV<ùŠHY €‰èû;é^ø•†TUœÖÓ ³6¤è:²ífyv/Öƒ[mH¬¦. {‘k+õŠcYaq¤­°Ü-(£”w‘n$ÙœwÔH¬²Õûç~çŒõàµòjâçùÈèÄùñò®;•`Éß_æšs‡=“Ñ{_E£X§¡bgžR… +)*ÏÓeÈM‘Jq‡ÓXO×p19Ë9Á¿}—YÐ!:o6`|Ç!Ö¸¤SÐè&-\ľ!ýˆ©2|6 ؈E$‘{uau…“–S&5¹ŒP¡V„ù¼ÇšÖÛrUcY¨¶¿`Jýº¢hž™èú€™<(6ûY=§ÁÛÁ‹Ë‹¸FÁBÔ©K?Æó¥Fqª3vÌ),É# òƒ¶᪫};°ùÜ|ã­%ïcHã²Cø’Ä•š Ù¥/€Šgwšº…£Ø!ó%¨MÙÊú ®¯ +’}\}Üà=̲)ÄFxD=ïv1dbƸz6¨ÐRÒäz—%è2ùÅ/Yý½è‘Säá¼0bTË$U(Ë¿¤(»64Àe¤wxƒ•D¢âÑGØcgCRWcƒp±azuŒ¸ôž¥’ZáõqÒÔÈîID:ìÒºÍUµ1•] Ýt†9ÚÙù¿»î“¿+Îò5î9ÖÃÂì9ÜqO‰G›·‰\æó¦Ï‘2 ý û•s‚t„gC\ ì{¼ä´/7 d-¡tl)4JñüqRÌ †S^ŠjÑ,•ã^HùO—‘d'=N†Ž³†i¬¥wÝZ³¡”‚Ö”"ÒN´¤¨ýµ˜ä¶¶ +PUT?µ™àP[ÑVœÁ±ŠÓo2­2°W˜ þ"á&bWÚ¯ëÖz°ØMxÌ–DéÌŒ&:ПR~¬Oäë¶ûïƒøE; _äög! ¬·ù þwV‚5¡“õÓLõþR}5R•ÙVë›z“•IÙa9e˜dßö&Ìñ¢£b&gl\6©2‰Ûv[Qp2C|6Ð$Ì,>OrÿC†{¶"ãnNùPÒ‚%ÎFþ° ™@ù$× ¸¶Î*˜g°Û×X†§:¤][÷\JzœtÖ¢)¿æ¶¥+(àèõ*UNýÖruÃŒ¤ôaÆî‰nÈ…´;6ÌëŠËSl‚7íÂâÂI!’1ƒHrÜ{Aøh`Ý¡€ Ð~¸Ô˜Ö¶þR¥‹«hræδ!þp—±NJ[iÁouGˆ¶³ÃÂwmMÞ!ÇŒ\²ÆDÚ°&*I—‡Ð™¦`¶vbý=7|»ú3‡þ%ÏïCR’qJI*þ!Rü³‚j¬ôH›èÿ æ +ÿ„Ô_™F(Úåö߇O;èǹ +ŒDŒ;lB»Ç;q´KÖ¦µuÿ@¶ƒ™—Ïè_µ[V¬øäè­ûûÜ{K‡péb2'ŒI%s›£Ûã1Á!ù䤗{žùÁUÓ$È3¡®ôuJGU34n+Xa¯Võ‡4:¶¬åõMQ +Áqe@æZ.)G•z©¦3óŽ ×閺̱V†ÍºîöæC #T][™MýÞ\8³„YÒ ÌÁÁ‘Œ–½/þ` í4Dy¨a\/5:TîdÍ·i­`CNò‚BFÍK|u’u.¸c&;ýÏL±!Si¡ +o€§œ! ÂŸÀöí<çZ´àÁê Ön…Üešê–7V|ø/qŽ·ó¾±•êGÚQáèßYçÜ êÈuJâž®ZÈvÀÂhU•“üXÌç)lµÊÂGÝÉ6YýÎë‚ÎÖMùŒÈ@lXÿ XȆÒ€P=zyhq`.Ú1#7¾d*OèmË°i +À£”ëžòGõíóÎGï#qzÃ:”©K±¢œ¾fÕ*cqO}Ø…~öZô04yc.\µ¨V1¡ +ÐçD@Õs`Ñ1í‹”âüVMéœD|5ß×é¹lÝog•ÆOŬá‡Wë Zîr|Ë×/ݪ´`™{®Íäg +M.UÔ±8àž·Ò0Öî MüÐQTNìêC¯Ðp!l0ÀÎtº[X 6êÝ´O×Sis&õ +µNòÖ̲oà^1õâŽ[¬è „O~qð0ƒ»‰óó$Ý­e¼„HÉ7K#W]©¤3¡ŠA'Ês2åš*~2ò8’U°IR¹°Ýí1‹É©È©è×±¡Ö¿‰ÔÉ=¥…Ý/¥L ^a?t{ö£kŒ(´~¥¸ñ=È»!×¾2Ä‘x&Dˆ+l¾¿öGq +ôªuÃë]&LöF‘Ÿ>t‡™Ôœ0a*™2ñq,±¼ â á|CR;0̦Biª‘ +©ë–} 8,É’Ç”áv8v kþ²¹±fRÊ«6öZÄDΓ\º´1—5DœÂ«|Ÿe³.£:Ki‘»a*“ˆãLóšËKŒ"ëh€~°—Ä™mJê¬ëÞ=eêO6ÄÌÕç(Êx&¯$Ÿ”°»½Èù]Üÿð9Õàñzyv–eÇA~).÷™Ú‹\xb•uDèHD ~ܽýÓ{SÒE=Û‘ÝÂñg¸óƒB/“Û! i»jèœEP{á±dÁÒO³òè@‡ý/ÕG ˜#\ËBVv^ÌÌ´„½gÏ#ôôî:.¹‹ë\!PV“ý8 Bðw§_ÍÞå"³D_ ®i±‡övÍ/眊0ùÜÚ}ɤE…A5ÞÂ?6Ù©Rå.^iG‹ŒdY+Ή† +lºŠ±Ë2©îwA+žFçùË!‹á¶p묹Èwñ,û1ÚÛb­àf41`35 +¡?„tÃ"Þk¼ˆe•bÂA¢ñÏÖt-NÈ÷¿æŠ'Öå“‹˜µ/‘ô5ÍDu ,8OÜ/ôµÔ³½y6OEÝžT²±Ã¡%—RuäÓ“õó–ö{ŠÏCœ«þêÊHE?w옒¦;´­Á•3€{ÔÁÏÒŠ½£¤¹Œ%´ž®)£®Ûcð +ï¡|å5`}³*y—=ê±Ïð_à¾Ôy^@ÞïíûGÒ6ñßHŽ© Iš‡À­žML·³êx¾ÊMÚ9¤Ens4|Ã6ð"ˆSKd:d>Šƒ€)è2ð§ú'Êæd „ñêS™Šþ¬DGWDM'½Þˆ:Á(þlé<Î-þe‹ +rϱT7v%µ’OSpéÁ? +fˆÉOöñ©u;ìíûgCš÷Ó·°$ß]ì«£SòXCÁk&•D[•¦§Ht…¢uÕ«kýöj=Ê< *i’¿¬  ¦k†:…¥õ”×áýˆ³™<«¼™%Í㜠¦T×"Xðoµ+…ŠœŸ¾ ‰aÕ¼¼lŸ²³˜:Û“ŸÃݪÆ>–¬Ú¯,ùJmþÛ›Œ½ÀÑFÂÌtOW7|n•¬;t†5–ú4% 8Epdðt*-•ã@ 2› ø‘ÓÓÛUöM ;¶5ÓÆK ƒ_̤ÎÙH4G’yîÎ8ÓÛé“# +âJÈì°?ª©‹ý‰j:¦wÜ_Î6ÚXb²š2±„÷2u™Å (†9¹‡Ž¾4mÏxy7„H–â…:™"'Çæî?d[2ÀÆÅš†²bŽ»ŸˆÎ>Ú%Œ½C¨# +yzÎ87m°2}+3ÃzéâÏý°úÓaÄ• †Ê5g3ðIguTûãœÃÔ˜§Ó¤- óy#tbci4Ýñ2§½ŠÙ™N–2iÃÝñ¦qpËEò¬Ì]–ŒÞ#ƸòÒ< +ž üŸ‡£'?¸WZ`}çõÍ0(7ºIåŽ ù>Î÷9,ÍL=¼>âÖ¡…îjR£˜'W½…£{=cìüœˆ]cƒU×n Õážìz;¦ã¸¼z`„¤NýÍ;t5³ò3Þô à‚µD´¯q'êm[õ sVØt´åm?öPá¶ÆN%6;dl¸>yÕoå«® g©Wu—uÃòKµ=d<ŸrÑïVOJ{U„E¾5nË)»5Ѐ}Æ¡u¡Ìb§;4GƒÐe ¼³K:âwè?$S/’(À½àŽR‰ççjíù„ +‹td\7îiFŠÖHßuc ^)‘,×æ6.R‹ò!þJr¬‚+5²ç™ž\ÔÎZ_¯–"æ»ÔÁ1n^v&¡·¶ú¬t¥ØôwtÞ:Êá“jaê1¬¦O¤m¿Ì«uJvù^ÉvW/ä@×íÆëÍ*ðÕm\³Ù +è™S¯çΡXéž7äw;V\ÒîLÂÀVÅÿì0Ç~pôÒõÆ×ôFdÀ n×3 m õW'ÝTôWéÂÒÚ-Ò…³‚òÀ +àEk £6 +Ù™B m°ý‡(ÏG¡ƒÖ€˜à…v¾]{øhàe‹*¦pÔ‘•GÖb}šNíY86Ü”OFÒl>c|ÈX˜–ÖïëÉ™sÕWQןÁ;/§æv”^V’3P»¬ ÷Îñ{9û*—¬mˆ+ÚŒ&iø°qœ!­Õ.¶Åí‹Dð÷În0=*›æõs»¨’²§D–nA´%õñ®\Ô¢ýÐÝ𤞻Ÿ +سÔmᜣÎ,'õ‹Žl»iJzRW6—MÌ…çʹ=ëà›²;h…ö“˜oÅû9Aib&%òI:Ù^+ÿлé\Ó!OwãQu7 q“CºKAÉ¿nu_õ¥oï˜ÎWlÌ)§`UÆvLcÃç4Y ƒ&ú¡~«sOÓ0Ô¤KYñ¶ô~œŽ\))½ÇîöØhFD“ßF®ë-È"ötø{Œ~G¶4$CK† +ã® ïŸ 3ÄYë¨} +BO÷15=ËÛâH‘AøÅÏ’Xúé÷º2YÀrg2JWùb+é8Ñ“y—L¾ ïý–Ã¥¹b~Âÿï'ƒÄÌ5g€U?ŽÔnZˆ]ÏÊPBg+¡y¡%öTß[ù”µ®mh¶÷̉ê¯X[—î +…Ê—¶´ +ÀÕ-Š#«Z Ö¦XVÇ™IƒgÈìmk`j¼óßÜ£5§Z⤜Ðí㉠Œ̵ÿÙŽ¸Ôc-“yó16¿‘÷IÇkoŠyÄÙîop†…ƒ)ªÓDZÛÝ­áŒÌïf†®H/X€Š¨:Š3„Þ÷§÷<]KAåíú>QøÐbY¦{w§ OUØ„1˜žÅ#ŒUAk ß@Ø΢wÛzº{ +ÉùIb©Y)I£ÓÀcg³óÒCK$JŒ¥ù÷vð»"2º%ŸÉ .gmÀ„öÞ^½:Ò¿âÔ8Á¨p¯¾ŠI©Ñ¾ËŒY©ôv?%f§¥¦ÁTÚ-‰›vòUlíÆsò úBs@{\”=»Fôsõž•H#´ûk +§ˆŠ?Ie¦g¸*ÇÑüWY9>![õ¿@áªæ™¯7„«[EÙI"¥³Õáx¶#X¿^%öQÓõdK¶úl{]´Yvf Ì™ùTªÖñ„:Å©Y‹q½äoÀ´à±qn+ORN'¾Ç°Q¼WòBéðwÔ&ú@s¦…'ŒYˆÛ9iô.”ÏD×ÒKgŸ?8Jz-Aˆ+Ò0¿ +Sàp¨Õy½.+¼ÓŒo[+ /SºÇ¢¬T…½ÿ úÜ WµñO«"¥zfiVD#ñø£«ZìZTž¥psSõ ö„Ìþð¿±HázÂotŽÌ½Hwì–ïrÇIÍ(!M„ª zŽmY·²ª={²·Lߊ7œ"1r1††ížDŽ[Ê&Æ…âÇ%‚>åÒ``p×Ý`^R h½WÝ\Á[ŽP)1s¥‡19ûùdzÆn¿—Là‰`GVÚÔ*}îËuÂõtnü¥°.s óÊ¿yýTôÖ$ÉILz[$ }ãýWHy¥Öá·. áÿâ 0Âsê[ƒíjú\9BxC’¯ÁD°]ørëŠ+K¾t0ýf3È©—y=¬ŽÌmH[(úø¥”äß²3/7šåÀ{éçL#E»#;AIÃ`k±ü9Ë_ßçCÔÏ‘Œ_Xß6N)aQ`XøLÊxJõü>³òÙôû‡Ø‹1ê*¦ÄwQ˜†ý¬óˆƒKoã¼’'…IãÕã0nxQ¾¦b ÐÁÂÆïÒ)7’Èc²1’d kG20,?‡ô’71˜³`éÕÑî³ûßÞË< +…<–kü5€Ä]53µR_ƬۮôVxO¥9òJåw{ám™8že‰ 3ñÀõŽ;´Ò °²­©èy{‹‰@[T¯=·}Çw„–½(aÛ•-‘Õ?^ß›xyƒ–¤rfI áJ°J¹¦˜ŸòB¿¨›Ãý¿¬WÒ®á£M¥õ\SâÌ¡‰Ê™R$¶ÇDÎ[±§öê$X-mךl¾u¬iƒ`0íZèåaW_ÿb»óÔSÑR–Å-J&ö%ÞyŸ_!í>‹´ùJöÌÃüȃEÁ®ôßa„êñ X%”‹~Fz¯¨× +îQë +½î¬”¿6¦ÃV +)|½“Áøzèâïì`I¾­&½´^ÎSåÆ“wë¼Lº­¯Ià˜aG†$¡NÁ')ØÌ·‡NÁn>p±%=EÁMþâK+§„ñ3$ÛU‰£J}‚Ç[œdEjˆøuL?¥|XÞ¿´\z<>ÍÝ4pÄ•B åJ2Q—àÂ/ Æ%KÇanéÃD¡>coEZ%h«°ÀUä§ß.cÃTþíÏÕ|vÆHŸv;£=Ï€…éŸ/™)RŒèX‹‚RáÇNAûÏ¡‡úg¿ÓðôNŠgEžFävéÙ¶Bà“3b«\µƒ5³pÉBÍÁ._ã'ér‹|÷õèÖN|ûOš3.bž«ÉºEQûš ±H> +´½ðktôäšyN"S¥HOh}/9ôϘÞò•s æÅITîPÇ"~ï6Š-º¦-ÎLßÒÀeã)3j5Z›ØåÒÉW?“©éFî™ +w­‡tW&ÉTÇç›Ø'ðáí’TQ= ®ê8±c’”iTÊ8ÈjG=N¦íip@J-x„…îê³<54ŸÛÀèkGd%PY&½Îüb×(eÈ°&4ãàn³jO’Ô0Ó®Â2µ@ïj¥…ˆ ü`5ÙeþB|No¹Ít_šŒññjöØéDJpRsÖ’áÞ.š…ê´‘QI‚!Ö‰Ö²:e}eï½RÌ#sPŽ/õ5]ç"]í˜õÎÞ8é ^p ‚)äifVä6Fü”5¹eÎÀ¬àܘ#¼íëÁÙׯàæµÕƒÒÉÄ6{·L¢E\{†|6ŒîÐ){…Ö#$ÎYWÜl‘H™wé…H6Ûá8có¯ÄmTpZÈÇ¥Òøдï±uÿ¿’‘µšRWw.Q-ïæ §K,§ÌëI[¸Óäžø +Ôc\—2†¿dÞK*6ËÅ¢2_½=ý» 9x@÷¸ ‘%)ÁM¥ÝÇOL-²Æ›h&øùí÷jÍÕ5£2*aæ¼Z¢ÎA’ASe£{°lTLCøößb*â¡Óºn¡œ¥Š"vRŠXDÍó‘RÔÉãÙ^mIt0;™D KØQ¶ –!±h‘_„Ë×á€Îo«Ipör¡²Ýà¨jc°Õª“$‰EŽ~ ýÙ>YI!âcŸ6ñ¿0t’zb«þù(Xºh+¹YëWQ—™_Z»xL•\Àššò‹× °{mUÌ. ©sƒùÓÓé+ÛP ðÔÓMe¼û¢ âԢ͟6_Ñ^ è“f6 V\½ôqû[= W¤à=zèv¼ÙèssÀr‹kŸ,B:õW¼c¿Ã²t0§ªbÜLqáíüˆ&6#Έú^ ÁÑ&ÞrTMK_,óû¤:D §µ¨;»Ð˜÷¼À×ì°­[GЄœëBaFÛ¯8eì ι…ÜätV2‚e'+‚¾€ÁJ$¤„Ô“ÀÕµ…þ´•ëœ>LyPÏp:„9"q˜"nÚ«Ð|†*ÙêX?èýS‚²ÄÂjúN÷Él®M’€Õ)³ˆ`ñòcL7¹Ó«®ò.ͯ?àJläá +õ.a›31*.¾CoÖqW.{˜“©ÆÏäbÓàÞ•rM£” ¬ï ¢×cøIŒâq›µ—(ã%š`.AËDcyÙ‚BI½ƒˆg?ꃑÞd"·9,d£V³*xýþ£ëU…w{LÓ$tǦJcÝ ] +Ðç!VÝä ™j\yºút©¦4ÅØ-2W-™Ÿ%Tføòe³¾!†3VèqÏ!?H6`ŸÏŸ \V³Ö'ƒ–|µQËö¼­òZ4?ª¶‚ÓO3ͺ)ÅyóAýbj¯8&tÿôý”ÞL\y[Ãz¡–-ºÍ \©äÁr ö¨[>û^úE´?X„,^ÖÂܼt,K‡ÜÐ󔨾?¢¦ä´äixýç•`ÃË™ÅÉkÆ #ú–Uƒ"Ø$àcøèüƒõp˜@q® åñ’öBÚ Nq0úê­W?ÆRqŒspŸéð'~çØZhmëÌÂýç"ôÙ‹_º 5¹@M¬–?ù ê.æ*‹ ÊewµzºÉß™åž çP÷éüí¼XÏw‹ªq²»êþD$»úŒ>F*‰è§ÊÔ[eSù« ÀCŒ_š°ÎOþàaÍ¿Þ”Ú£Wn$ØžáôP¿SǺmíSý¸™sµ×´×~ØSÔHiCfÍ*rMä0Å,5bÕk0pÛurÂV'WfTn(Y¢Oì´ð˜,Ûb½ö±XŸ'dÇ—;@Ð!Â,ƒ-&%êÀYé°÷éž™K,Õ‹C^è+–ûˆõ ûBJo‡š˜e )ê>vÃ'”ðÊU²Û‡õ«ñ×%U¹A¯ãG˜hÂÛÝX?o½¥»Ðv¬%LìëÇÏàÙ[-x›Zfñ3,j«t\¤ æ‘SOcák!`¢¼„!º²c›#ä{|êÈÝÔ\éxT—ç dA¢’70þ•t3òb¨þbY¿Ç¬þ‰è!ýÔ¦”}š´Ãª×®US@vÌòc>Ü®¹¨IÖ'Ãò$8ÒÝÍ)(5›b1Æžm;º(±:ÑEPź/§Ü3‘Üù—ö(<¹ (_¬r‡5Yî1Ú¸ &–‹†3n¬âRkf¼8°Ì–ÇúÄìÄ€EÁ·Qgìâ(öf@Þ½¬Ä]GÖ.oëö‹ +ø›wäŽ]ïn8 'atò©KI}âÁ&øCüpß«•ôP ®™T'“2ôvÿ@e±ž¶ +s½³hû'@‡ÚÜRU3X˫ΑÐshwóþš³³ÚF‹™Ã”´°:ÚµáF2Ÿ[~„¼Zu%[ÌRL(fû€UÉì~ñíþfCü×õ^r”ûî r—¹-+:}:©½×÷]W.fm·¾÷QÒ +úZÒàü˜WÖÁT}©¨nSv+¹V†D0ˆk±Gã,ŒÊ‚kíw¹y`ç +ó»ßhÌ@4Áü p1fÁ%BÆGÌô˜«ýÙYBõî{ÿÊêGO&`“ +°¿?µ·ƒüâaXîˆg”HrdWGlM‚©Ìó2AËF<Ä*Îèb´ýP‹@•¡Àóᩈý§8áއĤÁ…¸x37ßN¼ ”ÐX¿ÙuÉ"?E¼zZá¬|qbuAX²€‡•ª‚’„÷BŽT2üAË$ˆCí{ÑhŠ¹*à,–bM^¨šøŠŒÿÞáa äh)©‰2ÍØ*M=·«Ñ -Ç¿YÝ4^à”`Ƚ 6¼ˆN­¹§lÞQ¬²Òy”É i®9'ÐÖªÆA~–£Hˆ$!’&û¶":] . ‰'13CÓ)“Ó·îX$¸$Ï~®Ö#g©³À$‘+/õTù€Ìê†e1¨!0BðJ¬TËò\”Á4æ†É+qù©Ýú B&åÔÅ% -È«ÆÄz¬­¤¬¿„„[a1ŠkDNž¤OƘc—®|#|{óa€ç}¸‡NŽ>F­6!—Ž ¡ºk,|Á­ñæh;/L+©ySŸáø¢ƒ,vjbß¹B "bîö3Iûä\½ÃsÉÕ/ßÞçì@ „ídiÀ|­wöëæü…ÈЩt'±ÓAÌ·x,[KëÖ–3LÛqp¤#øH‡ìàÕRV{õ„—ìé£%LŠŒQM[LJ§•YÊ‹5ÐfÉ;Å7º +£"ÙodIH +&Åd_$T–ó&œXòæŸA(Rœ?ƒ‰//¡€¦ïY @§Î‡5ñ#ÜC'­ÚrWVÈ hïi³Øö¸þ~7èÊ—icÒt>¹¿Cæ!µàÁóú0MBä1‚eÎþp[Ìj–>šQÚIFPBh,y;tEKër¡6#ÏõÿèuKÔR PšÓˆÔ#_ë¬È5#áýÐ0.:E™bá6ŒùÇêÀ ôIÝÞñ9Ôéaœ Ÿ»´\$&€tÐþÂ.KpàìθKа­j{ÂÊ â Å€<î¯ þRQ—.¿O/“àÿÕœdzó‘Ý“hN#«Å‹æ™æMÏ$îÝÁ}öy4šî@ËÿuµÚMÕT"¿1»wGjSôôœ¢ÃÖ~CstIZó ³š#uA͇8êr{gíÖš`¾LH7§˜’WŒ‚ÑOQÆNqýZÔ­ÌøP·ø= ¼ú¬š Ì~‹š%×RLõ@ц·óóÕ;ttûIPHjL± ô#O·º™o•Sw~3Cô ’«GÆctô«Ña ¿’tÌQÓîÇR"ò¥0mMvdq‰3ïì"ª)c'¢{´óF© ‹\hº(v³Oèc^FäµáÃI&”ÝøPôÞ§›“X{j›ÿä»ãdCfÚª¤uꉴ ÚÍVB;¿€Žjâd×r#?%rŠ»,Ø2T)¹‡©¿¸bïÚS±¨9Nþ¼@ ÌŶ]z„œºópžÃàm¨{UÊÈ‘“AãÙ¯¨ 8ey[µ-e)¯"zð°D«Òm~…òÞÃØÖ‡#ª÷?l¯vOý¥ µ7VÜTО¸hæ=|lM¥¯sDBçkîÂÔjt³!¼Ù9v—õÍVˀ̈¼½ÔKù¡W•]õk}!-cÑ¡õÃ?ž¦-wô·žöí¬Æt6¦ýï*iÐSß¡|Ɉ§¡ƒâkËåóí-SÎÖ[£¹Z­o@_ÞÓº˜,OÃ|ÉLœií¥þm>:Vf…$gú¿tžï¡ª«â£,¥{`— Ú”1 ³-unÈï¬ÕÏY³±^7RYMYáÄŠûÞo§3¹³ÀЭå¸.àƒ¸ÏXï£Øx­ºÅ¥}}òqä­·äØ‘e)²°KÏ»H_U÷#F£ÔyJŽXðÖÝZ†sà'‰îšÓcÅ< ç0ƒ‹Å¦/Èà ÿýÃïy»Ga`ê£?Ï¢, +JZRêÏ+–/¯¬º–2…Ëü.©j†ôìŠR +[—dXsFŸ‹¢:æ<àVÛ+b7וQ[úGèM¼–‹=Qìj°ÙÊÏë±g¼"œöÉÑõ† §×®Ý‰[n6?,Á¼_xôšçÅê“R'Ü»sÐÃà +?=MÃÏÐæFš½‰Ž@Â:Ì«2ËkKÖêQÁÄÉ ¡Ur C‰òNt*¾8)¶Ž… ,d¤F*¶‘'pQ>L„:Ÿ÷lÝL,j|'Õ7ˆ˜Ó9˵Ì3¢žliRVòÙ0":„=jGK¢Rá9ö²£òÚÁÅ'Nª×$;ø¹y˜dЃ©ñŒ#£ +æ r´`³m\J²ò´³3ò€%b´dëÞQ™¬0qrù“žÁB$Z¤ù$‚Zïò™ykÁGq0m¦ý tÒà¨Q³WuðéG +endstream +endobj +2226 0 obj +<< +/Length1 1761 +/Length2 18831 /Length3 0 -/Length 4049 +/Length 20592 >> stream -%!PS-AdobeFont-1.0: CMTT8 003.002 -%%Title: CMTT8 -%Version: 003.002 -%%CreationDate: Mon Jul 13 16:17:00 2009 -%%Creator: David M. Jones -%Copyright: Copyright (c) 1997, 2009 American Mathematical Society -%Copyright: (), with Reserved Font Name CMTT8. -% This Font Software is licensed under the SIL Open Font License, Version 1.1. -% This license is in the accompanying file OFL.txt, and is also -% available with a FAQ at: http://scripts.sil.org/OFL. +%!PS-AdobeFont-1.0: LMMono8-Regular 2.004 +%%CreationDate: 7th October 2009 +% Generated by MetaType1 (a MetaPost-based engine) +% Copyright 2003--2009 by B. Jackowski and J.M. Nowacki (on behalf of TeX USERS GROUPS). +% Supported by CSTUG, DANTE eV, GUST, GUTenberg, NTG, and TUG. +% METATYPE1/Type 1 version by B. Jackowski & J. M. Nowacki +% from GUST (http://www.gust.org.pl). +% This work is released under the GUST Font License. +% For the most recent version of this license see +% This work has the LPPL maintenance status `maintained'. +% The Current Maintainer of this work is Bogus\l{}aw Jackowski and Janusz M. Nowacki. +% This work consists of the files listed in the MANIFEST-Latin-Modern.txt file. +% ADL: 778 222 0 %%EndComments -FontDirectory/CMTT8 known{/CMTT8 findfont dup/UniqueID known{dup -/UniqueID get 5000830 eq exch/FontType get 1 eq and}{pop false}ifelse +FontDirectory/LMMono8-Regular known{/LMMono8-Regular findfont dup/UniqueID known{dup +/UniqueID get 0 eq exch/FontType get 1 eq and}{pop false}ifelse {save true}{false}ifelse}{false}ifelse -11 dict begin -/FontType 1 def -/FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /HZGQIC+CMTT8 def -/FontBBox {-5 -232 545 699 }readonly def -/PaintType 0 def +17 dict begin /FontInfo 9 dict dup begin -/version (003.002) readonly def -/Notice (Copyright \050c\051 1997, 2009 American Mathematical Society \050\051, with Reserved Font Name CMTT8.) readonly def -/FullName (CMTT8) readonly def -/FamilyName (Computer Modern) readonly def -/Weight (Medium) readonly def -/ItalicAngle 0 def +/version(2.004)readonly def +/Notice(Copyright 2003--2009 by B. Jackowski and J.M. Nowacki (on behalf of TeX USERS GROUPS).)readonly def +/FullName(LMMono8-Regular)readonly def +/FamilyName(LMMono8)readonly def +/Weight(Normal)readonly def /isFixedPitch true def -/UnderlinePosition -100 def -/UnderlineThickness 50 def +/ItalicAngle 0 def +/UnderlinePosition -133 def +/UnderlineThickness 61 def end readonly def +/FontName /XHVBMR+LMMono8-Regular def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 98 /b put @@ -27021,57 +32228,133 @@ dup 114 /r put dup 115 /s put dup 116 /t put readonly def +/PaintType 0 def +/FontType 1 def +/StrokeWidth 0 def +/FontMatrix[0.001 0 0 0.001 0 0]readonly def +%/UniqueID 0 def +/FontBBox{-456 -320 743 1014}readonly def currentdict end currentfile eexec -ÙÖoc;„j²„¼ø°Aw-åÎ24 ÆòŠôWäEvçQ‚C<ùó3£‹ØAÀÔæ‹ùàë2¨ÿ·kX0k^ß|™‹:Ù´¼fbã,|Ð=úësLu2æK¿¿Z`3ndgï¸RÈwô@Ó),qñåÕœéG<&¸®÷­hï'¶ì. ΋883 „½Aœ½®B±AÓÔ¾I$sò@Îí“Féù™ÅË25âÆÚª,iá™êê pKôœê>˜èÂ6K`Ð Ó%äÂEû(‚ô¼„(bsÌP’ù,‰‚…³âW‹K¼T"0KDǯæ¯Ò–Š›¤‹kIU–ܨf/+WN/ƒÐr’±Ú4÷¨½qðÕW@À¹d³­ÌO ŽÅá¦G|Babhyè52+ÙÌ2›¿up·a²,-ôIÅ"‰³ðü±ŸMC㉮à0×’v\’›ÝàäG’‰\¯% ØŠZ\+;í7t›‹ÊaßmŒJsX÷€Œ´Pcû0ÛNqËL¬ÌÙÄì%®‚tãW‡tåį,ëœ×Çñ ‹);âbÈë»õõ&L¸î}NœÖ²òGÒi$ß²:[´<+s\6qõG•úq° Ô±%E~@‹kÖ´ðøÉÖÎþ„÷[iYKÆq[Ž‡× Pvø£brÒÔ~ÍÇã™Æà1zŽ¼6‚ð>)廄¾\û¯o¶xob÷Bi€n\!¸[ï(;¼"0¸¾ª:È÷7w…Ð yqMom¥D‰$cºqÎKRXr šÿÁc¶À3–m¢½ƒ‘œx]Ï€¦òï†â‚F†Ð…»í4_~nµ} VìF¤çuM;¹²[¹©)(6!^c«0Ü?;´´ïÜmÖXþ¯æBö󈊄±èÐ<ür#£ƒæ5ªö\ÇÖýAµÎçþÝzñ­‰¢ÄÝ!»±C`0î~°îȾÆUt[¡ijÛÏÖAsÞÅТ3X¥ñº´‘ûh‹ ŸQ‹;l:²´†hAßyÑPñw¢Š_R˜4žEwv½a¿XåäI› Íf£ö”JÀ{‰ï“ùþc -©šû“ÓTƒXЂ¯?6ß_×ãe—–?[=‰Ük³§ºDÅC³ÌàDר-'íÜþ¶Þ¥²_:mȹw‡à¬i)EÝ -É!Ã.¬â(’4G¶Å×gÿÐMwùÀÜŠeŸªãoðtn“¹Eí§Êy¡xÕ<ý¼PqÝ;AÌ÷Üâê >OÍÈ8‚}y•€f€2r¹ÏÇD§˜Õ’ù%Æs¾}w`À ÛÓ°ß’Ö/3x”Œ®‡Á6& ¨?ügçÙ‰¦óÊ.Ö\YEIIö~Å:Iß`àÔ‰ôŽž—Òµläæ±WI1‚{6oy"˜®Þ¤ûüW[ Í—¢#@xsä*¨·å®“dæÎ4]'oK·è›%~%j¼Ã{ž|FA1¨àà´@Æ*z}òÔ\1±ì3©¤%ÑÍuÖ}jyÌ<=í/Þ ê6l§@CC³À \+ýÄFè!} †kåŠ9ÓI ”F _’›îW˜ùÚª+Ìš~Ø"'K²È["ŞР˜öººß4§³±]#)Ÿ±¥»ý l'ª¹iÓ¥Úì¡åj§I:]Ïe± '®{—°½åͲ™}ÃÏÙÀŸïü¶Ù|€+¾ÁÙn­ Ð³Î™Yçòé0ܬªs¦d¨u¦;?äDz~Å!À3È·gÒÎǯ^@œ³¼Üe¥Ã×xƒ¯òÔVa؉ýåH±aÄŽqO*K{ŸÚà¼Ú‰¼ó Ð%½8;'FJŠ`‰K¼»!-%l•ëR,áܽ†x!öòÅz¤÷á ‡óóCêB!ê1¾ß@÷¾ãùÍ K¾=\ßߪðb•(m¿Å4ôwÇPòZh6?@„L½C@²äÊ8uwç1ÆrÇ¿è¶ù©Ì$ïÊBóKÅ„ÂØ -&Ì¡‰ç·Y5<1B îdlaà;À«9^¿'l„Hn,³ÛúºW˜÷ƒ{ÐÊY³µ¬Ô[œvñ +ÙÖoc;„j—¶†©~E£Ðª%9.ì¬>XJ‘ٚмD â"e?¤¸€›&¤oLH:]~•n¦X%„lþÞ¹”­ÏôdQ@ãa~M~EAËŸV.U‚›MØ€ª¾")éJŸ¢Y§4Ò›º‘º UËêC9¼¿ù2Îÿò–"\ªºÜ¡w¥Ô1·rlxؽØнtÿùŒaÂAÙ*Ó`¢†ËJpÁ¿[úKª•LI5Œƒ(¬Ú¶Ÿª>H·Vhg!ÁM8óyiÆ4´óW~Ή\ef5S›ƒDy¼îè ™/ÇÓs©jeX¨Qö²yýsÜC´rny,qš€ÀwʘNó>~ï:Þ„¡Ó98ˆ>Æ0—jìÐõãºE™ Û¸t–†”=ÎøxÌqÚ¡×ÍæsaÐ2Ô… )–cH.#{2­ÐVí°”ñ#ØB¦9Ü;öà‡êT§ðÿO›z9R¸BѹŠ:µ2@)s¼UFìŒÒïÔÕEº’»+¹ÓöŸÑ~`Nc’ŒdÙ`À§XXyÿ¯R1ð×T›ŽuGxei}¯¯'h©YÌ°®ëC·s¤ +E€ˆåψ#†poyL0 !~DðûÍ,ÃîYÀÙJC–Þ}”¦rH6¿ÓŒeN Ó¸l¥HŸñ¸È>·Mtô·Ùn¨cÈð=¤,LØ¡ šÛv~ÉÞž(… v”w¥*B¤@nx̽J 9܇A•&oóĬ‚ßžßIŸdyz„Ézg ªðNž•ÏA¿î +ÿ})/Äþ¹Ñ‘Z=q·´çx +ÞxG~½ýÇÞÕÆ»xzòˆW™…5@CütÕ¾ÞЈTîG©=ظçð’½A¸DÁÅ\g1*mÃaXŒÃç‹ò΃Þf»Rý +žàåqõÎ÷uú`e(Å¡Ž"av‘. œ{‰ÔL¸¤#eå¼ÇÓ(ìvŸKL9’¯ëeÛLHAkά&™õI¡ú§á0Çï¢q\JL›hÞÑ(Y|o=æ£6¦‘[aEáðŒfÄ£àI>8É»{Oæ*dÉofkÒ¢fŸ8V”J:_#w¾’.yŽÿoPxY/Ûð±T‚0RÀ|K¸ °Yk+qœÅHfƺ'/,þá:Üõ¡¿rÌSW÷UÄŽIôtG`㚺ÏÓý +¥Û¦IÄB£î“á6mz ñ|ÝrÍ3|«=5­º#“y.‚ø•E\®!å.³î´Z&f>hn§<¤gØ:Ù7Œ¥SÈЋk4ûóÝŸìH™ŽÔ"ç{¹–¹§ â 1{—Z«ðß›Cp¶Ä1¯(:_H.VO1qáÀä–B7ÙÄ€yÜ?Q:þÙ Z¦„­wÊ7#HyuàEŸÊüA¹yà…õý$_ëþÜ›±DCÿùÉ+H*ÅÁÀBzÒN,™vI F¾l€2«6-eÇJEmZw°Ä8!ðGìãža†gÅž¤™·D¶ÜU]'‰h›í–jÜšÃÐn$Û²Ïûï&v3Y s[íÏç$ Z*1Íðëãì¸êu1köHÃÔeá,ÓaÝH’\¡dÓl«yBŠ/$a–¯ ËJ•Çjj×òžúîfSy‰Ay[%:†Ç1dÀ²ž„Öø®«æ-ŽˆMt“ÅÊäsÉ6Œu‰xNõT …¡­îcõ83Åt!%ÛÅ-ËÀ')o¸ Mó>~61"€í( •¹ Eê"b /§×R}K?Áôr­Ía–æýø/m’£ñ~¡þè Qêéïs[ûG·âEMY ö Íù¥Ü E†Ã™/Å?ÑéiÌâ~Ž¤V¼‘wö0‰xøÜpUñòñ0}øeékñ;Ûóå>QÛE‚…Wè;ê¦ël†-0¬ŠØ›ùhî1!,{h®)TÂ…ÙT¾¹Yá`¶éK­\ûÄS£X(…&2/GÑf'$rˆé¿\ŸDä + 4+„‹c- ˆáH/Y™M®É£Ò‹Ù€ŸîýF˜–FÓJ)AËDcyÙ:„Ú}Þi(0Îê/#/_ðáVƒ ƒi£ÝBâ/XQÒcÌÈØ8«½‘XœõëÅÝ Æ/y涧6q%A¡æJYÑ64¹xºi¦1îzN æÓ>×kö’û"ñ’Ó¼hèuh¨Dî ŽK':Ë]ÏŽkPÔc7aŸ†¯¶7= îkFq{ø¡xâ&n¬Ùµ‘¹/)ið¹t‰¡Ç¹£PŸ—O@Ÿ ¹^wŸ.“å +eƒ©”¸È}RÝkxÉ´iªajØéÁåû>¾ËÕêSþ^mš ë7A3¯sòóÿ4ß{Õ’¨eø´ýIêœ1Ö\Ù§i½Å8j&g7©Mfä³D‘L¿ö„ßa%$ýBSF*×@´Ë`p¡xBwy~ˆÊS©I™cŒeô[W×`èü%]îsû³*ñ‡j‘D á1Eœðû!°üÙòkÚ;Fg»ÖôbE°®ùîrX1Ë»q+ÔC>I+_ß9æ¾Õ_åVz½=™QõQƒÊ ˜G<îwwd2FF…røJl}ý¥Žµu@ô½† +)T`g-Ô€Â9HûÿÎÃé7N]+ÂŒ^é=#fQr¨+lÛ£&M¦‹Ãù —祫Ô'>ÿ©i–jxŸ¢ó}qìtßÞ,~õ³ÅB(ë¤ÒßT=•Eâ øzsQòn™ÎÁËÊK EÛ?Júah·N"a¡°t6-fõª"¦\N…Q:”¾‰BÞÊ/¿o©,ûR£¡ÝaE½ž¾Èñw¨!™Ð§Ñ>ÁÊ,p±—w ÎxÔDöíù„ +‹td\>\–§Ø‰èy¢2·•9f{ƉË(`÷äËèH?ñh† õ¾-YǺ;Þ qÃyWè…ãÔ©AOßëÒUW¸_:ÙKc¸ÀBT®v˜DM06$þ³c  Ÿ3Îä^Ix_ß$¬*]Ö4å7ÒÛbŒ<™Ðë¬Ù‡[Ôô*Ñ& z9àø~zÕüð³êL9שŠ€K›<j +ŸhQÿŸñ þò™.–JÀŽ= ¡|R3ÂËÉ-Ð<ŽÅY|`ÜøEb÷qX¿¾­:¶–‰ÝrµYHÌ 3~!ï\úúdö×¥ÿ¾,Mxß„ ¥‰™?mƒ˜q…툵ÎQÒ)³¾Zýщ¹Ñ(¿ã$Ò´Q4gÝ.Ÿ!Nb_ ÍBH±_œ·íWY.Ëù€h@ ¸jžæwi&2wÝùª®æ+0ÇCôM&¼bD“±ÂMYâ¦õ…XÊ'N@q{{Ô-½·*îTK`WPµì%ãµ fbõ̇¦—âó$ìX°GÕp>‘vÓG7é$-ÙßÔê-¥S*-³ïÐwÙSÈË /»¸^ÈhÔòGwG•"Þáú$9󯘛Ù˳¾ð‚eó×gñç |Çx}T” HÂPð9œÎ¸tŒE×h½(Ö¢þ*A„ÇwŒ§ÁÒXºÄR½‹µDµzÏ/Ÿè>>h›*¶•µT¤J@ßp+E •ËÜ!õŸèž-ÂõõÝt_2=m‰;Q˜ˆŸ¢Uû–` y3Ìåë½ý.h´ì»°ƒ},îX©§Ao×n÷  gŒ°LJYÛ¶MV}åP»߆¾rû©\®­$dØØöÆD¾v™ÿÊs´6Rw•º¥†ƒgýÕ³ºi;»9"¶c¨jÁL¦±Iå*Íu²æ+I*à–m]Öý”ŸX÷‚ÂÏÿ°¸¬d¶zƒ%¿†úBå$³i‘€oáΘjNoñ‚ak›:íc5, r&&©œ¿=8-AjgË“îX×Eÿ€ »•¹sûÞJ×Ø6­!xg#vâ³üæ>¦ƒ‹Ù[}¿='XÐ)þmÁ3jKTØÖâdí‘h5z[j¸˜²_ü +¾ô?ÃÐëÙ& þ +öÛ/”Iƒ±š\B2‚áý +™“kz[66ºLœÓ²gi­¢ LÑÒ´¡6zZ—QÔ_´sý<ò™•aÞ/á×›Ç;ä³”ésŽóâ?w˜™–Í骋&D$¢(¿‰Ábëhêm±PêЀ¹ýÐ|ňö•n¥9øÓÐ2¢4óˆÏA8—0lÚÙr‚¤j|jHõ£yéÝÞ`S +ËýSî#$+LÇQÀˆñ~ÿï×ò[Õ—ŽuW‚J­6¶4Ÿ­£¡ZP Wš_a šP7ˆº»1;üâªÙúA–bÙZWš¾Vfz›uÈ—Þe›ÃŽm¦å‡Â>p$œ26tàaýP¤L½ó‡ûú¥­,´©Gšë2°È2ÿÖàègý¢S¿á­»ðFݧ,lÎ.ÙnúÐÌèx¯•PÖ÷i‹–÷­«Ë™Êk ºCT¯øдâ/å€g5 ÎÁÖ ²Öšº–g>9“8¹ˆ#Nž–ï?A‰ªŽ‰W„¹×D$ÄÝUu1ŽPÕÃ3u–Ýœ®mr8-`¡§S*Áñ3©ãhs&Ov¥–? —àĈ}ï¯^PÅЩO—[Èåyþ”%¥vä¹»Ó…wVÿ „Ü‘F%öÏ!ó œÍyœ$‡[öõlûoÚݔՃÓÖ_VüvÂö)Ó>A uhêÝPû¸‚"ëð¿žøUî愈#À'n"ú@!ÔÝ ,á"gNO ËP4æ`¸éøWÇrøî¹?4‰630þ§ÊÇ;%ª Ý"|˜ÉõΙN¶‘ú8”<~9TÀBy ‹ËtÌáùylª´¿Pojüf¯¸ôîrñoJƒý;o|Èíˆ Ö <†ÓÓ(y¥õHì÷[˜Ȧ–ÂB¿I¾øùém¨G$U‘ maò@S²Õ<þ¹êTH`ÖˆéÍò^˜vŸÚ„ú˜Ú¦)8¶lhÚi±ÂâHÔÙø€Ý—Ç8Qn,^ÍrÓÆ57oˆò‚8â¼4Ñ^]ò•$4ðîèB ö´x÷±y‚¾ÎÕÆi'Çe8ØCJHéƒ ±ÊHâ }Ýb †­Ó¯ªÓô˜ë3õ™©³wJ¥»ìM÷÷d3n¨!fSº/°[ý€ÍôA»YJýºIõ®îúË‹ämäj©ìá6ýáye•p’”̪HŒ7Êïßï]úúÊñ ¢jPŸqinýj¦_èÝÏh–fYG–p|;²û¡]Ýð¾Ï·{«Zjg§ÀίãéUÙRn} d¤Äw4‚¯Œs…ø;”fUáXŠÎLü“š!žÇ§ÛÉDÙd¤ågšª¬È« „Ë»)žÓQÍÌú¡áæÛ + +cü· V?èð‘0î6°ê¶ X#|[ +y¢qííìJ½›yk™+!æYvX‹£àkå嶌˜7‡QLÝYö"Þâ5k”)$%Æ< ØZN¦ß n ©Íçeæ€GKŸ—“uˆŠç„DJˆy³â]ÊÝLu ±’Õò,o|òMç•fi[ä9£7È4ìlŸnÇŽðó¼IbGø<P´ ,Š2ƒì ŒÜî“ÝÍõ½ïµðˆrÇ{ pÁ/|„PÉqoŸ&^vÿö`ÅJÀ1µmðo¨ÁÊñJ(ú¤Þ.“~tG2ãú¶¨±¿PÈ@9À€?ÜW” Ô"«šúã•«ÁÀÎ8½§5zkQ‚‘~aͱO1žßF>Jöø ;XEr¿å礻îÝ$A¸ÉW¶ +à¦9úÖ'{Ö{D&ü¿Ð € Ì’Øy (0ÎBUÍ å}ù©h¸³H‹±Ë5rÊ|**Eï4= ƒºm +Yl½ÏZ–ú2Ù !*ó"%YÚ¿½‘J¼kŸ-Lp¿¾^¹ªs´~|/ã`®yÚË•…kè{Iþ§tÿ‚¾1FA +×7vtÛÍŸ¯¦Ä9µ§™^”½ª K’óRêÙç†!«ÝöžËÃ^r ›/$íåz™Ûõê´r/lö’[3ãÊ#.^À +n^ÇR½Æ¢ëÅj.‚ãÄØZŒ•=4áÃÀc)JPšZ)y&‰qiœÙ¿¬Ê§Éz‚4OgiXËçdì«èJ¾ÑPÁofÒ¯“VóžÃFTÚ”?U÷S0Áºò«« +Â0Ü°(¯šIÞ£GXà@4°ÂU|'77¡b±Ã »µžà˜h6Ÿ}l‰ÉhىϚ1æ¸õ^Év8lBR997ÉÆ2™äV|lÅE™v›/!ʘ\¶M²Õ <ÛNCæÙ_7 ²‚¸0ö-ó|~>äõ±NbÜKF°eæýH,Ý»ÐÑ?ÔºòlVÊ'¸fÇ tȺÛ@ád¨±uÆ®‚Š,2³u! þŽ„åµZ“·3ÕHu”Å +Á¹ŒlUNpZÉ4E“ãHg•ôÎhR‚} 1G½ !»Œw·Eº˜ùåÄsA–µk·ºK;“]{Ÿ†¸Õg°O·¿¦…²¸£>ÀÁ]ó¤€ýÀÆ2Øûu,×Üã\Ò«b‰è¸¨Ën¤;‹µ±={¤ù^Tçn0h¦CÙzA"ü2åt@ôË‘_0Kõü mW@.}yÆë© +ã9¡úàö¹ÊgÖ­0ƒ¬æê0G°ÃœztFD™CRp±{šþ. )åña¨Ý½@‡Ÿt(”Š!ŽÚ†ótÊë¸#vïGìQûQMï»ñõ6¸HÍ_¦…ÅI‘ùåÄsA–µk·ºK;“]{Ÿ†¹ƒã|yžÆ;ÙOõÙ§-ÅeÔì$n‰dù6¶Ù¢RŒ]™Û¼¸wÀÁå“<”ÉçOOøR9‹ +ZQ$¬’Õ±—N‡ÇöóÃüWÊØ…Ÿ3IYçù°¯ŠÔëƒå}€l·Ñ;ÎÇ 9 >¿×Ý­ùEð(Z$Ì9ãáak[^Kg¡åF3¼=—Gà‘xYαWv[¯2MA†íKóé)ß(Ž±¼ÎˆØšÞRã{¶â(uýÕÉÙÝ׼΢¬ñMGá‰P·'ªòBd!ð <`;Ö8'âÕ|ì9ž#œC¡ \/bN¬®îŠ}jr¬ê®!ÏÁzëHOØo_ùC¯³Åî’cký0ân‘ay¢2ÛŒÊÄ6WÙ8üJêꞈªö×+À³gFOÌ„ƒ)¼¾„$ZK^–º" +à\4ÞXaà®…Œ9•žzlþ!Žéì3«ßE¤ú%ÍhAŒ^xýpà?ÞñŠ'ã/ÀÁàûäúÏ•›Wíç$^k!öÌRj)9úûl0£™þÈü×âmçdiG™c»¤‹xyËŸIlä¨Jc¯ºös¸:‰È¾k,Uez^lD©w)_óu`QnÓ¬QÞ×?EvtÝš±ýaR:å¯Ý‹ŠÂ›Ó¸Ý‘Ôš‚îù‰0B5%¦üÆ'·²M*`úDn7cÙxŠÎÂÏîY£?ǽã×™ p"D›vz£©ë.\'ø±fÄœ?CŒäÐã†UÊ u LSÇùÉŸ‰¡åE 1j +„.Œ +wçûVÄ‘{çÕB=¾vúï^eSÍš…,Ž¢t)Í5,1æ_†RͬûM<ùùù9Q$ì—&6 ¡Æ?X•xƒµ×KÿBì6 zÉf 'Ž}ÞÅÆ5iO “ÇŸÖ?`˜3nWÃy9@ñ~y{!‰«o¢ë)÷*Ä'_Ýê6KΦåÙ[ ;ž7ÊÛ•7<°øÀˆó_,¶–Åyoí5•<®äš}¨ìÙ… ïvˆŸ'êZ2x··ZºµÉÞóáeAÒìaB#»9r°Ž§ý2T–™ãXŒë~[™˜Ûõê´r/lö’[3ãÊ#.^ê}µßÏ$P' ·_ü:Ú*›äãr"ëj¡µ&”Y[LÂUlúªñ]êF!Ĥ^ÚîÄ®n€b;l¸«¨Ù·À!¦_ù Ì8ùÄ„=»!áL==ÑyXÒèÑq zÊsÀSm„V¬Î‰Ú‚b¯÷‚Ë%à*ÃZI §4ô&ÔGÍøïÜÁȪ’4Æ@­šûãâªþ—`'±^$ì‹…hɪ@¬xÁg åΈœà´JO\W¶?¶Ï˜FÍ8=8WñÎÿ’¦ggU ê=~r¶Dùþe8 û>ꙂgûQÏBî­{°«w +¨}´Ã@ÐGõÑñ€ä“Ïw“J±vÕ¹Üѹ. °:BuÒýròΈ—ÑëárÆüÖ³ïö' 60¯+Ø—Û<}ÙÓË,J˜äˆøÔ:K²²|ƒŠé„²EæÓb#«—'‘å“—ïiA\jM XäÚ?0-p³›ýbŸ„µôŸ=V]&€²]§óù±µÈä=ƒØØuÑ¥X Eþýò†TP\µÝî,è+íQNÊß®ø¶>@ä'R‡ÊàS”ë4—^bu×o³{1ëv4)i÷/<¹ (_¬r^±zqt•¦¬ßö9(¬rÖ6®–’VÜ‘”AP<™ûÿ4¬å»pæNíï ¢G„¡Û”]£É1 ‘æÒK˧÷oÔ¾{$ŽÙo!M[}°«Áã‡LÝNÙ!F< +8žÉ¢UUï^nÚcIŒ¢P„¸óøòaH‚È…ÁrigΈ Ô;ŠÌù¾¹ý–è®ã˜'ò•²_Kã·Éfq +á(CŠÿµÄ~X'¸mÂœ¤g\E™AEäI‹Ö¶¶º$ÉknÁJ!…ýx¹Oùb…ŠåH—è¹ð ç3hj.ê.P×Ì@96Û^ÿòMK½„Ód†+B}–¬IÒ-U›­'ÓÅe'3Õø1Ó‹wmS+ɯsÍgö³:WÒŠêH‘æ­ô –³RpÇ2Ùž LïË?«B>â-‡a­Ÿè±ñóÎÃ8•r%£ÅgÅ +œ{íçVÍåä;»` ù;ýåå1è)„}p<ÖxÈ4ŽÙM#ª±!DÕ_ÍïÇɨ•Ôè7@M)¿>PqÂÅ +ß®€™˜à¶À,ñ+蜠v@ÖŸe¦õnÀuE«³T3ìßoÜ£lóGy}õ€Ú +Oûò %ß÷†Æ_ÒOÄ0ÇžÔÉcÇ *=á38õ/.r”L|AIÝ–6Ù:[c*À ߓױ#ÝG*þ“êÈí^™IÍI݆̺¿s§"…'%Ž}Ó4œè?—Ó§"áܧ5”©)ÓüUÁ.™…}á3ÿžÁÚä80†´·“7EÖ†nÞ€…,]µo^9¦—ýÙ]ï‹-W>ü,¿·ÄõH¿áic‘P3q'Æ‹œy¤­\‘÷—|ž«â¦€ó+I4§hø’£ÜZ€W¼Ó±¢(Š|¾CiüR¦¢CÔ¬«7™l5A…׃ViXwûýaGÖ¼:N&l—”®­|f·Ÿ»LÀÝð% " +ª×â'c`¬™ÃW‘þEði·”k^±×¥ÏÌWÝÍ®b Á¤[Ÿcƒ/í·Z|<td’ãÛ`w   xÇì±F÷ÐH•[‰z0ë!+`)ꙃ‹¥¨Ž%À=7 l¿Ý9.þ“‚¥p#{M‰Eë0tàÔÒF-Ù 0M˜×\ps(†ïôâ*¯YO¯¢ “Ó‡º* 2G°|Ù¬¦rg¹þ¥›Hx#Ÿ5ıˆÍçÖÒÔädün‚ãòø./ÎFŸuX¼åÁ.¤ÿ(SÈ~±&¨Š'›s a_„¡{¢-ζ CJ:ÊÁ$VPüÄ°ðþAðx1Ø"OP˜¹(¸ +íãöZZÞ¼ŸËÓÃÇàÓ9c“+»Ì^¬±HO¼U¨1â¸(+ís’Ǿ+ai¥/1±Ž•%ëàÖv= "lPÀXÁ/И<;õxŒ†éÒYPºÁ•AŸçÈKúD34ò­ˆžJ># +šoZƒ§Ð­´±š•Ì*þhnUØŽG%|ät#û„m¯Ó)}£GJøU„°T¥ïd[ÆÒ™ØÌÀZ_"þý¶NB·:›€S!8Òü®/,ûHÕO@afP²H9ÐÝ `½ûýÿ¨¨|"RW Q;Krç;5@—šäó§O$¬øyõéÐ$sé^ï3w*ÝkÓ«ÆO(䙵ËØÌB±:±ý†G' ~j¶F§QAÀ}Yaù&öA€-1ùª@ÃÙ 0 IÐóíe¸Œy9 A€;ÒâŠa†ÎØ~Uåï÷‰”!]«Õ HÙÝÌèùæ—¢¦œ`7ÎÑhÿÀ÷—Ó¸m¤t²<¸e qÔî³O6IXØÞYæ{£¶#) °íŠ*V?hÁÂðo0¯¥Wy¦€.-÷Ü4!fw׿‘8)r k£¿ÔGeô4H€j²gX3§!Ñt¦[Qãè© Qšo€ÃݵÛOÿç2d‹BTa˜´ÀoR‰ÁæK¢ÏWé^áú“Ì)™Õ!ån®OGþÁdÑKCÝêoBŽÎ›¦2ἌÏ(•2†¥³OaQ,1~’¨ŸM*ÎRhÿå„"râ1˜<µévÛÛ-§ýS¸ó}₵¶)„ÊW1¬@4£¹©®n½73´‹é0O‚±k5—è®U·ìÜï½ÓJD?&;A£²e•Î*}ª©L* ÿuH7gßå²×ç`Ÿ.[ú{ýš•›²OŸ±‘PŒuÀXøûe9œ¼ÙÉá)PE”²åßqˆÜ}ßM|‚s&³Æ/XÁç)ò$ìûNÀ ïHçål§¯–¹-Æn§=<=ý7Øük?þ„á<¢KîÞˆ»^± +ãMÓ®aòTé4¼)C5P,(Ö&Wgæµw7Lo‰ä¬Ö&Õ2ôõ“:"†·€º+l\TFšã­JQ Ve5½®äpïLÛ¦»/µ1^v {⡨%-¶r×jSò0K§Td;hWÚ»R» óÛ¸‡_ô˜SÉQsÀhU7%«©B?K&ŒX"AÕlÜ® ›Y¥kW´õ7PJ½k¯—ݨ꫸Äè]B?@ý©¯>ûÜPK÷ûG‚páÀã6PâO5>B:’TÜNj^ÀºÂ¶éæ´•À=*Oñõ ÒÝÐí1! j7bŠÇQË$ÆÀÔ›¦ct†?k\H>i#en’J:è±ÈòSNWíæ<?:Y¼ˆ¬õúëÇ$% â×[ÿ¤¬ŽÕŽ4N‘ÙA/4˜Õ;±´ÜÔÏIº7¤®B›y`‡C‹Ù–ìgXê캽Åüî>´T‰,ä)Ãì¨c[z§Od&×q¢ôõ ÑÅÚŒm&‰jA1;*?äµ;-a# &·'vÉè/bô*Ë–$®·‚M[ +é(Ú@sò@¨\y¬9 Fêªè^ ºè·@ejp@Cu`ÎõþgÑ$­¶ õ.N›ÀýÄîE­Ù4w'<%ænÏö;1-k»ú¯«È?ê.f'Â’(¤(ø ’Þdå"p™)6ö¦-±…”Ü.Xƒ +}è¡WTGÌó»j`7 ñŽêÅË5Âqõ +ŸŸ§W“GïýIƒ§¢òXWoîÐÆà¡›ƒ°ýt = ԙð8?ëŠ ¹¸Q5MHË»¶1ÂrŽÉ<î°íé¼mÿõg]n¨˜ðú(ཧ½½ÏzKº¬,Ó—hh4¦º¹nàñçS]·Í{n˜MîIJ{¶7ç~¯HL#ŠbÎÑOb ¦';1a¿-<ãÜ=ôkÿÁ„£Æ¡‚V?¨øö ¢@\›\ÛœkÊgÞzã ècqÜž(‘ÜR]“Óî*¤Cb¡•àx A*`m3ÀsžÛDãPãÍøåiîâ_‰Ç•“}’Wˆ ›×Zõk½âA ²±6­’°ÁuŽ ’QÝLh"o&|¤À霣 u˵«Ú°W"â+Ï8+ø’Çg‰IýäRík:a¡‹ èjwNÊcg"Œë|¨2A°‘P?â°_uy®ôš=O$³Ð óþ }˜žû¼?‰e5ÐÜõ9™*2›0ÂÊsçEQ]û^<Ø¢Ëo0>ý ]…u™}kŠkõHYì¤åöÔ£sȈŠÔL)Ç_Åuå' ýæP×E¨Æáþ Ü"1NŸ(Ï7&QHfõÀQ†õjž9Úö³9€Œ> ¢0äOUD,@S³ +ªËÜÈÛX]íãèEÌ…k`lÁ’‡G‹M!]°w&Ä™Êáø¬÷—q´ä}ê‡3_”*šHŸ°Ñ;äÜðåuL… ¤J@}Ù«qˆÿ¼ãª)ã) +=qXŠ jŽñ +©0Ô`ÀåTª|ÖGš4á·VW´ý¸¯ž=<é_bœQ? PKX0Ú€vsAd‘‚Ε³:ÑaÀ>’ô`^eC5˜ci³`KÁ2­­É,ÛˆŽ{»º»OlM€ˆR|Ú¾Æt±5€£'"¾ý¨Ñê~ ó:Ĺ%Db¤/A U¶õ@'ÁX)cq,}Jß(Cü€š ’‡ðÇ“÷³"1\œ³Ð+p)s˜.CHñL”òÇ.ÅÉ*››}õƒ$®n;cÞ2:nêþ©d³÷”ÿ%;qTL£g{ö—¢nŒ1ƒË‚š´U£Uß¹œ§äKnÚ¿Š»s"Š´ynTë£n‹Á"±C*k©¨¦tÊ-_8úŠôf­*ö~‘ì+KèÇMr§˜ÈÂdºM¤$]5Íψ…rÃÔâð#Çd¶p6^Ùí*Çúk—SÏ(’±ŒÅ„ ä!˜Øf_ˆ¤«òÄrÑ’ù‹©LT¬vtÜùó¯8 Özh¡X¼ãýiÞùep¾©dªMpŠXLJHºHàø¼³†ù*†aÎnB೦ÖÍà=ÛçßÿMÖJ—hrõ›c¿¤E`þõ§ò²2jØæÏ°ÒffyÁ:eáÖ‡+w=~ ´WŽÔs%¢!éÉ;Ä)½Ëäõ*ÅÃbz#Ú²Î-ìSÇ o33”Ëo·B Žº­'!´:4Ž~ƒ‡1П`ëå€&ý§€– l ¸yÔ\àFå#ËšÒÂlû&q·-' ØËYÊ tÅ»žýÔ°Óf{_Ük㣀Sx†öÝÙN”:ÌÍ5‰6†è}äê–>'nòNä<Ö!ì2úƾ+³Ö\K?bˆšó2%oË>§Šž·KK µ¶t° `ôùæíà2Ñ$(€¯ÓžÞøµOàNœ›8€Tå{®fÎDq#ß\CIV¼{¾VÞTˆÀ0lnå‰Âç3¹¿–t,­§pû`Åâ7;$ÔÔvréª +Ç.c„)*.Ða0 ‘ø¯· ,)¼»vÿ¾q€²ãßäZvÑ |ˆôþÛûyOÓînË?‹YSy šê¶n÷Y\ï„1Q—>©½èûƒG0ážFÆøpõÚr©u”_»dûÄòX‰ÌÍ„ã6œóJQ YAø3çe ¿áFÉâµ2CŸi÷%ªEõ#t»ûöHQ0RX‡[Š±½ÂêY£Ô«ùFUñMï´íð’²5Ãñ¥83ÞêC¾U&šúñŠzŠn×!òo—Z9æÝP¢fГZ>?@ÌM—ˆÀãËVdÆ ŽRJým§†ÖqB!Dîrr~|Æ BÛÂ`£xßã +”DïUHçº!ä¡÷1<“™8´þžšÓeËS籉kðªÓw÷ô3/º\ 5cž„—› „,öàá~º#RÞ)K†ë _Ę̞,ŸWïˆG†ÈÝÇð¡Ò±–zÍÀ.L…‘!`lr?Ö¢ÃCûÖl÷nw;hØ˯Ô%3¬˜-÷B'Ìj=ˆ:]ê°¡ÜH‰ÿºÊ|KKôG8”‡ç!\ü +*ÍÕ£“;÷µ—t‡Ì·ú,p‹MÜé«JúY w®eŠ<Ú¥!Š:KaÛØ}W&„®/s˜)K1‘µî]¹FÏn½…‰Šä‘äpùœ4úhxW,[ëýÝ€—ã’ õlÙÉ÷_º’«Ôz^ñŒ:C¹·£Ñïsvóè•_˜¶ëOƒ­YªÿÎ̳j£^ѹ„†#ÛQÑ-Ì_æ®;‰ø~_V-_ªÞká†d!BÀy$"Š‰àdY³šŽ¹†‚? Á¼æj)(˜óÛfD†Så6£Ø“†áCQR4Ê_Ž/ŽB«ü—à€|õÀ~OÅ©Ï-$_ <²ÂÿÝ]:ZðfïSän¥Ôgs†¿ÿtÈ>ÚVÑÑæ¬,}`½Ïö©WæÕÏ™Q©Ç*'Œ[¨s öLœ>ÿà.{#1­SûŠq‰&EƒV bn«d$ă;tým‘/i6@SZ1¦½œ3ùCp\jx;4»o#0Ñ?NGùkaºˆé•Dá ‘Š°½Ê”Ľқ=j÷É6]ßKmyp<¢ê +’âµò^wf#÷7 ÐŽá"Ú +ƒ³l’ªZ¸K²Å=¿i@ä™UÃ3ðÊq„á%Dn\Ë2[ùCÃ×ó-IeWD¦±d7¶ºA?„jˆ{ѳ¸žïsV_= )ƳÁÃûK%•mŽ3{ ðmî.€èò‹hJ"D“òò¡°Ñ?Ý÷æG®(²¶iÝ¢žwùAžN˜fgRìûäbáG€ ‰óÐw&j9³OK?€LfÉ9 ÙURÚ›ê§ç L \³€~ú¿Üùø6ÑCm¶Ø¡'—˨––ÃìdT2‘¤–h‡†}ÓÔŸ_1° JØêîWaµ5YWÌ€2N…’]>Ì™*®{mãX$L+¢U3E®†~Ã)d|RlaLXÖ­vhÀ}LE#´QÄ>µ˜ecá¡IŒ9@¹E‹·µøœÖ–º”wÖ)ó¨-HÊ‚šábàá‹"WŸôæŽÚ‚›L‚5†Å«~ý#Êél”² +âñÑGª¶鈜£Ó}º=´1£f(ÓqÂÑX¡ºeÈÒÔ|êP.~Ö®$ÁQ®è° ØâœÄ`¼ûçsƒ«ˆI :ee‚˳Ÿí¥ö†=F€N9r©¡ñVìC/ïhSúQ¯ÔSÞ ‹@¤Y+À"m$µP6N©É%s‡C ­Ô¬ÍÈðfÄÖáQ¬¥¸!ÇKä¡ž†"›ü¿½³C£ö è½¥’NÝ—Û²mb…ðyfÓ=ê;éÓfÌ©öyC߉?,¼Ó«aÅ”hd¡ÀëS–ßÞ™o;Š.'º‡"½ÑŸ‰‰t ¿‰V ˜þÕŸ ŠWéFJ+V> rÍØZtcP.د¯Å}Ý‹þ=ŸÌn‘ZfëNŠÞøü]'1ÈAs¤››É¤h +& vòXØÈhÚz +j/c5~„:¤Ò§ÜD«°™üäÚe¥üôLÞÍ«W‰K?óä_ÑîF7]Žš@òU!ÝÒ’ÀÕ9êÇ‚¼?aTzøÌ¿ 'öç!}·ˆƒî ªºöñÇó6õ—Ò!„¥S·£®÷š£ôÅÿ„¨’|ÜUˆ±ªÞ–€IŸy2Õò»Ê,0Bò-ëq5«‘Þa)¦¯tþ ¶=&Âýõ{¾ª¢Û}‰Â(xzu¡öå­”JY ÓšÂ4`WFc~—ø˜[h#¼•P +¡q0ȯ‹¾cÖ±ÌÞBQç6æ±ËÐuAB`¢¯ù[rJ`ñ|ðVúûé壪XÜòwbM|ã|ïdx†' +W6ÚX”A@?ã†ß¬"G¾ JˆSRÇ ìô\¼£ýÞ¡Èbɽâ±R„+øþE#Zâ9§Ï¶Ö˯·fuL‰4ù«s‰0ëqaEZ ¬‹²·'HÐí?ºBÈ6±ÿò|¶jèú|jþò”´&…ñ…y$–žŽ³fª)ŸÆqx»Å~¶só‡0âÂŒÚKx÷ˆÏÙ!ÒQØIíÄAÿ0Z¾oôÝ‘:¾â‹úàêãªFůnù./ÎFŸuX¼åÁ.¤ù}dVû¡²_+Á!ÏŸOV‡Î?¾r™!ÆmsÓ¨K ƒ2À«“Âý³ýLAÈLf‹å+•ìÏšk¥Ó ë j߀ùs’HªR¡ò<4äÓª†&`=jÕ„é$ÖyþlxêCø˸v3crW¿´Þh‘~-›ø|øëHÇGŒa´>ïÀêå’C—[Õ)ÕÚºê†Ö‹tŸRÓ0ŒWsK9àýí¨H¦;|­~éj Á­ˆœ·…™?Z§J†xJ°ËX/Ä?iÛm‡¬LcYŒËéeô‘œ;¿Á…k§{E1óT] +lRgÍúæÚ‰žÜuVË´Çf¢ÒA­/žkòØN¶0Õ)a D3Ž h ÁñÈ »nìåÆZRª>Ð~«6²ÇÈ©t è—aé˜8È ÄN¦–€xo’a×~æ³$ºáRÒš{_ÅñtUFØáˆaÑfÅÒ1:ƒÕ_¶ +R„â&…íËZðnàÍ—Þˆ'€:cŠäpüf±¯qMá‡ô_‹E«rùvµe¼ +v + æh#)xÑüF­@aý²W.ÔîxÒM¬ô¶¬E æ®ËH£HÄ-y$€­$J–ËÞïp:¥€‹þ"p3qú| G©lÄAœŠÁ·£œ»ãç] \tš'¾XNR€\bªW×rÈàò£08•É&©9 Z,ÒcÇwåô Ø@Q1k¬7n¿÷Îi†«½Ôï øÒt¥· KWòG4 4aëµ0D>šìI©L‚îßcBG˜ACÃÚd¸µÍ¥ÖåY{Âë÷æÕdÖ1A±í æÿÑØ,äЄ5.—ð‘`ܨÐÑê%‘ÌíkC¶pRÍÓÙ™RpÄyÞšOkR ÔëgçálêB½€Ïˆ§ÝTñ.2Þ†fX Ü éû©ê­ aXèáX1_í©M×;n{B>)Š¨ž¤ÇûI\óg`ÑÊZVË~³—Ë'ý ï†DG‘Móí,q—S»íœC[î£Úµþmí.»ÊÆ‚ /D€ÇΫËs.€yN§tÊÒì¤XÕŠÛj±TÚç’è…S¨ewªV½âÄý3ëP*Ò†…}Ö}‰ŽS8Â:àã(ÔýÅÂèe,Y!])Žö0_ýüfx7k£*»Ó˜A® ¯Â)½ª¼Ç¡Ïèø\ÍÍ#é&Ë]RÝ7ÎQ&dÂ[,à†q>9d7ÓzWƒÛ*¤ømðm÷j"QÇyÛðdŸÒoÐ^±jwŽNئ–§Ü3"Õw„¿©ÜZ‚¶bÀ¡VjxVL›·öfÏ®þ.¼x€OmÚz[÷íÃlxã ÌÎNë§Zå É/_ooxj 1k™/T}é7/î ¾@Tcý&q6_ý5|ÚÁÛ+×£–.„r‘8dv±lhéwÊPQ=¨Eé5U% +A!Ða¡C¥”ò20CÆ^çèKæPG¦g4˜`Y®„Á†º=ߊ=¦&{²rAMF‘/˜æ¨‘åßæ"úçñ;UÂó Xʵí¸CKqJ:ø$Ió൚°i:k‘Ã}À ˆ¼Áù¡ ß…½DÊ<„B(ªgj÷5épMnãü”:£-.\6þS ¿-8è´’‡¡ØèÍnó%ܪ i· “á ÓeøvÚƒ¶FÓ]¡ûJ£Ž¥^Xõœ7MÆ­J¨±%ôˆŠdyÛöi¼*»DbƒZÎà(ñKhR§6;B¸¯seh:ž¬@Õ›™fl[ã,Ç"ç!'ñ?3­Î|[ó°z¢´Y¸³ü*áæ¯]+­Ã~KŠqJ&‰rÍáÇ&ô÷`Y±v:ó/Ÿm‰ˆp ˆÖ(M§ù£·þ_~Öâh´Ac´é*ð¹?% +`§• 99Ýw s`¨|cs•ë”8†T¹öPgÃÄ‘Ì£¸Ü7çÇÀÊMµÃ™?èÁÄ©'[Dº=An^N6ÙÅü 3·ª Fó£þ,1£ªÜç?_½„VåS¯qŽ ö¬oLæ cÔxÛÙÑùëiY±Ó£—fQÌ\™LÁ¼'äQ=Â-JmIð RV›}§÷(/½9„"õ-×Ôñ™»ØtÝD-g×+JŒ3,ˆãƒ$»ÒÔ.2S)tÛ`¶°Dåëv)’€eñH^Ny*àc<3Èä‚6À¦ؘÅqbQ—«pJÙ Åÿ<¹ÓÄ Äö£Íð ÛÑDÎö1Q»b»>’´„þ‡æ[-Þø¾úRè«SDè'–8 #ÎF_ýdœ<…µ7†ÿ#»0B—Soep°\ ÏÉJËîŠ+V;ʵ{Å¿˜±[жâýYòï°í[š?ù{3?Âáwx2‰ •oÊ^Ålƒö½]¤ñNvÄXxŽl(öDµ´}¬è&-â¨3C÷tMI2û_ÆÏšT }];»žÍîÑ çkÅUÆ1­­8÷lzÌæ£+±7HÉûE€[adô'X½ÒÈÌ-2¨g8üL.z:D8ÑÑkÅSú¢l™± •}«,»¸¨| 9Äøy¿×>Y3 6y4ZÜQX }tÉœ›â¶»8Sšå¶L>àªPm²‹Sèhù·øXÆl¦ ø ƒw~ ¦vÁÓ"9ûYzû†{µ]9÷O™“Ptµµ£G ¥óCa®TÍI|IH”ö¿í ª¥Â˜3ø•‰üé:Â8(Œ{¾ ‚Þ3ú‡¨=«ž#0m åðÏEãO>goW‰.{„éñ„!H> ”ÉšÓ泫 ìlB•øÿ‡à{¬ØE4ÜÓi¼eIØÐ%–Þ”<zRáâO¾QŽ.¥.AƒóZ½ð]# ôI›;“ÿ–‚ö‡4>öIÜ;õV¦2Ñ ;ÓÉ¥x™j¯•{–<žÚÒ±Kº˜”QØñô;_,“ãÞM!xÜHK€  ^i|yýÈ?e°æ¿zbûl/r§ëµæ[81,ðôÿ)Ô=ãBŽgö±ìÑ +nvµå!™ü®þ{¸«ÂÌT:À+[›,‹á©Œ-TåÀ2½°´jõǪq\P+½Ç>@ô`íC+ýÛ/¾*>D¾~ø8Ç‚¥þùDfŸDJ¯—¼¦ø›Ñ}S&†#8V-9dGÅó;T’Õóy_13R(qŠ³é#:eŠímxD´p”þ¢[ýš(L‰ß­µq“71Û,Ö‘·ƒR<…_Cm4í·me;DïÆ—f"MƒF0Îã±{2r+5ÌɹîËÙ×\%±Þ‡µGÖW¹9ΙKÔ—ã£Ü¼*å@žzõÂÆE$¯o× +¼—MgÝŸªÓŠF{K¬<—‘½¢üa ùŒHžGÑ峟€ ´m6ênêªñÇðTÌÝX­9¼™Úí¸ðYTºå‘‡Twu“g“JÿÚåÒ ^ž}­Š<x1ôg"cÃVb™ÞÿTv)‘ +ç«#8‰|¡ö’¸¯þ¦ç +ˆDn—µª¨¤j²Ì úöúªŸ´ARX‚ü’;éx‚ÏKé;#xò–•ŒCbˤœÔókÛ7L*­ª$Hošƒ¡›.£`+©¾£@ô´TÎ"ï`yÝÐõ!PkÊ5/ê@ÓÕèIq!èg(ˆ`X(ªØÆ’æÙ†º‘ ®7ž0€OPÂß(p£oVÞ¹šÄÆ ´i¬,,ÜAWFY5¶…¥£÷<Ü­e¾qŒr¨@ÅÖaàØÝüèñ/ˆ^!ôÍñOý¢N„ò/Ù$¼OuQ&_ªKЗ^é[2TêuHËÁÕH©5öM+È™ãwÚˆÓa³SË5£‹â †‘&H|Á'É#¹/zŸÂæè¿õ’f|ÎG· 5*PêIJhŽøé²Vƒ÷¢ýÞ¶ÛÕ´©Š´£~ã@ÚïÒ‡FÔBð#À¶„8.ïneôÅâ…$¨nVŒ§”r¾Ê]PD#¦„ßtJ°®ØÌ-yk¦Ê“1üöy‡ByûC¸wõ$¶ë‘ª§ ­@ö¯†¾—øØü"£òE%~ø#K",ŸWoÔ,0ÑÓî'ü'ÿN Rc˜³ØÞ¿UWL‘|ò ®yÊò²sûVj$S+´‰}ï™Z{&ŸYT#“ì­dŒôÙc4æü†n< F>¡ü±A$øûV§Õl„†ÆV)8/¾˜Y€™ï4…•ƒ–“àÍ™@o¾"ë”vh0”U¥Yu^¢q¦ÂÀàVÉ>‹ÑêÞ‰€m·Ä½¸„fßÀÙ¬%††”&¾YèÄêKåëd®w{•ùÉn7¦ ´]"=¸²ß8C²› ò5;EY¢. âW¾’LÔgØêÿ¢•ÓôAs +?KžÀÁ–]^´²¹ð¦]R4Ü=¶»/òŠ7ÇÍg7;q&z Ïzy¹ËµæÝm[ï&ÏqàüÔJw ”ö1²=°)oî•63ñ?jåÌPx“Æ­®©7%J' +m@¬u>…Òé·æKV.ö {s¡ù$»¸Ðé3ÙµoÖRC²Êaòs3”Kã$"{èO#fΤ¬S9ì’ek/ÝÑŽ8½Àõ¸ñ7Æ£ÞHÁ­¿îÁ¦ rWLX`²‘ãœïá0O·ÜëÓp endstream endobj -1976 0 obj +2228 0 obj << -/Length1 2068 -/Length2 12106 +/Length1 2566 +/Length2 25126 /Length3 0 -/Length 14174 +/Length 27692 >> stream -%!PS-AdobeFont-1.0: CMTT9 003.002 -%%Title: CMTT9 -%Version: 003.002 -%%CreationDate: Mon Jul 13 16:17:00 2009 -%%Creator: David M. Jones -%Copyright: Copyright (c) 1997, 2009 American Mathematical Society -%Copyright: (), with Reserved Font Name CMTT9. -% This Font Software is licensed under the SIL Open Font License, Version 1.1. -% This license is in the accompanying file OFL.txt, and is also -% available with a FAQ at: http://scripts.sil.org/OFL. +%!PS-AdobeFont-1.0: LMMono9-Regular 2.004 +%%CreationDate: 7th October 2009 +% Generated by MetaType1 (a MetaPost-based engine) +% Copyright 2003--2009 by B. Jackowski and J.M. Nowacki (on behalf of TeX USERS GROUPS). +% Supported by CSTUG, DANTE eV, GUST, GUTenberg, NTG, and TUG. +% METATYPE1/Type 1 version by B. Jackowski & J. M. Nowacki +% from GUST (http://www.gust.org.pl). +% This work is released under the GUST Font License. +% For the most recent version of this license see +% This work has the LPPL maintenance status `maintained'. +% The Current Maintainer of this work is Bogus\l{}aw Jackowski and Janusz M. Nowacki. +% This work consists of the files listed in the MANIFEST-Latin-Modern.txt file. +% ADL: 778 222 0 %%EndComments -FontDirectory/CMTT9 known{/CMTT9 findfont dup/UniqueID known{dup -/UniqueID get 5000831 eq exch/FontType get 1 eq and}{pop false}ifelse +FontDirectory/LMMono9-Regular known{/LMMono9-Regular findfont dup/UniqueID known{dup +/UniqueID get 0 eq exch/FontType get 1 eq and}{pop false}ifelse {save true}{false}ifelse}{false}ifelse -11 dict begin -/FontType 1 def -/FontMatrix [0.001 0 0 0.001 0 0 ]readonly def -/FontName /BQXTWV+CMTT9 def -/FontBBox {-6 -233 542 698 }readonly def -/PaintType 0 def +17 dict begin /FontInfo 9 dict dup begin -/version (003.002) readonly def -/Notice (Copyright \050c\051 1997, 2009 American Mathematical Society \050\051, with Reserved Font Name CMTT9.) readonly def -/FullName (CMTT9) readonly def -/FamilyName (Computer Modern) readonly def -/Weight (Medium) readonly def -/ItalicAngle 0 def +/version(2.004)readonly def +/Notice(Copyright 2003--2009 by B. Jackowski and J.M. Nowacki (on behalf of TeX USERS GROUPS).)readonly def +/FullName(LMMono9-Regular)readonly def +/FamilyName(LMMono9)readonly def +/Weight(Normal)readonly def /isFixedPitch true def -/UnderlinePosition -100 def -/UnderlineThickness 50 def +/ItalicAngle 0 def +/UnderlinePosition -150 def +/UnderlineThickness 67 def end readonly def +/FontName /TWMFXI+LMMono9-Regular def /Encoding 256 array 0 1 255 {1 index exch /.notdef put} for dup 68 /D put @@ -27080,10 +32363,12 @@ dup 73 /I put dup 75 /K put dup 78 /N put dup 80 /P put +dup 83 /S put dup 84 /T put dup 89 /Y put dup 97 /a put dup 38 /ampersand put +dup 42 /asterisk put dup 98 /b put dup 99 /c put dup 58 /colon put @@ -27092,24 +32377,34 @@ dup 100 /d put dup 101 /e put dup 61 /equal put dup 102 /f put +dup 52 /four put dup 103 /g put dup 62 /greater put dup 104 /h put +dup 45 /hyphen put dup 105 /i put +dup 106 /j put dup 107 /k put dup 108 /l put dup 60 /less put dup 109 /m put dup 110 /n put +dup 57 /nine put dup 111 /o put +dup 49 /one put dup 112 /p put dup 40 /parenleft put dup 41 /parenright put +dup 37 /percent put dup 46 /period put +dup 43 /plus put dup 113 /q put +dup 39 /quotesingle put dup 114 /r put dup 115 /s put dup 59 /semicolon put +dup 54 /six put +dup 47 /slash put dup 116 /t put dup 50 /two put dup 117 /u put @@ -27119,158 +32414,262 @@ dup 119 /w put dup 120 /x put dup 121 /y put dup 122 /z put -readonly def -currentdict end -currentfile eexec -ÙÖoc;„j²„¼ø°Aw-åÎ=Ó%åW˜)-{Ùr½uú•)¯œ‚ßröA•ÉÂÜãE(õ@Úý{ë¹´‡º“Q»û|ü_‘RÑå» -ØÐÆϤëA³Å -T@æ|ýq|Vk¹¿J% qu8P¢øwÄGx³ÅªÛ̆ÖåQæ¯6K üªÒ-UŒ\§Ô%¡bÕ"t-*ðxÔõöÓŸÏÿJ‘+ -}ì3¥{Zà2ŽùÕzݬT2sÀ$ZõÌÑ&{Bè–J×{¨¦Œß=מ0).ØnÁ¢ðBd^ö/¥!2-G×+~ò¦bÑ7³ Nø&lƒþdsõ˜á%o5·ð3N¸ûÍÐP±c9îU,”Y뱂0L²¿öç±Ê+OAÁd<¹Pgqíê‚!i\žhßx¤;B+üµX¹&ÜŸ)o²™ -û­¾aIñ`¥¦Aï.ËÁ…o_Å Ò‚«d™]Ùì©@s—p!ÃÛ×—;¿ù¿+üàî c¨M›)U *m¥žÁÒ‘¢4+o$¢U´ƒEc”E*­ü_G -‘õÊd.¬»U£Ó9ÏøöÚ¥#˜]æ8! —S}/a—§»¤«é"ÈöÒ48EV< >™N;œ¶ÓžÜåö½[9Ë“!¥"¤k_‹Ó†$RÑÓbÕ˜ÑgÅó ÉuŽx¨ÓVMhìÔL$3P˜ÛíÒÙy»5œ¬,¿o"Ð7'ˆçº*íE¤„<±ZÿÓé§c»ñªÍb8r$`FÀäÞ§/±úröôí¼…ÝøÂ]€“:BõóåI\$~—=Ô‘¢´ê€Ïˆƒ·¢Ž˜ÙËøȜȿ!äê–²Á±QÀ´S*_LÎyäXÊg*½!#B;Fdyú9 vF.ºxÂ&WÜ€ÏçÒc‰K[Òé¾­újô­ -ƒÜñ ²ßTœ-hj¸˜ÔÀ’:Ö²êÌbcÅÓ¢xù~Ðgk0š^£Z–]Ψ´&Œ33gŒñ3æêwm2÷ÀŽpBŒœIÌ€‰:~q¬Fà邉u<Ý^ðIE=Ï*ÑJÐ.­Zw†Òcî3Ö8ìY±Ö.1jKAs;¹¥ðÄ»Ž÷`ÎR^ÔyCQ4y»ˆŽÞÈå ¶Ç&{uðQgF -Ð¥vl²P¡ï¸©°¢ÌÁÀ -ÊeçÄ=Gylw -x]v¤ºŠ£&ú¿o9f·ÚiQ¼p´ÂÅÞÑymQÔrijËFÛ\ÍÀUBuª :sé燭™„Çy7ë䢼R'åT½‰»…ÐÐ{crœ/ðbìŠzfuÌyõ¸ÞÆ-_KýhÚ~ž sö‘£¬¸Ó­>a(—z•–‹fz!#ý¦…vÝZu¼ßZæ"VÃUEÉŸúw­øý-ô@»ã é¶Ùi äûaI+˜ RêÌ¿±P–ÿ¦¾;X¼C 6e½ë›À+îj‰e×Ê[µÀ7ñö-^j’¢â[ïÚ -g‰Ž°9‹‰3­ ØÃ…ÆD©Á’ùˆ‰ c% ±¸Ø™Ûy¯JC=©zê”íÒ[t v™™”Ê©>‚VæÅ3¨m™ÿ®9Á‚ð=Wp¦ˆ+ãlÖÁnׇ3g ¨#7Æ:ÊQÁ@µP~Ò-ìu@Ó±DïV\sS©’;þx; -ƒJ®¬Ï ®'p—ò -ºy?Ìþ^CÉõòùI ¶$HGY tÓ` „é\ÛLœj%ýf¬œÎxÇéìS›ÚŒrìÝܾ¨0þ;=žl;°äÀ MÑI>o Céµ" - mÆÞ³»ôªI•^:ÊO|¿u.^âÑNçuóœwb`ìÑ9º5>²¤øÞdJJ… °Äð0íôóX¦ðÉpöŸ[ÎtóÝàÉuè^%ئò`'õ -0T¡ I{ …6j=¡úÌwŠØ ɽ¥¿Ú‰»ËÅ7†øȘý¾ÏÛìÅùÚ:{ú¦©õŸº‡?lH´¼™)ò—ç8O®f¤¢mXn,Š³˜Ëß— Pš½iiþØÉ]X&•íBûx{˜5ǬfE¾ˆÞ®¾DtàçzÖ9nËy¡B…ÅÄ ›ãt+ ä4ŒˆþÊ, p×$¢@x&ý|ïĨ4H~4îŒJ7¨B‚àš<¢›Ð«C±ÉPæÐœÁ}uzèÂ|÷gÃr-š: èuäó¡ôà[ÃDè\; } ±bUûIS{™É27Ú=ñ©Òì FB›k„Þ' -¦ç€ ‡.~"ÄÈôätŸíOz-3RûŠæÙ·ZÍ[——‘“&ÒãÉhDþ÷|°AQßGCH :\'Š¸`aуã2¹ö{=o >uÐ&Ž0ý¹ß»”;e†®01]VÎÅ2c¨ø¤œŽ³`š †“CÄJ1/ŸÑL¯Wþ ˜Yckɲ¤¤‰Ð õÃ}c§8ÚLC|¨±Ågƒ%¼Y«9.Œ‘òµ?›óš»e‰²Ð+€‚ÕÃPÒöïI4,}# yE؇™ö]%TÌ>ÌýƒðÚ},ÁS ¤!*Á.ó¤ “˜v˜tåÐ7†÷ó[O·£Â±Rø«Y¶‡†Éìõ–SœSÔª5Õžl+>ÍŽ¼±¸l6¯åU®Yºï©'­¨ù_¢p%ï=°ü]^:k2 Åb¿3Ò²vu\þƒ—k!š†®çnúõ2Ì‹ƒ@Q‘*vÑ}Ô5z3màØšò/Æõ¨¯³¬Ÿ´_‰ó~¿éÐGfðG쪑“4”‚,¥­‹ê=¦5ûø%¡Ú¹]äEHŠfnÉÛ€x‹ÆR´òúŸL/'¹ 7.œ·ãñe÷Ͻ§„¯‡‡aÒG}å…èÂr²¡ -~÷) «]<$•ð¯2uµ¤<ýv<ò×NP|¢AÈüGk¤Ì .PY­ÆÑÓO;¨»½BRœ)­©o¬éVê°ÃðSþæ<Ä‚˜;aÅQ1Œuz0ˆKÄ%RÉvng;‘{®võk«ÅîÐä~؉nË^…‰À8@Þ»ªYö·Õ¸(Ø®xðÙ•(tÇ&XYò-»­7*«ò0ïx“)ÌEîtC¿w;ga"15Ä][f³l. àèZæ¤ÌIbŽgÕswàT&«©¦Z̸ý)D…H­†tªe ¶Ì6¢¸ ~Â5÷׶?œØ® ´ÏÇûáÁÂßkˆä…r.)¶•l´ö¡•Èx_²?ˆQ­WÑ32…š(l¨ãë ŠæÑ|O×b-F–çÑ].–®RPD”<Ô±ãDûOàD¸›]D®ÂL=/å™È¢ÀõÁKîÇõ­°¤`ÖàÉFƒgQ_ÊI -ânûqP¯»º·«_(˜gØGK&mĺt¸zÂéü­_ªo ×`4§z4¤-71kïds•w¨\x ZŸ’8´ éy\™šR¶¢ßÒìðÏlÌ@Ô€Û›…óÜæµ´$¨¯ðÍ#‹ÉÂa'·{Ô‚i³~KljÑåVBîñî“zU´/X1 S¡äq=¸þh&7uó«GôV¨èA_ÔÁ3ÄMüùâá~Â^…›tæ0‡²G&<ÇÈûÔו}Òðøh޵Ëäú0„ɤcÙÒ°ë•>„¶ô‘o6ˆ½)ß{QB”¬X[#ÞCÄI3jƒþeó’ˆ/N€ŸºUX+¤ëÖ`‡è‘"ŠP[\ûàƒºäAXºa|i¤–” Hn­k¹,· Ó? ‘J_y3^ÂcÍø}|at½ ü<â;1v’@¡Þ{EÒ¬«ÈË š¢Á/šEì7+?`µÔ+7ÂTԘɈ/¢Ÿ~£2’¢Fe匊·I›ó5)S®XÙvNŠ´eƒœ“…È BÒ®¿my‰Ä_1Ö4tüÅñí ÷Ôl—0w–ÿ -3Kq -TâùÜÏ­ÌÃ\¬Ó¼zlª‡]%b¥pýgÜ7G)½tâ°[àV‰ÑN¡¾Øþ~ÇO^¦‡˜çŒÔ v`Š«üv`чm -ňX h¸¨ý—>r±¿:ƒ9e1SôNCìà$cæØø% ‰Ìõ$¦,T¡M2ŠÏCOÝüV¡eŸ=•øÇS7áu£•_„{xœ •Ú$ì:#Zòi¾1ʇªlœ y ö^|Qûž¡°Lψc¿&ÐHn’)Ü ϨÊùÈ|heÃÝ À}ñ›IøºMÏô¢Ç•WÕð*+-Z¬ëß,dÞªyf¡¨mvÔ›]t þ;`Úo´Ø]”ŠWH¬°íàYÝ}'PN`ºSuBf$,ïÕ•ÓLöÔäó·=úwm³0mVÔå –l™ ˆTOnt£6×M"X˃ãöq¼åDŽf0Êms9?Äžš_šæmnH`ãA…A×mÒšº -ÖÜ­“a’èŒîjfnÌUØÛ™ŒÃCž›¹a£t·/ŒÜçÄæ½òvÛÜ×ßÚPqE$NP^_Ù…‚dùèv±¸ö§"TŸC[ þ¿|¨½¦wÒ1|7ô—B%Óÿ¶èf‚£W­"dÀá6QIa½÷Ò†ÅE.á<¾SiœùÁ—Õ™)Õ+µž Ì%ÐÅY`; —Š®5!‡KoLS»—NÕ’öhPhäDðÚ¶R‘™-¾„®¹Sòéæ‹„ç»? ò¦åÕ¤S8ûŠ!ª—,ÊÏ „«vÏN²[¼ `É"Baæ’Â5ÖÂ#ŽdwºéW%Yl‰¾~:/´‚>®Z¨;eÔFÁ{N# {Òí}Ê?7yøg -˜hS·…¢ à`tZ¦" *èQC&KsulÊÔJ·š|6×iÞ$rˆEÖFXggξß&vÊÈH_I«¦)³ÇÝLRá*Ž±·QÂgÉX¯ušV,Ð’"ª%_©L΄¤"4vܹ¥ÂÙ©v€Ã»ZÇõ! -åÕÌîûßY@øLŠ‰{Ìvߺ‘+‹ÎN*Y“Ë+qi]øî/^L)㊴Š’myß1=óˆ‡K굎 ™¯~{iµt{ÒnÚ?'†‡2²d,µä½›ÎÐrˆît‘ß³É÷”ö-¼FÜ…­¦Š¤ˆFl L y‰fÆæF?Mè€_¢‚Ý*aÁ–Œ¹ØÄÁ5ÕºäÓ½•ÂŒ!»¬‰£d§ 2‹ãΛiú¨w/"Ï;qfWò"Ž—^®ã0$go„ï¯aWó¡vêm.üÍ¢BÛÀ/c@ž{:¿d"€Aú·vÏø]P}Cð*&kxÊ;ô¡k€6܆ÒxɃ%ºÕ ^X2ÑLŒg]¡’øó…ÌVm¯·­¦Mhx÷äŸP&ï(Ð{Ÿæò‚gû¢Ð^Ö£Èý ĤԒË|í±º(ìdkÈå¿ì/¡ÎܺH€Ô”qÏJ'w¬Bî£t®½§€—žgOÝ`O± ¥ßcÔô¹“ ®\œK*ó/˜Íû´®¸™¼g²9Zö<0väB‘·ôWK=—äQ‘]a9<ÏÀV<-­Meb^|Þ€£q£À"Ä^ÏÌ×&l |ésêgiÿÑQ=¶ÀéõrÓå m[Ò:rL(Ó¸ó…„aXÂœšÃIlµ«eŠøÓý€wƽÑ?Z®Nm‚CU†´úØ ü‘Ö•;l€Àc;^=§ª÷¼/ÊE#ëà…q$SpÕ TÀð /.qKAEÍ æ6–ñ­¦ËêÃ`ÍQDAϪÐ'Ön¦6O¨ýGÒã$áDÍ’ô[Q»ç8s X¼¸9ÓÏ'¬qQ@µÀ{ÄŸ§ðn™ù઴œÎ¶Þ×Z'v³4쓪ӕDºúîÙŽ»-GºîÑáªJÀ8šÊáðY+:âóâA<ÇôF&úÜÑ3'Ë]˶qȧ–·¸òÁ·¤äÕù 7׬ÆLà”ÿ O|òg¥dÑ–ÓÏè6hç0ÿ£Ú%ÎPL•ðƒSÂñañ8ž R>?ü6æq¯Â…$¤‘ø‹Qø8sZx5í¤TÒxo†$3ÂãØ^5ê t·§áhœÚ¾Ä›èÒ~Uƒ¥f”vRÊq{oíY“—00¿4¶W2ºö‡n­D׀Ƹz¾ã§[›žlÔÜ´ûÑ;qÛ£ßóâf…}å²°¤$ñçÄ0¸}Œ¢PgR¸ %Bîu0³Óí”Þ»i[(wí4À’(G§¥"O#õÅ-_ª‹jó5E]WÊÍhíˆÒd]ý#ñÓå‰_óúȋ˘†ƒÿ"ƒ€Œ·´äëõ,ªª Üí…ÑbTlyu~‰ôCT…C¸þî•Qªyb9›6›jÍQÉÍ$1†Šq¿JƒzÕ®Š°e4¦ëÝvþ†ýõµÈ§ƒG%Õ[)”è¤}›º@ìh],l…u2 ‘—"t%åãE=3jI3Ï?èÌÕ·8‚›G[Ñsv›ä³hÊvì!RÔf1~ÂãYA Q Œ77Œ½ú²5tÇc¸Ó.,ë {ºr]‹,ú’“õxôæ›wÈ€„¢X.ð}Ãuç= Q°-‰–U>s/Þ‘˜/;v†e)Q/ñÀ?§üê…ùÎuRT -«ç>¾€dýkWÇþB¯øë¾&¢’ÃèÚR|S }Ob¹Öƒ˜mLjH'ØéhsO’gëw¡7ÂAeìÉ Ì>M«¡©w§ú–_bó'(>+¶ú˜:ù¢hÀÅ*þÛYÛ&Ã쌢côômØXá݆ŠÇ@åˆ>ÓêÖŒoÕ*‰ˆQ F¤µÒC†'L ¤Ò쪕Ën2¶s)|LêÊ"Ëóë–²ÀöTE®°=1IÄ(j臗œ/ ªLƒè@„Ò„ÙVŒ §Ïù$½'‰IV¬­–Ž‰É_ø:ÐÚf;z8˜®cë½üÕ›Xè^§@±nÓp60×Ü8׊…iÒ1”4®Áæ±Á…ýÎçZä…×µö>8¤ÿKò]•ÐAÖ¸û?HOEW/²‹Q´æû5NÒPŸÄo;&[ -±)D*Þ Œž\ÚÕÖ¶d@aTÁ˜À` £Ùk"¶}EÏi4$¥žK‹ÖüÈx…‰»Ì8}g 0Ϝǩië}ªüÔ ÃÅ^½': ÞÇ_óyZn*Š/!¦‹{0*öÒ™üA`èäEÃT:‹ñL|Ì̬Êÿp\cL7ω²¡ûÚêU/’â³ýè•yE¨¾jC9ŽÌHAòèw4ßÓìòÁ? Ø—þß•¨“øtD,wgþD¼ÄÇ?æ1ÏÇM ÅVSÏá¤ûGáºx)°¨Í÷:­Q†É­o^ÕHžåÎÇVƒ*0Jó:ÝlŽ+h³FLÛ*+Pd‡™Uc‰ÕÎ(½öoü A•[ÅÀÇùj0p$8æˆMFFi+’+ßy•Jq‡Í ÙÆIÎ[-¼þ¤—i­¾¶m@M9õZ€?û7•QE¼­•œü» -VÕHÖ_3™ÍoZ¬NbüIGÿ´ÁëÊ'ûñ›ÁB©²g ˆn{ ~‘]Æ/JÒL kY©/Ô½˜3‡¢DH´®×y¿+äS@ø4}¶'±ysv·žMÄT±m7@ÎJ†rŠ ux¾xŽEË/Ùª.©E3綴Í=öNCÙG¶QvÊ«‚Ôb8ª¼ÍvwC®GË™£ Ý$ví™ÈŒJ› Sª*ƒØÒ«CTenBüœ*þݯzƲÝS_I‡Ñp’%PÝfp²J_>^x~ Ã8¤RÑ‡EZáTåyª¯„>ê±°&E-ŒlÁ:wc€XDƒp¹ -/Û’¦ìô‡ÕÂN}ÛÖ°õ²qÑ:ÈUC-þ5‰È¡Rº®§J©¯û€ù•†èf2¨ÊÑ>~É;vM:!ýý*â5¬Vcjpp]Ÿ13‚¡SšþƒìŒ]fyÖþ:Òà KÆ@ï—¤‡˜’æµSèÕq›D¾’^Éõt*! îY¸˜ÀÅ/lÀðïù¹WÜ"IXxNy=Ï+ÛfîE´¨PzõD‚ÿiãô°®² è»>Ô …aĵàq[Àåõjåm‡R§Èke{kõxe]‘;Î_ÿ•Ž=J§è YiÄžÆw/*’íaq°â_ÇLÓáqߨç«õ52³ðŠW U%r7õéq &ŽtE»ÃÕŒîHçyß QMøcÝ(­jg¹u7ñSÕxã0p®ŽC‘~°Ò èüÍ _ “|NêåµÛûæ2s"@hò æj¬S‚î1›eɉ#ÀIË4R'6ég¹,‰Ñ÷”X+Ê)¥•Ýãa[TÌâi„¸fdà+›8¨ÜTÝžõK¼TÅlTâê6Ëø•Ôncwà-‹°[¨’E䯱¥¤TìµGÈ?Itr¨ç ÑCe1K³¦²/Ô0GØ'Wí„¢´*sÅ•S8Ê-H;×Ö`¼fméú#3ªt^þŸ”U<Ý;YãÙŸŠ£“?pɈM|*"ÑÍ÷ÔÐPµuÃ%v!&ŠSÚ Õú»µG×7÷ó9œ·HÅÒ¥Ph›õLXGÓB“—Ë)xTIøÃcó]Z¼Ÿî¸p0k•˜®›ºÃþb|Ë ¿š£„¿h?%ÍžÀ…2͸t|¨î -ÁíôÜ/S¿.FMË„B4Ã4at&š øy{¬áswfnB–Rî%Èv—ðó2¹ƒ«ôI¨÷vÓ V‚±ýÿ9 ?…Fú!AN„ø,?˜uê6¼â¡uÿÜýUö 4…u±XÙf†Kæ»ZS"ç@?uîãÿhQ+sët²'ÁéMš;ÜÅ|¬ÌîíÚË¥NCÞ9 £Héu -À|Í=ízÑaǹ(b•œi¤¶¢„Ñ)¼­·Ò¾ä aÇþÓ÷±¾xªù.,DŒž f{­V) ~?éû EÏ_Uñª•ƒ÷ÁƳ«GsÀ±Ú ÒëMbl«hc{sú´d¡‚ðrÌ=-š4°)L<áU\éHmRî¸7Ñ™=h̼¢¸K8Y¢Ï¨”KÞ§ æYâ’IÊ¿·Ø¿fÍ™=‹,ЙgF;)«îxë ‘ÜŸ¿¤ŠIíOG Çî;6Ù±*$§5Gi‘BÀPîe©CGè÷‡£ ¤£íRÌ?Œ=Œ‚_ÒçÊ>„1ŽÁ¾U²¢ oÚ7wZ‰){åí£‰Žã.z‚×G£›`„cÒƒC&m8Ô¼w´6Þ¸©…´œYØ Ó2$bP:àðʼnØ\èG¦Y‚›Í`©™1¶TÅSN±ë»tvŠgÉ`ŒzÌâã{ Àfkû)¶ íüøÍ`!QGrUžj9°½iiw¹k¯±Vw5iÆ $Þ^òðÉè’_BíªÏqþçî~y;4TúÕ¼äÛ¦¯†ÓÔ 2† 5 Ã"ÓåÒ Ë•ŽU±‡\ÊÌjXNßä¤ xÕ -Íf‘­åÛRôùÉXJ5&¶*çuìfX¸m1N”é=ªV.•c¯·S¦£ÄpW ïìrËÆ¥ 9ýžþ>–„”Þºµåk¨|‚ -ƒ¡ˆ‹˜ÍtºgŸ9ú*ïv*Q' …wwEèÊ alAòã±®¹@ÂrvÏ·:m{C{çÛ‘8aný}>¡·lNüI… xY.L ô>*««¯0Q¨Ø¶àŸ#¾ÈZÓp((œNŸQý2E@ôC¯¢%Kn˜¹!ë|OØæS3½XǾŸ†¬öÝ3òï´A·©£¬ºãÞßÒBYƒ˜ª³Ùìd1î>ÜE‰„$5ÐE3ðúï»Ëó%?˜  æÚË>4Z‘Ã|mcX<ÆÖju—pe2Ó ˜¤¶àVzA•ûʲíMqœ¡]~Ž@ÕÝÝB¸Àö—ððöC×ÒNapP¨ÝMVf³Øå>÷¡®ò+´?+í¸^;¿+øÜu,¿ ¶J(De¯5È?}wTK\{ñÓSá˜ú«›dšÜ¡e(¶À¾zb«þù(a/÷OÌ­æ`Ý@èø¢4çÙB:ÈAœ9Ú,.dƒWÓ]ÀNÆ°N0aÍÌì}ÉÊûP* i}¤iriûÀ­’ÐtePsªÞç.ˆ›bé~z×ÇÎHòáÒ äøO x5ßµ:Nºþ綢Sr%–¹ÿ\¦#­CÐ=úÅä›í(ÍØ_”eV­4Ç›§ºÙé0"½âÌhÑÚs7::`Á¢ä cü:(à;:O}JB´Orßµ KõQ'·Bö±* Ëy;8 w¯ÑÏ©4ë½>@rÀÐñ8ìv#ÅW6:£̃d(èZ6ÔÚŠ#ƒ*³ÓoŸ»,0`ãdÕÆeÒ­n…¬½È‘˜- õ³„CÊ)„P6«å¡b*& Âaâ9·oV‚L“°ðH³hý…¦¬Ûfþo²™>¼t[Dš)i:ü|Ûóãói0Í"§g£JnwÏû«ën¨ÏàÒE5 -ÿ[Ð_s‘–]júç|#÷f>[6³µ…Ü@@¯Œ<×ÿQe ½ÚE /@8 ÿ€RÊìdÁÉ´–ëÿÜê&y.¥MúŽ"C‰I”­ítŽËê“y—Žã&³ËÛ”Â2"ò§n}ÜÙ’ÙÌdß‹ÄiàÞ`càFƒœõ ¡JßC¹‹ EMãitÑÑLS~ðNÏŸ%Ò8)&>ë‡ÊVtâWë£Å=2êfý P@ÚÂË)iå«>êÁ2Í!fQ²6ÂÃ!¤àNëkb ×N´iXWƒpu*˜½bá®c<2: Fþî’¢ÍHùŒˆ¨¢MH„2ïK|Õ§›ªµøX©^i?HÁûÏZÖ¿6±kÜþ^Öï°Ê ôšÈzîâdÔŸQÆrÍÁ°²Ä+±¶äjŠÓÙ®¯ê½× iƒWÍØU ’¥ýÿxj™Ô\²„åf1»×¯ÛÊ)Oðq[T7NÙÉUÑlŠï -endstream -endobj -1978 0 obj -<< -/Length1 1173 -/Length2 2916 -/Length3 0 -/Length 4089 ->> -stream -%!PS-AdobeFont-1.0: PazoMath 001.003 -%%CreationDate: Fri May 17 11:17:28 2002 -%%VMusage: 120000 150000 -11 dict begin -/FontInfo 14 dict dup begin -/version (001.003) readonly def -/Copyright ((c) Diego Puga, 2000, 2002.) readonly def -/Notice (Copyright (c) Diego Puga, 2000, 2002. Distributed under the GNU General Public License (http://www.gnu.org/copyleft/gpl.txt). As a special exception, permission is granted to include this font program in a PostScript or PDF file that consists of a document that contains text to be displayed or printed using this font, regardless of the conditions or license applying to the document itself.) readonly def -/FullName (Pazo Math) readonly def -/FamilyName (PazoMath) readonly def -/ItalicAngle 0 def -/isFixedPitch false def -/UnderlinePosition -100 def -/UnderlineThickness 50 def -/Weight (Regular) readonly def -end readonly def -/FontName /IKXQUG+PazoMath def -/Encoding 256 array -0 1 255 {1 index exch /.notdef put} for -dup 165 /infinity put -dup 229 /summation put +dup 48 /zero put readonly def /PaintType 0 def /FontType 1 def -/FontMatrix [0.00100 0 0 0.00100 0 0] readonly def -/FontBBox {-40 -283 878 946} readonly def +/StrokeWidth 0 def +/FontMatrix[0.001 0 0 0.001 0 0]readonly def +%/UniqueID 0 def +/FontBBox{-451 -318 734 1016}readonly def currentdict end currentfile eexec -ÙÖoc;„j˜›™t°ŸÆÌD[Ï|<3322ãý¿ô9I¸fÈÂÜ"ýÇXXH`ì{¶Ú(Ìb$`á«2 GyµÀ<“»»É\ð&’ÌMê¨ÒêµÂæCté+Ë…B›®JvÀÆ·mo÷Ïš}^ß¼ éYTY½·ÞCÒ]Sü=ÚnðÂt9x¦Ð>ÌíJòêKÌ1¾‹ž'r6iÁ’Xï¯Ü'l±­é Š”6ÑŸ±Ã=ïvª1]Øöüö•mñO ÀCüQC>õ¨iÔÜOLÈU½O³*TšÊcàTÙm(™ß‘À߀ ãº1ƒí˜U -nƒAŽYXÖþUÙ1ñ2¨×èèÒrÕrúE^Oy¤"â;ôDmÒà{O0Ò {2ÆR¿BÊi³¯N´íÌMÃýÙöéufû–L´”o¬ñ¹nõÛâ>å‘Gl»_ëí1¶#<ðW”\’ŽïJºÄaâ½æ„_«òF˜bÁ†ŠÆ~˜ ”M÷PsRƒî“ÛŬK5B|«BuG‰ÓŒ8öer(ßVΫ0¶\òFÌ+£S“hdr=ÊéÊVd^ RUl^›0ŒpEÉTXÛ½‡Ç›í—ë]Æ%’ÁÒ[õ”áT8ݯœû%²Uæ~ë½25ƒì£‡µK÷±;¯Fª<´FS$ÆÕû²9dwY€-u&‡i$ªò¼(O´fvdz¦‹ hº¾Ó¹ -s;¯“Ï‘èB]¢rø„£œ* 3Ü”™ Ãæ_&+Æ ÿªñ‹œÍ4‡ôÇ9¡Ï^WJ òÐlÆ.ņä»Y¹~^à³¾¨“òXq¹=ÂË;agâ £CgïÏÃÄ _%‰Æ˜¾JéD­í>‘ÚYÎ> Ø‹’¢á½næׯ ,G#¢Ä²›‹5À.êFŒE)a"™¬²ó p‘˜ßqÖo-Ù4Ï`7µò}§Óº¤WÌt_ceuÅÐóJŒ—h>ËÌÿÍú¸µ>Wþ¾ªÎ! ôA5j¾-±v`òç-Š?R ïtl.ƒe)öµ1=Ìï–kdSã¤iõä!5Ëþ_lŒœ<Ÿ ô—²ß-^ó­ª+éÍš‘ŽôùbøގçÞ©ÈÜ–™nWÿ%yïg§õ͚™ý÷3¡N>âVpä9ÿeŠ.•¯oÀÖÛv’£ˆóý“€hÓ; ÂÍ‹ ÀfW´\¹ü^yÑ_øÜ“±š¢'Úî1︖œø]c¨*Îç–" Õ磺û©@A.Øê’æXïècǽD&Ê<¼r”—kö¢ ¾Õ|lØ\†Ù·[ne_@úÄ_TkϺ©åhô´.5G´ ô­­†ï[î‚\?… nä½;göƒà¦ŸÌ’\]%»ÐAí¥ÝVY 3ã”è×?¼ ³ßMhš•L”I•*nÁFçÕqÄ™ñu §Šeñ÷~ÈÀA‚õE Q€P+%[sÖÐVÔÓ$—G… YRœ}aâÕÉèüHງ´×÷õÃPœ  Ð1t'yˆwBî`©Ö~›× -‹å$óe1cww9üBî6$Ê—TĶ¨³;ÀZæ (4¤9g»ð6Èv¡¡f«´§æ4J Ÿ¿¨]5(§™Wð ø„šs¹Ô˜tàaȳÅ<[EŽ¬p­üT˧Uü¥!µ”JtОÄò ©ýª?,–ÎL?!w ˜×ÃçsŸðg0Môh¾BÅ1êä³Í$sÉyѼÏåÐÛç8G•º“Š(ûûbÏí>z¨uݬ£ÿ­·¾‡Do­eHÐòñz)Ž^0 -9JžÒW –©fÅ'ÈUâ€ÀIõ•c‡‡eiÒä+¡5¦¤ÄH,ârø73g0Ÿ3{#Y°«ò_‰E^n2ÖÝ¥»„ þš®Á¿xdR$;‡ž9ÎÒÌ(ìúû·ŒfôñÂ÷n*±Ï×eHýc¸«AóìI…#†y`©û÷µ%,tÅ¢\µ ì)ÿ_ãV£+µó7À\öMþ7AÙ4R‚ã"З…@ðOÉ!1í$¥¯*ϤóÐŒsp%>*¿È–, QÄíŸÉ¥H;ºEÖ×Ùº~b@Ž—näÿ¦ sשÌXZðÛ–…ì+ ”¥ì, -ƒy¡¯#ö/–!A¨§L‹9¦Ú8üøž¢¼üT»a/±äe£T´,‰ut/u¼¶ÀÌî6ü~¯® áº1ûQAÔ‚\ñSøâWä{N?2¤±â/ïóG½?ÚÆE£æ« 8¼Ó®chbK3¿qí$£þ;+^ÇB(ƒñ¬¼¨xšà¸‰k‹:wMá…XÕ,³Óƒ~õ‚Rçÿþ[&¡òh¬8AµS #L'ɧÑT‘p¤®ú#{Ûª½¹>É;+â­¼’MÞuU§”‰[¡ä˜%Vû0ºEÄ—‚Ù²ˆC9Ø…ÀS©×Èu^sIÓ2\ØþÄï’¢ÒÊÍ. ->Íj5ÃxBÄ’Úª]˸fóŠâ*Á÷‡g|Üšx*c·fr+µB!ÔÃH9Râ€2Hî4^ÁEìdöãöÄ>>ž›J1öu£¬•z¹&¿ÐMLLœ÷i¸Úo™Jè$!¸ótîFoµÜ¸§_µO>yù,ÞªôÎ+ø±#îD‰ Ùqµø¾ýWô2³û  tE"Q×IŸÕ…À§Ø.e]‚Çùl wÙú,•Ó*¡"ŒœMf]­—ÍÆàiðñ/FSóê‚’.÷ìsãç_.nÁ1 àÕ/€ßâ8,‰}̨ó ÍÊçðW˜“‹‡ HFPšsµWkn3‰áa¸9`6I¼åŒùÿ݉Wi¦>|ä =ÔÙÙ1u¬Åª4ÿ¤ÅÔ…X·’HAH -ƒ$ü-UÒÛÊ3¤³# +ÙÖoc;„j—¶†©~E£Ðª%9.ì¬>XJ‘ٚмD â"e?¤¸€›&¤oLH:]~•n¦X%„lþÞ¹”­ÏôdQ@ãa~M~EAËŸV.U‚›MØ€ª¾")éJŸ¢Y§4Ò›º‘º UËêC9¼¿ù2Îÿò–"\ªºÜ¡w¥Ô1·rlxؽØнtÿùŒaÂAÙ*Ó`¢†ËJpÁ¿Z+—¢TXêÀ{˜¸ñ«ÉÔ‰ß]_ŽöÝW_ÝÉù(~ÿF†àv¶œñáWfE@ñ»Qëc»¯¢PNêd¥‡qclµY¿„§†]¾Ošå¡æ®kÖÔÁ r‹ lÈú¥n€8e‹ &ãy~¦8qE”<£ÍDÏb´]„Þþ$Ÿ<%ª¼GÜ¡Ìd`øF òž‡]æÍ"Ãb¶ZÊ9 'Nnð à¯]Œ½w¯ù90dÿQY°NS+Ý{M´>d¯Õó¡*Ÿf$_ÇÔÏ{}äþÎE¾ÊN5´Úw0Æÿa™ëmLÿ@°o²Ÿ­½praè,¿Yuš½vT fEC/øî -^°Ì[XâO¨åß2þêÿ WEƃØKQÙB:”ŠÒAvÑøy™°ug¶¯ÙíÏ=q“p5­—/¸…1[úÕÜ‚!l2T˜'9HCª¾a—€®ĉ€üw™±Š¯‰Cî3^—Cyœ’ÔÙ}^µÿ–…À 2ŸA‚ú3>ÑG|ºÂà”ãH.øˆç]¶J%KV`à8ß?½†Ã(Ì0Í÷ÌÌg¨óôƒêt,×OT7 Üà$n"JaoÕ82*tß®ìû‹¼Àç±.ÙøÄüÌÕ„ñøeÙþÌ]'2õçb×v–ó€Úè«ëþÐi“ß)kŸ+ã÷šT½:CY#M5×7¯^ÓkjÈþrcÖTÑDd¤;y9±§/ªä£EêÍœŽ‡îåÐIœ7K| òŽ”Sö­ÁÝ¿[l6ƒáå†ê«÷¥ +K|ßOV`bàÂß'EÙp…Öœ§höº‚ÒõÛ(Ñuóèfè*Ž†—âéþ­ñ؈{£s’G´Â(\–h‡M9?Šª¬5 ›gÀ|Ü 9û@£?ä +â…;ßò^;Z]“‚Ý_·pßêXÛæÆêì¯ô5Mlî•pCŠbØÁMDþ ¢¼›†âC³õ ![]¡ÒzºŒ>¼¦¤§¨Òn®žÔ¥ç˜R>X5 ks‚•Æ¾l© +yHRëàv¸ú,#«á#ymí­B[Ü! ºY¸‡Ÿ|+á´ˆÉÖÌ@w˜1Èy0¼–+[åÇç3Þ€ º©;ð.f¥Ká›ÕÒ$¶w:¼É; ×0´6‘áI@œÛ3úñù€Ð]…3Š}A‡íEêŽ:l9éÙñ;fQIïÜé1Ë×YÚÿuK­YßEXÒÖúñfhÚêƼ½mxâÓg{šG9ÆzÏ‚ªÁi¨{š£$ŸÍ@í½”–vœÖß2BG™4õê˜#­=„Ì-*n=׬>¹‘paŸ¬\ßµ ¨wQS©RÔ9 ~Rã~{ãŠûµ“,VN…Q:”¾‰BÞÊ/¿o©,ûS[-gÍ<õïCI³ÝÞŒ½¥¯Û$d…oS¹×줇µÔQøfD®Vôc’ÙÏH9>S9¾ÛjÞCa7Y¶‘FÄC;™ +²gÓƒS°Þ“¼Ú$2hè¨HùýìÆ`N}HŽÏ+iI9âHŽu»õ—ÛIãô&Õ–ÁÙ‡‹J&HPn‡Í…èLTä|DŠ°îÌËÆ쎂j²º'ŒÏjÙÊ‘?k.&~§wÿÖ% ‡ÅRF¶‘ÕϺ·Á¸o>Üa­Cþ›ÁwÇÚÈÚD²e]ŽÉ¡>#_Œ(Àµ-ËŠ;]‡lƒ׈ ‚î©S \Z+Jøú CÛ<¹ø;ê–É’Z[ H£9•Z<(*@‰ðLî5À´GÄ:ŠÜx×›úã¨òÖ{y’PÊñ ‡0˜µ„DyQ®‡»µ/%¥¥ß¤]°ýù¹O 3ž¬5ø–Hwê"ÙåÔ%o14·ó¦,ýŸÝ¶Ç 4Ù¸–Õ?F)CžkŠ²·Rûõ­nò©¬kêô5‹o +ul¢—!ÖxJ†óæ†p$ ¬ÍÅKNgßò»ŠG–´¾Zs6^m„á¾ÞûYës8磩}‹2š² ŠH¡UëÕ +]Á‘ŸO¸í$ËÆõÞîH;ºCié_Œ'6Õ[$K¢\xË;í^zÙ8?‡!7•‚&þ¼†¦•–ØdD•¹§tÌÒrjpÞ—D+Ɇ‰Ð¾µ¦9ZôÉ€Åã“Ãó3HTÅ×IVxÞuJð®Ú'8Òü®/,ûHÕO@afP²C4ûCobž­*³¿l7Ñ#×±? f)ó'WgÿsJñ’«+) ë‚c&y½n2l°PÕŠ‡€¯ŒÅÊ¢‡ ©Èð– ðP€øz›`IMô ó”” N0LùÎV0ækëÈ7%¿ãHBh5>¼ZY@U4½òœh‰9jD‰ÐFÍÖ|HVÛx6ÑÕ‚çA ´{MùhÐŒB«ü—à€|Worr‘6•û˜ìúBÊî4·B°Rlk‡*Øô6@æÖ—‹1_r:~‚–¯¿î‰ ŸŸ—å«a-‡:5 ËÌÉ5hÖ» ;”ÄüãzíåŦêüì»á6`Õ69øŽÞɺ²Ý±§ }×LM¨a3ë1>pÑ°‘M°ªŽ´‚q#Ó=4ëùnU ß›a5|”F¦#<¤“œ,çÙÜ ‡;N`,mJV ÛClòª¡Ã¶Ö˯·fuLŽ#ûÖi¿>£4~HÂm\ì=c+µt$¬eÞ&«X\ Š>À" ‚#ˆªTräêéE«ïð’®² šx\¾ºÇ5/WÇÒänT×Òqéæ)AŽªP¶É¯mn®DöBÞ®âaÕ?,&ú íäB]ô­]PE³bÁÑ–áþµ-ÊÖ½w¾)¥×2­ K½(]äil°?~6‡Å÷F kóó¥å"µÈ}áÙu° ^«ÉŠ±DlÞ½g‰œG™—`§³LÐ&¤Rfê`…V'3‚EïMÊ)‰LÛ–;'#gtI¡W#Dú]˜ö'…rÜ£¬Ù‚”OCXmÿ¤ŠðÉ6 õq%QmË =Gdb¿ºI6«…!#[Ó»¨>ýB6V ¡Éh’ÍcÔU´øíIF&i ¬%fd@#íaù§QÕ#02o£2©Äv`윊Oí줎 r´3åó,iàÔðPfQfroü3é“NY³ ¨¢Ò«5À½¯%çÐ/¤F‚–§*=(îV$"Ûð3s¡¸8;M&”êúøFz-Yqɉ#ð²~`þ1ÒNõVüýcqúìôÜ™Éåæ¼o71aiAûâ<¦è„‹1ÎôèüÅHæU×qnÅ÷4ëMN…üØÝ ƒf>g¸Òœö3 ùŠ´6‘$Û¿7Ê!£y§{NŸp|jš.Ôß ?CÓVÓX Ëà¬=½OZ{ÛÚC³Š8L-¿Y+ ^©šUØÞluJaÅ[Ï*ßðöŸõ€[TN$‘èCÔ‚]™¦8…®«ûö‘ý} /¥‚Ú}ëºbµ¾ÌèOWÞµðìrrM-¼ò€å×ú÷¶f.+’ÐŽ‰ðæ+ÏæXÂUt×96{ðW +g»aØGrI4£×ÁÿHarCœ+2è)Ñ©kìÍ©HL‚õ EŸ6No­¸·8׊G²XÏ»˜ÕH0Áþ’wDÊÓ/¾ƒøÁòˆ+$jÞn…oš¹ÑEÉ+’:Æ„ øž³‹ÿ[ëÅG^žä—.•$Ü>°“¢y=U3Ef©Ì¯$|pZ„îm¹| +ñ—†ñté>=ùRŸpÒL»Ç‡Â”)ýÓPÈá­^»qí— ¢7k•ÀþšÀ:Ø \•¶‹ãyý˜ +ºFÈáèÓaî„…u¥õ"ôÈɀе¨sv_¬T*Ï;Ɉi”¼õJPØi±s4¤E›\''†>…œø*UP Z‹™(ÌÓ³ Þ# û·.Œ?^f£ª¯Êæ 04â(ì÷´GÌW`’3ýೌÔì©ø­½÷¦XŽjzñ•Ã€U*5<ÎPáãÐO! ß—ƒÃ—XòÞµ‡.² ^<Çç›ÝHãÚïôãv +‹AÇvŒ·˜ +ØÙ…'µí[~P÷< ²'žÏráìy ]9®SDRwÈ™þ†åK‹Û¬vÿ„àÚbÜ©Q]n‘’ w*ÉÌù‘gcšt²ú~ü¡¨c³Uì\"X”ƒ›OíWé6ÇÀEz•qµ æ“)O!Š¹ø§Ô^_Ëšpü¡¨c³Uì\"X”ƒ›OíWé6Ľ +Ö nÙ‰Ú*W^¤^Ú.Û´dôÅLï%1<׿/GPíô´dF@¬B‹ý20ˆÌGÈ"‘vp0ðBy¹BáÝ{ÅÑ µ+p“×…ô=~÷»©!XÃÍ6î‰ØŠjæ pAûÃœ\³È£fk¿‹¿½$Ó“w§èjĉm@ŠMÇØà& ŸiÒä(áDÇw%ü|Ï”‡-Ïdã¿A4 m•`ðƒA®g[ùéî—X¸è8]kRâËO1MC€n‚VýG®kÿdô28gû|}/U¼R¢ùæ¦0¬AéûêrÃÿ 4Ø3Îö"ºº:‡òP¯œûÁ‘Iª:¹'óÓ+BÃbÆ\ú÷-%éXýSmž¨gk\DDøÞy‚k?”Î* ò<‰àf=Ei$n“WÌ!ØC—‰ýRýjì%¶`UåmJ³ì´¾¸Ò”=3(ÿ¨1좎†,x91*¬ @®}É;øÔÏðl[”n!Ðõ§™ÍÿäÏàNIâ?øEÏR]qƒñ ¬„u¥ÂLaéRr'ÞŸùaG÷P%láßdAÛÏ#²ƒ<¡5’>¨&Ì…ÌÉ!Þ¤&’÷§¦éÓ¯ô'ÂüѳFÝv–C‘"Wü>¢B²QL$ÍtÒ¦n›‰ˆoPüÜ'½ÿê'6Ñè)ü¶»fÆoàSƒ„üÙ<5ìx¼²‡óÓâ¸ò™ìÏS•ƒÖÇ~΄q@¯0z6¿a‚ñWGMœî +åñ0Š#ºë$l:Åô+jé£CŸsäy$¢I¨ìàÞÍ­.l³2åÉ-5 aŽ +çT”Ÿb);4ÿ–ç$XìˆÀ/rÝTJ4A‚i¥Õ’”ÒvRû(ˆX`ð–V,Û‚aûÐZŒå.6û^Ïû©½ã€¶€äÈ7'ù=¢ÖQsG`¨’9\>KŽÇ¶TÓÖkIϹ·3Z¯±²Y§±À›Ç\\òæ€.¬F 5*„rî& ý»éÞ›tœ_RBÓË%<6¨LÈ7×k+ Þ _“ q)ýO:«E^·ØWrÖ$:‰©À°»˜êÓµ­«¸0Æ…¥ßª´RýëýEow„OÂt8öàÁU }bÕ$ܺvä›­ÙK–nñPfBËïš×QÝAØ@B{ÔE|0Š<¼ðüüTÈ°ù Ì@Ï–ï><:ìeªTŒxªxΕøn°à³Eˆ‹gÎH*DˆŠÐ8Øyn¨ÒE=e¨ÉU%oÒJ.B·fÁ_¹•‘)€pr™ +äóIwÛá·ý9ŸVº]·ü¶Ÿ1)ѵëX„Ãe>`91ðT-è9’—oÝWÑ|µ–Š8KR ýˆ•¸NM09RÃÖ:?DêwÏV5ñÞLÖæ-ù×ñê¦Å§k¦³Bì’¢ûÏŸÚמ*t(4‰Ó4"¹ü2¢×Ú£ÈÇ;xñ÷<«L -[™Ý²QöáÊ.l´ý”ðB@ŒÆ#Vˆn›'¯±«þbŠegvÓ ¢ªïð’®² šx\¾ºÇ5/WÇÒäeØ–A/‚r:[úê +#X—å}Ÿ¦øºoDØ£  ëþß1—¥yj#¹'"~Që÷ü%à´Ë’ àSÊ‹p«ræ¾gk7cŽtåKÜdÄë¤ý"Æ}|:÷§®âÞ]éMëÐ|õ +^{sd{ÿpa6L +ë +¯ÏŸ%«tÔYFÄÔ Ÿ£Uêbì÷m'Áe³»+3Ú?¤A¢9£W½JF½ +†ªÏì|-LÏóŸ¬~µkâyãP4Iê­Ç‘E?8$Öû¸_ܵtjÀÓÊš…¸‹·²é%Û3eÈ VàstÍ"Ê~¥æ‘ÿײé;¿CVÊ›¨£Ow²üŒ÷=‡}f PVÿÕÄäáóàn>mðÒóB@G¥ËÛˆŒ„­*Âí®©7%J'ÛÙ¢  úk‘—§Æ4æ­k)ÉD‘©Lú›C€Ó*yÆÌi€$^ÿÚö$µ&…Ä+KÔŒ#¥Ç™1ÂYÏL÷_ö~pÑB`õ*ç<ÎìÀyè7ÜöGEñ¹F•Ô]…‚®Ç©•«=pºÊ@ÀØf£ž}¸ÉRA€é â®ìd"cÙ” D5…àBŠªÁÊà#å¦ý•Œòü/®õ"IPa]¢N³±Œ‡¼!ØšŒv’eÀ< +<¾æ$ éFç«s^ àÄÞ{Âs’סâ€Í–êÍ‹¥=!\j‹×—·Y€çõ@V\5be|Éì²Ñ3i³lã¦"¥Æ­øÆû+Ù4x5ßåãvÚy¢>‡v9-m€ÿ\ž®.2óÅ.P‚¢3·Ù ;ÃKq}}ÚÈ̪‰YKýziÃÞ3—åîíL@©±ÚßÆŨù…~NÄè9æirú|6Tœ—m¯9ŽßªÍu¥$) ‚Z­ŠìÀ.y3áœÚ–Z‰y¹¤ÕèÒtNþQv:k½A§­ƒ²'Íõa©AExGÅì À"À‰«<²¥Â}e$ÍÚ•óy)îU°2¥ûþY‘ºÂUjéš¹¬ºù¼ãØ,Äwr{·‚ ^üI”Ã:‡ÃÞY7ÔTY7Qí×òmÏæ¦Ëeµ5»FÌò8ó”ø½r'›Ã°ÒŽúO2è·”åÂ~Ū„Pr­ñ¸¹üæ‚B^Ì@Ô!;ñéãó´–¬w`C‘i$qE4F?%×Ë(áÃåhÐèõÝNTÌu²Ó“ºy¢)†,ÂôUð0»Üœè“rˆÛÄùfaÈÓý½l_ƒá'»à;ꆴSVøÞë{é]»øýp‰åÌÔ6‹ïUj(mEåÒ=×ûfG&!:×»$ çß½ü¸¢Ñ1AãXØõ>4× õÃ$Þµb?ÇÙîop†…ƒ)ªÓDZÛÝ­áŒÉJC¼´â…ç̨ê+½2D@ŸÉS~Z^è`Ú7G©ò}ÿ@ï6@É MmS_MÀªªŠrhçIÛ¬±HAo`œ'©¨C†%tÔ‹‹dKö!á·wWé.¢õðz(^&¤RôÒtm–~-M˜._§ õ~Çß|óž¨%æÝŽ§¤•ÜLã‡2`è(x—øÆR†Ù?À±¯‘ð$˜)7¦°¥£› +4«+mÖ1›Ø‹§(áDZ^x8p +š¡"‘UgŸa†o6Yf¯, µóôi¾8?G»Ž$“¤â¦`V»øÄ{×1ñrË/ª ý­@½:çÖG°ÿÉ+d{xš›Ô{] ÌÏÏòÃNpYÙk.vTe%¬Ÿ~æêt÷ÀyÇ·Vž`…ßPŽ½ bƒ‰EmNÎmpV­ÄŽã–u×Y³}n¦~S±ÔÝŽ™Î¶'vø¤\)@~½ð09ñ²ÏF&` ŸRD4½Ü»ÓTééÚÃG3 Þi}ó'’V!¬N値–±/Û5Ma£IðSb¶Žq(´ 9Uò•K!rdz~vˆ"?ÊÂä1î?a†øW²û/@;êÇ ôrËO>‹…$…ùt¢Ò[àZ燸*¦±6U•Ë¦Û‘‚$¯£ƒ{/ýÃqAª7Rï£xSq5ÛºDÌaö®EÜÇç…ë 6 œÐÙ¸D&ðêS^+õ +¢ü}KM*×¢úŒGt81Uÿ\ rö@K¦›ÊÏŠ³öuø%Oέ_˜3{sRu7¸úXøo2ŸM´4ÌÓE,Tþ")yï‡@ù"ñ ª:ÓÍs²²¨ +·¹$ñ¾«Ÿ`Ú,@ï–=R%­ìL1-dz\òÙðô’£ÈÛ#cÇ9¬ê´dït§Xyôo¦CDž>™?ßÜ"ÌÀ2ã‹^Äuí*a”š_/¡^°k¬)Wq}®ó˜mEZÛ¼_Ï÷¹XžñŽJ5ðG0 `tw½°þÅJৗ%ÚcUç×/ê¥Èp„gp‹ò£`£:G>mcÓW{,™¥ô] ûqý-´ßÇ—P=ö|´sw©ž»Ï;ï¥BM$:¨¼~‡gs#c%^p@õÞyÿ˪fè’²ä:¥¶É."#©ÖSÒ»iÓ_?Ÿu[ M]¨?áy˜!’AÀÚgå¾À@©X±K³×‚¢'›‘ŽŠè²ï.9R’¶^ue¬Zð"–th%Ë'¾(^g2–¾W|à làöPòXŸæ‹)š>—£ð³Z?èß[$Ð@ÖUG ìqÔžÌZšñ\6L ËˆÄïP›eðú‚ZáÕ^‘§ŸžýÎÃ/‰rßF _c™ +)‡~ÑÛ0zqÀaHÄøÄ=`àÖzC#ñ}‘xÓ$‚¹à“ò<ªÈ÷^¦q%Ç*N2FÑïô.#{B4£×ÁÿHarCœ+2è)Ñ©i«*v,¬ÞãƒaMVM>±¤ÝLF¹ÛÈŒÛkÅþìYgrô¾ºv‘H ~5 ˆI©n÷‡ISÂ$Ÿç a2E5‹ÓŠRóI#çÆ>œ¹â„Ús<§²‰¦h_8½Ô]7ÛâsëÔÕ%pÐÐ5‰[´©‘ÄV4´wÓ»72å¡+ò¡¤äÕþçL„æ=è†ÁÖýÖÓðÀçîRìZŠ +¹¥tš[ ž eüM£V©Z;Î;Ha)X³‰öC©âû&+ýw>†rWƒ#!CøIq;ô¨“2!ù]蔨ðû9 FI‡+”[yÕç1ê7%r²A®Å]4F L° y!vô‡ûö%ñ{Ú$¬S¹PÞØèò(Ü)Ì–•§p èeHn^á1™hü G™Oo; û‰ª£q{óÍRçü†Èþ>Ó?Dµ£…7ê•Fê<Ǽ”i:F‡,0@ÈàSÃñî °ª«Dh“œƒ«tD~š†…؇Š$émnX?©¹¢É}"+OÕé¸Kš&F¢h•qˆ8§|æ\¹¢ÿ´Ï@º¿šû©* Q® †”àƒáÓ}›ÑG¢×ÀÖ2xþŠL;q`Ð(¦¯ÅZdž»TQ¿¶ÅìÚ׺¨ÎÍÕƒ°($Þäí¯— +³™!e}²µQÿD*H½IüpÊöÿ¾· +èx¹ýFt;6à³Â¾v&R =[ZkoÏ¿þóN*4q&êÍ7rVГy8Uó6IRÆ“ò´•³Þ|p…K;ÿ:"Ù6Ö3Pôyló[ã‹û6ËòæÐ×0CDáu1ÐÆ" öKaS±«FÚ,AµÒXk'"ÈprüsG€ËÅA=dÖQs¬ríOô°åºÛnV÷´³¹Q;¼Ž:SG²šY²§x€ñíÜ7?S™ƒÎÂ$‡K·Ê˜¨pÁ=¼å Âe‘±ù¡+sÖmOcÌSW÷UÄŽIôñJ†E¦Cl@±¬ ųv‘}ÖªÄùXÛa´­L}¬êÏ%½ùÛÄ+è–Ð +5ZŒéÍ7ãшa⸅jÛÛõRTA[¼wdÈkÇ3xLjW|4#øi€GR²šaDFÎâ%Ñ~ +ÖãÖªXîá7e¬Å¸—m^=º£cýMø"dŸ²× Jü `ÕD&•K_:¥£çý þ .;.S%3e"PR쬅F9pþѬê[t³ÁFƺáeXYûX“KýÕîmïæn¤ ãš9·üm b+’^ÉN‘I7j}>%,&÷ŽCö;gˆ4åÔ~â*6vUû'ç¶ç'öþ°ñ¾&ú&Â÷æMY@Ôìn(Ž›¹ždYX麾vãÀ2}!ýèˆR„ÁÆH‘â^˜ƒ‘ÄT>½zÎa «Iš֢ýÉʽn! ¡ÝU4¢Lš ñ¡¡ÀÂ7 –®;%®ßr´’úˆîŽ‡à‹Ä8–œ.¸ÄdnHOzS¢¾v¸ï$øŒiB7Ê·w qp´ëº¥I¼ƒ*SEÃùæy§ Í[ ©>¾‡ã÷$Ø”¸H»ex(ßB]ÎrfÖ©Àñn«J2AòŒôYn¤qtä +¥t‹ÕÝmçN8®t ^gõt„¦4úü¹©¦ŠrÏÂ#ϪNZJþÖ@·À0Œä†=‰ÜbüXoàF±•–\󃧖,0VâüÊ9­x¶ ðíS¸ã壘^›)Ñwß ˜q8`ZdžK¤Ž{¹Ðð‹úXª7é }J=tëÛä(“Á/“}çÍׯ®gtô;‰­ÇäLìF³Ö~Ìj*ü•¢ŸÏcÛÇ6×8ñši+Q.afo¾Þ¢P(dÂÎ!m"Ì‚¶dºiIkÒ÷P!ÊCÓŸ¶3I"±ÍWY¿ã6›A‘?ýŸx›ž'Ä;qQËî_²QDj.‡ÔåÁ³på¶ÚÒÃ…ý´Þ‹9wðy*K©!Ï8K’Bk¿;Õdè!%"òi«gOæà‡þ ÿ}‰:t™MWgd©Y.y¿—øÌÎ68¡ò2ºîpŒž&––uõó¹Ã:±‹ïzM-­©Â–u¿Q?ÌÔWQ ’ý§¡yf±£ƒ¼Âý¼÷þ°O‹‡¤ŒcËü/B3Kþs5ÉÛ4õ>{ Â8öè"Ýyj‡¢ø©ï' øŸBØêŒIÐ\Û<ÄÌv§)†WUVÔOª'A¥ü@Aßzo †c poÕH³Èpúéy&ǰ󷣞ö0'Üs9Ð÷úét”]÷ƒ¦wÛ‹Þ°V½Víõú›MË÷YÉÿ¨è“ÂŽõIÊ<|nÈLÀNÙüF¦€ÞjÓлõïdJ:*4ÔOýp#Ö¬¦#ªÃYife†÷Ì’õQTi.ÔS¸1F<òÂQDʢŪàÿ©í$FÿCœ Öøú1çb´Ü·‡øËùC¹È~®€T>Òq˼'JôÈ°³­¿®’´)°n«Ð‡ÊhlÓ­ÕëªDòZü·Œ5ž»&ë2BIà«´Ô½¿Vbü¾i†@¯kABh!9ø_ÔOE §Á¢LïÈn ˆNà0M/ˆ×ºæ- Eø&µ'¼G!7çJ©ÀB¶šº{ךƒ=yü=Ð_×´xʹv:íˆÿ"Ž¥0€ÆH(MUËx0uϲÀ‹OЊSë†yCv#Ÿå@Ô¶1¢\I\ÞýK +Å:±7‘/³ˆùY&›±´·ˆ¾‚®½Ás0}wß?fPX·½Å„¦f_ŒYÁ¢ñOP­Ìn!I£ùåË¢<˧Äl~0ˆôÑÉ–¨2ë®`ªCfÂà ã³©0<É'f>F–2'.ÚõâaÇ}A0¾?Ðb€íë©vñÝP!^ pgñæ••r`2¤ØWÈÜÍ#l²†À£á \é°ÛP{Tغ,ß…žÜSÐ邘M’¥Ùf‘_Š©E§0Z4ïDÈ4¦zá1VŽX±àÌ‚¢B7¢õZÖw·6a„ùÓø\:CAvdS§ÅÐ7[Ñ£±H¯ +éR)vX*ê5¼@õ£î!¼–í€Hu¿ éI}Ï„Sç`¥gM0U#Ó>AUPƒ$wŽZJÝX¼Ç,¯®Ñë‡RrEÜñ­¯ ÷ÌÔÙ«P nî7ýûÈdùj@¢Ä³f÷§óaùGƒà^Ä‘ºä»¸„JPÄLš Ô6&Pn¤'‡uýÕâ– /Ã@õþVbð-`•Ñ+©5*Z–“³9TÃŒF _iži°¦É°…DòÓ¦ÚßÓ…ð°Ùº»À¿a[›«é}'•GŒáh;C …RÙNn5~„:¤Ò§ÜD«°™üäÚe¤ÊåÏ8ßLÐKK= _r Û«Þ÷—Q먖‘’÷Þ“31pWÿ1VÔ&êŠÜˆÅ=*t·ÖÄ“»RJÎ…AõUõòdK5^žqʯZÕYŽ‹·rïsäM¼}H<•Þ§e_‡…•tk$°ºEØá9ÇAFøaˆÑ†%z:jooÚ¯ NJvÔè½!¥Èá(“Yçò08ÖÞRm¾((?اG’n|õ¸‘Š,°$?gn âÇf…OèT€˜Þ+ÔÄ‘åœ3ä>;Âfö;*¢M‰½Ö"Mú| G©lÄC~_ÉŒD1ÎøkØ<\Sùéç{i‚çfùž…ÊT.Ëÿ²=zuÞ[l‹P d}îÃX‹t¡>/k«« +0sàÑu'âŒ#Çuȹ9‚§í¢„à·£köŸªÉênó92ײ( Q¸Ê­ƒ^®¡+0íߨ2•[è÷TÕ$RH¬ ƒ_I†l™,´DðÇÒw”ú¸,çäãçgÊkü +4¢Ñ—(ú³‚?¦«¶ÛcÙ`‰¾ŽÝrE®d­Ç!êãC ã°™³\AíÐWdö5Ù7=óZOI\ùÄÝyô‡Š:PŽRfã…Ù/ë\½ãñÉû8AëÏäw¬Î/à®ùlrsfçfƒ¸Sñ9éˆ<˜+K7\Þ<¦Ýq@Nm”QezÀr< íp%¤èàšÞýGÒgr,hó$_àq"3›ë&É\ž ™dh7Ýe²ìæÁbU\:‘ô÷æA=pÿŠ'*qq}¬ïzãùæI/à˜ˆ¨1§¤Ëa0C"\håÿ,ÔxÅ>q ÇäÇvõÙÎU'½}¯ÞŠÊ„MÓJ.ÁõÅÇrÿê§ÂÆþÕ„ÌöásÕo=<ÎÁÎøÀ e8ÎÒz¯á—©–|‰‚lËçq¥4¿Ö.}n<ˆjoQ¿²‡‘»õŸ°½ìc~—-:ÛÅ–Z¼g¸¥5U¬rHßýª·ÿ±wu#§ª&è¨ìSM¿ÔXòúƾLEÿ7—5KëZ-gÍ<õïCI³ÝÞŒ½¥¯Û$d…bË3”Áh‡ÞâËÃxìJÇk©ÛÚË¢xýéï°2u½JL …³¡m¨%ØõýI‹P H°3Éìh:°Í–Äg'Ã`ñQ*ЦÇgž¨_ïw$wÆ’Ï%Éšù5-0I†æt)‰ 5Þ¥_)(A1…'MÚ¤À@tC¿º~s¸T!u®qx'(¾Þ"·q\t­®Ñ ÂEÓ^¡™#ú»KcF`ù‰ü.o¶ŠC¡15-u#Ó0Ž%•âqŽ$/Ö}iå·Ñ‰ ”ÎβæåRÑö19=£ÌD¥ÁvÝõïÁäÓ¤ÖO‚Î\žu}'èvrº^/EGœ†ÿà~<Ø©BÖægvýí µÔB3ªÜóðÙËÙhh²?(ˆ¶X½òžøeØÏÿöcô&HUF×"»£&ÔãñÜ\Ý˱%JÒE9*ˆ#ÎðI©C““¬K©ò´¸éYè‡|‘²¢GWJÄ0Æ*ÝlÆû} .wù„ˆ¸åãVCA,zƒaÔ*Üð¥ùO$Œ( »Lƒ†¬èB?¶,öý’“‚4LÛ¥ _¯E±¡Ú®”ÍÀ=9ó‹5ç±àR'»öˆÃEøö·c~Ì h™¢þ¹ð4+Ä•(@ó[b¦.´££ZáølâðÀ R/l³¿µ  Ë¢†6°œÊq=_è_gƒù?š"(cí žd襒ï—Ö ßüÒÏðÛ.`¹; q÷?˵ ‘"áaù0UÃC,9ç²È„9¼OT±mh ¼†ECl?¯ª!Èo·´¹Öþ1˜|žÞÖ°ãEóiL”ÔKàZÀ µFd”:Æž#UÑœÍâ'qöÛµ7+¿È­Y³öcJ³,$U?zð×hÐãWÊ:GWB##ŒVS£¿Ÿ(ʽÎhÐ&ÁéŒ0$ !Jõ¢ð6ô%pã_8|@e.´ä7c«qìojæñpN*Øm¦s¿U~˜3Kººûý!(]=Ëú²ô¾f|jOÊÖiC𰽘¡¨P(¥Ù†¢µs˜TòFYCÒ~Ò%óa¼†)ù~£ä¨d¬šþðÜØTʔѕ<ûl,k–éíuoÊ€Q?¾bÓñ“uÖå¼%!¼°Ãž6ã3m®¡¢)ÞØÚ]=$‰£¶˜fãnC:)½èn5jˆ ¼à ioŽ{PÏZ¦àÀm^L:Ö ™WdõëƒÝôjt¬” `áÅH-.!)\†Ë`Äyu Å-‚t +Ó5žˆFcðBçj¥˜º£+¬2¾¥®ç°«É;AWÞ¸þ¦ ]Fô?½X¸Œ€Ë U!¤Ž#Î:‰¨íÕæF»ì·vR„ÜŽæ7Bܱ²å(Bø áì/4Â’=* U ”ø€q[‰³+q.*5rÝ E솬X Õ?§²C³Â‘½ÞC-¾Eœ<ÇšáÒ¯­¿]bZú)’cªpÈ9nŸ{:ÌdÛ®óýÍ:€À«”·¦sT¹VÃìº|øn +Å’3{Lé »`œe2ùÓ®sf;¤ &àå~—A!®„ûwCµ÷>ÝÏÞ(êaÇImؤð«ì§-m®, +eäæJÚ±ÝÕ7»UqlÝç>D¬qN‡i¥Ð\ù×Õf¤<`¾U"ý2.R>Q;Ù¨ÄEù8¸Ôa;ÐÛ*påð~À]mƒ÷¯ŽdIî…É•l¶ÎÉzT»ø#)DX-ߟ`$.înÀ­Ý‘'HøîT‹K‡±ê{`æC‡¢¶16d£\âìOÇ&ý“ì/®ÀúB$±´ªz^£òâÅz Ãk¶f}¯½ ì_(̘:®3X>«ž²MkG¥¡æP4ÐÌŸŸfÞ+ä\-oÒ†DEmc#(lë=ÌÓqKð0˜®Ó†‘Æ—™}vš2³Ès”}Ùß<ÓdŸ÷È-ö¢¡Ô\ƒ\°–ñ÷§¡^¬¡ôsÚ¡7õLÛ ØÐp›¹^Ù1jŽ äó%!¶_ƒñR‡#O ÃûÛºÐ=ªNé]5<ÞÒ^®÷1b¦—iŽÝ,6~Õr¼‘ }ȧÈ÷êèP*[ mÒ¡ú4XÓŒÒèQ”vƒàWaó­§/Ÿ%}‰ãÖ¥™Ì˜1l”pø‘›ïBl1.Œ Ë? ¦”ÚGˆ›áW ¢«ŒÎ…R)“aÖ*+ÂÀÙ›hh¹yiïƒÂí=bˆ¨ÓÕq æ×KjO¿¯rEØðuít—úMÇr {¼8Èz¬õmX4†èÝWã^HåŽÜ¨A]p½TcÜdеÚS$“•”´¼}öë:Õ¡þEzŒdÄŸS")´›ŒùãáVêÑ9?¡Sûñùãã.WÏmÝ?«!=Ú +Ssš¢&åJÇ.:äCeÓ1œ!4´c"÷ëp+âÊ …^SœÚ¹ŸH—q9‚˜jÇ<Þ³´v`I`ÂËrCU‰X½lGÍ)¾Åª3«…(Úm8µ‘ L@nÙ2„¦1é +ti}3vyU6óåeJûf2ÓÏÕ*ª4œÉªÊ7Ö—Ìþ¥ö.}Ý“wHt–·QéhVc ªÛ®ÿ04‚‡ÉÃú‹4§fTÝzÖ’p*ñì›^VÛ=ºœC«T +h/õo£í-ÇËw‚:$iݶTÝ;Ë6Åû·ÙôŠjŒ3Í€2lm°óݵí•nQ±_sKž\Iß(eó.ð‡~µw£\äðEå/ì`—Ä?5À5ÙÖ¹âVqù¬Yš@Ã{$9Àoۉ㘯câHàzÖÑ *[ˆ$£Ó\×T ÇaؼHÌr{qX“B7Š6n³eJÙ[ç^9¥y½æázÔUÓ´æJ (Jèù¸:±zy^+àÛBäfSDžésæû¦Ðü0R&Š”÷‡ãç&â=^¬”ós\ß8­Ñú@áÇ‹ŒvW7¢ Dòå ˜/OØ:h. t6@ˆ(»BP{RKb/¬‡±œèª'_†ŸÂG•…A6°íPêŽË¿|‘×KýÌKl¸ÃtàÄ[å¯ê +EbRÑDfHÁ'¿^Lu×¹BoAš1½ž>î ôµ(Ng GM¢ããúI¼ó,˜€¾e Ýä‘”uø‰(Ô¹¶‚þœÜéí¤´6‰ÑgÔÔ5Ì:(ÜѲù$ø˜<œûBýmK×wÈýYºLþÃ3¼ýZ ;«Dº†þ¿Ì¿ã{žqÒ§±øþ »0'þ«à@À—$aõ“Ô}à¢<<„Np÷&|Õ F¬Aɤ…àMÊsOÂ>O@ѯЭéoþw} iÒ~7¦ë{´QFŽú ý}ÌNX0J¬-šÒŠgçŠGÏ‹tEÛhÎ2ªÍo÷ìEò–¯}ÍRØÛù§RêˆU ;Røˆ]ƒrÁÁ¹hy¬g’èBj3Íx¨D [`Nlf£j]½ßn/jTLÏóoŠè™r*ÓÖ&GLjÊ~ÞCfÌtí ;è¢2ÜþK pª:BìÁb=25<×hJ5"mI)î|Æ>¨ùxc艱?²»Z¿…±7w(XѨšÒM­6{!Ô·KI“"V6™Ô' œo‘ƒÄã~™U¨; B*|ji’¥ +N$MäO2¸EJë­S©3éâìjxT5‚®ØúÞ´¹Õò/¯BB{¦ò•vë]XAÄ1^, eòôb‘AÏuoJHLSS¤$z $Ñ Â{ñðjæÿEªBùäR"‡+kzþe á9IÍq^HØæ‘Y6KirÔ’2Ò)£¤:˜v +WŸˆõžˆ¯éæimä㜠Z‘šLlÑ1&¿ÿ¶„C€ætD¤é9_Èÿó¡BqM`Ä»)èìɘñú2á")œ\è¯GÓ¢¹Vúì÷ Ø'7@VˆÏ’I6­hVý€†Ð[M\ˆØ‹ì>Ñ0ST½0—ùý”[oyÀaß¿½¾/¸¿K¦:6?XH±äïÒš½Ê7t„¾(D¸`½’œÌñ ¡*½õÆ<[‚;¿ÀÀ;zB„:̦,u5 œTm’&§òæ)²`cƒ’|ÂsE)é$Ïe"oôÃO &¯îíú¬^×3»Û‰RäMgwÙå|Z¸ëÉsÍ9Få SWå|ØðP÷`“ôJIsÝౕiÈ?ù9r=k–¾×áP°«Á”WÌl `6±RnyîžÎqÙNèpLí•4to³3 å8þç³J §W‹¡ÝÒ¸nHeñã+#VºCʘ Íèy‹ÏKÜmflÑóàÄ^rYóL.­àÁ+2O²K[¼wdÈkÇ3xLjW|4#ýkaì€|¾‰ˆ2<ÇN×ÍbÍ°ã¶BùŒÈ@lXÿ XȆÒ€P=zyhsÃ7TÊ&g¹‡)ï²€¨]í/l±æs'½“û = ø±ûàðÏ„lå‡ßüsFÓõqŸyAì‘#_Ÿ’v™*0Ö1²- t·l8Eî¡kR‚§7[é—ý êŸ<îò iò÷ÓRcX<žòï*  ±É>AÛ{ý´Ü\ã5ZP­Ë½DJÝ<ˆ3–àÍ/¼ºÇ‡ ‰É:uo‘·WªíŒç–$i@F>-CX­³ïeHÎÄ4‘ Šê¢ôˆ8uêD.8võ'nÃeòQ@cøbW+¬š^ÑéÐ1zƒ|<{b¿/Ëk(Å'Ã/1ûG“¤ˆ„QMgíD±L!Šg+Oà]å}ä`Ÿï¡u_FçHÍR¯Ÿ~TojþbÜ•H¡‚z²atñÄÁ¸f¦G±¥ì›³)äÐtMÂóïhÞYBS=°í¦Fì ¯mÍ•ç8¥õêZx-x ã{g=>ÉÖÁŽòýµìPAÑôô2PvDK£˜7²@]Œ¾¬ÅÜãq΂Ze².h/xS‹HÝIW~òàèÙÀö@%ÇnP¹5À&ñvT÷1;­ÉÔ‰´Ϩj`M,*Ó©IÔ”ÂÝÄRj%qTù* +F¼½uv2 Í…°wÚ3p\àxýå6ú2i#;~sTw½:*LÓ¯D3Ÿx8tÔ™ýaMèZS·:©nH—¼œ íNy +yØ­g#J,“`yIg4éaÐëßdÛåb _ª—‘Hfó×Ù9À±¯É(iºÁêH’éâÍjºCÉ“$!Öî(ÑPƒäÜSî9f’þW ÐÉ»[3*Øø?o:eEgÓã|f$âÒþ´„¢ù*iÊÓÿ©®U0C׃Ñ>_íy5ÞÉd®ÛG!ñwü‰¥«¤¼il™*ŽÐà^D¿9O~ë‹8m8 ¢óWæλ£Õ6jªÂ§p¢¿P΂1hpBfá5jdô¼%c ¸0z–î2aS"G¨W !ÔÕ â©þøTÍb8j 2>n†¾:W)ŠLüŸhÁШ6¥J£Æôä©®à±|i R©ÆA5ÿ=OgêÍCl’hÕð¦q›fª>L÷RÑ ˜ƒ¨ä€g¥ý”»“E±U×Z9 •ü{'vÜ÷¡¹::£®“Q&ç.‡k;7uøä¼@ºx¼Á 4R¬£ubcþð8”Ó;0Pö]¦4oiŸ×Q]vø‹>ÇÔ‹Ûd/ â 3Ñ-W7 ›´½ñ] «C‘ý}ügÎ},¬ášt›/‡·*È}Fª@¢7 )јö•ø3Ï,ýÊšþáÒtJRå“W nÈä›ÔÙy1OÊÑ{,±Á$C`l—±z„F¦ÖÀ ÆìÀèóPÃOE~®§þNc{EžÐEÃýä‡Þ$CøïÒ2Njü¿¦6^RŠbÃÆ »è…O‡I`‚Hédûû™ÌCìþá|Y+O’%­Ð¾vÊ%­îÇS{}LháÓ0û+ýÙ¥r¬g·§¨KUåèiB«]´v× ¼Õ²[šJ'Ÿ»Ë¤]K?€Ù=‚ ÉNˆuØØæ÷n‚«te|‰êÛšü2%A€ta_t*é?µd@ß™æÜIäÐ’†á…ÖŽ¼2Ô“Üqy>„»óâ¾âë;i íõMV!¿Cw9~‰W^o #Ãkp<Ñö+ÞjÎöҕݑϬmUFýBž† ÆÎ@“âñ¨Åm®…œžþEÿˆKV@vGã~>²á©›Á1›w/{ØÉS*g¡òemsÅ'[É7TÈ¡ OPÆŸtד††4kË“»jMý‹ýP¡“…B U\;Óm¾v/2‘WJy&$IÍwE'd”…UÂG¡Þ¥âÛ“ÃjQ:x£ÚoS¨ 2¦æ0. ¨‹‘êÂÑø–»ÒQ è‚ëíièˆû¡â+m[RIûzŠ¶†[Ž?ÝØÏC¢±\o·Û–*¼­³0\>xƒ1-²!Ìu3︒c»‡¨sоäÀSƒ`#+±9ƒL”ˆ²xŠüPN‰=íÎGR%ˆ=· Q‡ šÝ¼SWî_XUV¶¶”¢‹µ"K>1Sdydæ¦ÝÜ:‹øFô̈KfyÚêX}6¢…B´M=:$ô¿ôü |~ìá,ˆÜìèŠ3¸×]º³gGb^!oŠö,†€v]K|ÓtäÉf®KïMsÊMßãkêlm¦ž+}w~ŠÛ’†ûî°ò8÷£„ €hÇáúªŒ»›]Ã.ÚÌ^4à˜êÉÚ!˜í÷Ò®›¾ØF§€G’×ð|âÖ»8Jìòëáµ8áéclUéÅÕ6J¥–ôï/ÀW\ÎM>ëô»Á‹èQsµVï@d Ô]:JÍ4ƒtQ·mçæš3#F#| J¡®˜ÿ‰~~&&;Ie-“]÷‘³š0f»*§$R}â}ç¬g=ª,âÙÃÖzC–¯#«aß1ºhoÜ"®êÓXgÌf\÷¢çõ ¡oìkÅCÉ/‚X…@œ€YWŽ}Jc¯§[‘ZN\]íõ³ÍŠÚ|ëÓS¼fÞ±­ç‰õ¡-IÁ¾%4ËdH&s†b(0ØïnÈãFÕïÕ Ûˆê<1¯J˜N×KÁIÕÏe9W¸ì²–&‰HðžP°vtâ}þ€­ŠTÓ£Ü'Š‡aUÆu½Î+ª*7RI‚EÝ"ÕÆ]µ}šÞM,"gß($ ‹ãKWJ$[¹. >èø­0-ÞËóÀQD}è5’‡A»œjNsbéª j)ì@PÑ™»ÈIªÅ’Þ-m´Épök`àkƒØ© 2¥j_mª¤%¿¯Î<Û©ˆn¡ïôšyîä•¢Áq…ØâìHÿ±ÆíR0<1Û¡-ïš/§Ê鬙Ñ";€XP¢3bØc ¿[WSé<4S78@^›z¶ay}7»Ñ!HÝ’ ½Žzd8NÞg,Ÿ`Sn¢Þ+`Sé%φx\1’8å‰rVï“å•b¥ÅŒ8A#¬Éß>0Hì£ò&©r‰ˆ¾‡ã WrØ1w-#±\¹ÐKÇ£$‡9ßáè“,ßiŽ¬L«zAŒï f§Ÿ3‘—+ûë£g‘à'¤žÁ¬*”#îoí0¸pµ6p_ M_ÆšžË‰#…=´Iÿ+³jT$.(¿”óæ²Zø`‚KòAM Áp‡ˆ§x|Ñ +»š•?s9ЄÜ8owÊí>Í<7ààò⇚ê÷ú|¢#dÃIY¶ÁDá²áAÀ1çàãsFF|ý|¼EœvîðKº?t”ã#bù·Š¶}ª’% Y 3ô#»…¥´:ÐqÒl9Wo«‰ jýäûˆøÇi¼àòs‹Làý©àâ­zx¡bxEr·“™Ø(¡å6T¤À-ÉØ¿ ö23ÝÚÁ¤€Úô¹„uÚB¦Ùyƒ dª @ˆû,Š¡ø¼ Å.I¯ +ÍèR‘ÌŸ–Ô£ÁÇo_e‡œ¥´úõcç¼ï”4JÎjÇ +†PЗ¬ÙG—h±† ëû¶ûe§T£lù½þ-ÄpͽƲ v,…{gï]ÌÕ¥\üÔ¯UÝ9N&ˆôx_²ºÞ‹¸¶ÒK§m‚p=‚ŸØÁHÄ‘wþþ˜)],í´T÷—Œvê@?àó#ýþù››‹¬"F–d‚ ){`ÊÆ!}ǡЦ2š‰¼…–uåF‚ $9 --G,AO¬7³}#ÕÖ_ê,‹màËG”­¢îÎÊ&}Ð õë°Àz}¸%6šk¯tºÄG @Ñ%¸#º† +¯Ä™Äp¡ÂÀœ:ƈk d–w¢p_H?ÿ4lþ Wz””æÿŠêÂ׆ËNu¹Îzk:ói8ÀŒ’™üR™àûï=Ð#*õ@Å>x™öÖÑ›sa =÷2h…¨í«%êÛZEµCù—¤(«ÜX[åÔr—ÖòÅs,3æýÉgçUWiØ´È8HÑvŽb+YÉý‹ñ7§©¢TIUº³ô Þ#Ø£¢€P?E`ýD&&6×({ *ÐëFÎ\ÍŠà_±¼FŒfxPS¯ ‚ôn²—ä– +¹/Ä)³Àà[‡…\¨êˆàt4 ëlbé餄¡Ÿçbï‰Aþ‡©ø×ÀÔXÑC£F€\d#›ºDèoº’³q‰Q¸l~vóøtF½]ö ïÞÄÚ r–l˜Úe6I‚*uäs‹.+àÑ«¸àUÒzù·ZÖ †e¦¿¸u|Mëñg259#\°Ó§”{´ºË¿a19ž:žÓCK\Jxß„ ¥‰™•Ò°†Tökû¤ï.b´=ã¾%0ûƒ`bóJœðí ƒ"Sý9Qšy;a^BœàÁ…á{,w</Žë~¢Oåx3†,ÿÞ%í„øµôrüoX6~=Ë„ºº@x!ñ#Àd^æŸ -õGFDeºØ¼&a¬n97‚}?`šÄ©¿wWâ÷«SS___°6š°¤~cè¹·°€ê·Ø²^²nèŒï[{ 3±<*v—Ô5kç”T+'ׂipщ>zÞ[Ì&ÆX;$- x}:lŠ0eÐ<({.Dö‹ªSßãÿWöl ‘eqRžrëò⚧;#/U÷Ãì[K¦åíìÕ¾È9/ï—ìz¦H¢ éÂ-$VñÊLÚF¤ÜEãƒAÀê{ü†›æ­›Àlµ X­==…¥Ñ ¥– X2¦I{“̺è3Mª„cú0÷Ç–â +¿SúGß. FËþRãš””‘eNè Q’dWRË-ßFIÈU;ÿ{)¹8™=3}ºnyu~‰ôCT·¦·^<ö]ˆõ\j‚mElDÆÄÂh¯—‹´{€ÄÞÿ³‰ÆM5Ëóìx¢Õ%—y:'|‹p¿ ™5ÿÍžg^)`Bò¡ƒÔ|îÌÙ¾1ä™O¥ílwv1'°rñ§‡ÞöLàrM©S¸-ëKÏù¤1i+a"•$®$ˆ +¼Ì|°Ûù;žÌ¸ô´XáºÙš“J#/•Ò1¿äÔA_Hù–ž.3hLâÚ ÓЫAM'¤Èbh€ÞNe— BµŠ'ƒ{Àt¡9,å4†¥ÁS–·2fü?;™ÒmÜPßÁŠ³ª½àﶵeÆ»ú²¬ZvÏFÁõÑñç#Ì1ä\Æ9ËI-¤ + 7%LMå‹n—ŸšzÅl¥½ýHݘU¸a´Š¡ûQtBK)®&¬Í盹j€‘@æAÿ&YwZp@mŽØ3ÑØä¡à éº¢O•*2n‚ÑÊÉÒTz÷dz¬±-‰à ÉE +?ZÌŽÊÿ,k¡|…-‚ýœ*ÿë6×÷õZöð öŠZ©w…Þœ¸]Öjñ?ñKÐNOƱãÞcƱstÑz%”ï¾KÇ<ûÓ-|Z¸ëÉsÍ— æ6µ¿+DWF§¶ˆ.†«[“OÝÞ†;'ü$«:vöÎk[¯hуÉ­¡ÀPjÄ%Aýû*YS—.q~üûQÓûÀ±QO­ˆz~¦Ãº> Ì6iû´à7}^äFò•CQ²šF'ãË}Áš§ôŠ3$½Zœö÷Ñ4ch¦‘np“ ǧ³LŒ‰’ó`¢x—R HêI<Ý>_¶ªud?Ÿ)9®Œxšc9÷ L´b(Å07DpGªœÄé¡+FQÃ(´Ie'0é’§ “B#õ +$Œ;Ô÷²õMÊg˜qÆÝäÑqZ$cI%å_ÂEñ/JZô Øž¸ÿZ1¬,Ì+_I3AäB%^¯‡GôæF†-6«øA¡úÐWL~=6Á>4ªê®æ¶P@æx +Ub]>Í̦ì¬*¢×ú4æüxmFû.›ÿLé!86Æ»ITÁé"P\¬Á¯ð) endstream endobj -1980 0 obj +2230 0 obj << -/Length1 1188 -/Length2 2740 +/Length1 2210 +/Length2 15959 /Length3 0 -/Length 3928 +/Length 18169 >> stream -%!PS-AdobeFont-1.0: PazoMath-Italic 001.003 -%%CreationDate: Fri May 17 11:17:28 2002 -%%VMusage: 120000 150000 -11 dict begin -/FontInfo 14 dict dup begin -/version (001.003) readonly def -/Copyright ((c) Diego Puga, 2000, 2002.) readonly def -/Notice (Copyright (c) Diego Puga, 2000, 2002. Distributed under the GNU General Public License (http://www.gnu.org/copyleft/gpl.txt). As a special exception, permission is granted to include this font program in a PostScript or PDF file that consists of a document that contains text to be displayed or printed using this font, regardless of the conditions or license applying to the document itself.) readonly def -/FullName (Pazo Math Italic) readonly def -/FamilyName (PazoMath) readonly def -/ItalicAngle -9.50 def -/isFixedPitch false def -/UnderlinePosition -100 def -/UnderlineThickness 50 def -/Weight (Regular) readonly def +%!PS-AdobeFont-1.0: LMMono10-Italic 2.004 +%%CreationDate: 7th October 2009 +% Generated by MetaType1 (a MetaPost-based engine) +% Copyright 2003--2009 by B. Jackowski and J.M. Nowacki (on behalf of TeX USERS GROUPS). +% Supported by CSTUG, DANTE eV, GUST, GUTenberg, NTG, and TUG. +% METATYPE1/Type 1 version by B. Jackowski & J. M. Nowacki +% from GUST (http://www.gust.org.pl). +% This work is released under the GUST Font License. +% For the most recent version of this license see +% This work has the LPPL maintenance status `maintained'. +% The Current Maintainer of this work is Bogus\l{}aw Jackowski and Janusz M. Nowacki. +% This work consists of the files listed in the MANIFEST-Latin-Modern.txt file. +% ADL: 778 222 0 +%%EndComments +FontDirectory/LMMono10-Italic known{/LMMono10-Italic findfont dup/UniqueID known{dup +/UniqueID get 0 eq exch/FontType get 1 eq and}{pop false}ifelse +{save true}{false}ifelse}{false}ifelse +17 dict begin +/FontInfo 9 dict dup begin +/version(2.004)readonly def +/Notice(Copyright 2003--2009 by B. Jackowski and J.M. Nowacki (on behalf of TeX USERS GROUPS).)readonly def +/FullName(LMMono10-Italic)readonly def +/FamilyName(LMMono10)readonly def +/Weight(Normal)readonly def +/isFixedPitch true def +/ItalicAngle -14.0362 def +/UnderlinePosition -167 def +/UnderlineThickness 69 def end readonly def -/FontName /DUJUUF+PazoMath-Italic def +/FontName /XVBOSG+LMMono10-Italic def /Encoding 256 array -0 1 255 {1 index exch /.notdef put} for -dup 97 /alpha put -dup 98 /beta put +0 1 255 {1 index exch /.notdef put} for +dup 65 /A put +dup 67 /C put +dup 68 /D put +dup 69 /E put +dup 72 /H put +dup 73 /I put +dup 75 /K put +dup 76 /L put +dup 77 /M put +dup 80 /P put +dup 84 /T put +dup 86 /V put +dup 97 /a put +dup 99 /c put +dup 44 /comma put +dup 100 /d put +dup 101 /e put +dup 33 /exclam put +dup 102 /f put +dup 103 /g put +dup 104 /h put +dup 45 /hyphen put +dup 105 /i put +dup 107 /k put +dup 109 /m put +dup 110 /n put +dup 111 /o put +dup 112 /p put +dup 40 /parenleft put +dup 41 /parenright put +dup 46 /period put +dup 114 /r put +dup 115 /s put +dup 47 /slash put +dup 116 /t put +dup 119 /w put +dup 120 /x put +dup 121 /y put readonly def /PaintType 0 def /FontType 1 def -/FontMatrix [0.00100 0 0 0.00100 0 0] readonly def -/FontBBox {-70 -277 902 733} readonly def +/StrokeWidth 0 def +/FontMatrix[0.001 0 0 0.001 0 0]readonly def +%/UniqueID 0 def +/FontBBox{-491 -316 834 1016}readonly def currentdict end currentfile eexec -ÙÖoc;„j˜›™t°ŸÆÌD[Ï|<3322ãý¿ô9I¸fÈÂÜ"ýÇXXH`ì{¶Ú(Ìb$`á«2 GyµÀ<“»»É\ð&’ÌMê¨ÒêµÂæCté+Ë…B›®JvÀÆ·mo÷Ïš}^ß¼ éYTY½·ÞCÒ]Sü=ÚnðÂt9x¦Ð>ÌíJòêKÌ1¾‹ž'r6iÁ’Xï¯Ü'l±­é Š”6ÑŸ±Ã=ïvª1]Û -…Ä‘gåŠ@ìFÌ7žªàpqûÓÇíõ')øjjOh%SÂÊösq™·=ÓðŠ{ãÑ.] ˆZ§O7Ñ”î‘éEÕ„|Ûª´u±BwLà Ñ`'ª´&ÏÄ0C¶žg†)X0F?nÂY¼›«&‰Ñ8 ˹r¼]lpK¹GC°ÓpŒŒoÄŸ³g\´t/íþ–bj©ê}t SòEî š²£¾cûqR7¡VÀ“L%ìŽY'1³¼b¦êÇî ˆJ71Ù®SU3ÿ=ºg¡OŒä>b¸H”t ÈkK‚¤Ëc:ƒhíý׳A‹:FÁ.ÞxW˜(úäÿáüLÆ^œÓN1K °Ø@_»Pÿ̹>ýnéæn¹ïÂoùf³µ7-Uš EøßØÙlÇ#©+¢ÞöA5FÔ”¼kèyêî¾tçÚ:÷âR¶R¼ª2\þ¤3©Ìøí’ l ªvÕ"‘Úª> VÓûœ³ ù”íb¤…/¼¬þ™÷“&’¥íµæÝ"@Ñ4 -¹nD5¤tvÀñ_\…ž$ÎS„|Kp°ÔCOÂÈ1vF^É(âByÒ@ˆ‰`gˆFú—szVŽÖ‘‡ûŒ0‡ôÎQnÌýÙ .ßÔÒÄT=>>‡rÕYA,{ÓøÉ ÉQðƒ rçFä!Þs߸ߟ0×€ߎ5¥Ë¢Vô¼<æ4d(íênL¤Ø³[PCŠ©ì²;üŠ+qÞ¥œK V²:¾d£§&>J3)éÅ›ôQ6Põš|ÞB5 e í£ñ)\£@ˆ~7mÛknË‹ª$Ô¶úÜ–P±+‡‰™C}èòèo×*Ú(¨ã ·-ßè.4‰ÓÆ7§ŽWá?ø8"½N[.½^7/ÆŸéÌ3§þ/3±j(ĦwÿNÿË º×þ¸ wLjŽC; ìf&ŠŒcv^%€:ÆÃS,ƒ’yr8ºN@K}m>KW¿÷Kn­9×è}ê%F¢S–GhpŠx³‘„CŒå(¹€@òr XÕ#?FäÓØ0éʃº¾’#«w^øcuÖ»¢Î€…y’þ‡"¨-ÚlEƒ£#»~»Áõë¸%UŠôŠ²Í1b[ ^¨Â•1€›€@‹¹¶*°ßr«¹ÖEŒPâW‰ðS/õÆÛ?ÉûN!™õö< eâÙ* ‡ðWxaÏx"(äZ„°ÿ5Å…Á‹ÆQÎu?ÅéÊ -UªgHß{@^‘Κ2¤]Ʀyý±Î1°QÕÇ'Ç,êX§L káx2 ´ß]mÅÈŒÊ0Ó`픈âpº³:ì*:_k£Ãp -‚º“ÏŠÁ÷×%¼þ,‹•™véÈVíb]2q2嶖ćVas ¾|HzÊNê@=Ü«ˆ7ëik¬K‘\É}ŒpYöNsÏS)#â èôŠ„¡cÏì,áE¿×€ÝçZ†üí=D=0»Måaú‰‘fü~nÄľ»<ë“vž­QI¼q¿Š[ ‚]¹@²\ù‚$‹C{ª¸ÇŽžÏÝÞBŽ®0ôh”u‰9íp?{U¶ ~sU>í4ߧšïô2º¸ËB †o´b‘ ÜÎêbþObww»wªÏ>Â|ÎåGkh´6°–´„p rÎ[#õPŽ±"7à7vôÑÍEá8Ê–óõ›Á$;E£þ6×zóдR]}îâ¤#¤#ì+µëè0É"i%uCjíÀ,±|¯÷ÁšaÖSVMŒx›ËßÉ5àê‹Š ÿýp‰åÌÔ6‹ïUj(mEä*Üè~†mø™YÕdÇ¡æ—µàÁ¤w#ö`–9­Ôk?ÏÁ)€Â픥…Z©ÑÜçËW|Mü%H*'¢¤ýÝa>˜"VÑ›ÿ¸w5t¯’0QËPG“R%‘à®"ò*¸½¸“ý„B…u+Ö8®Î~®³j Xí< @œ±cêNÂnâøg¬YGй34ª¥ VÐ])¹Œ-l ÉŠ÷5þ¢•‘iþmÕ™UV#YÌ‚€¹{Û­õls»1å¶weØË(\‹OL‚õ EŸ¶É»Rþó¦Ô~–—[шP»så1\èŠzLê¦ÊòþDÿžg•SÂS[OÝu’p6æ(@1…Àâ£R›sP1í ?õ=ç¨úEìØœÆneM6N5‚†åw<3dÏq^cF‹‹òðž)ÿUÏIÂ2M õØFÅÖÆ¿äë~ãq -1йmدNIà B}ýfÈ·/ZºåMYöŽE(Y´Ö‹'á²¹¢ ú -T;U·Åì‡÷F7®<·{¬ÈÆLN-^×(¥£U'UAf0GivÄOð0P$Ý 4ÛªZ@º/³ˆ¯¤Ž.â}·L‡© ŠdlI/_Øfá°>k[عW‰¢SʪÇ1<" Ì£Çó—UÇÞ3 ±ÃØlÕûÆP.ý+\ÐõBÀl{½É‚ÞÄH%Õ¹éú'%œ’@-ÜƺüŒ˜¼žRÛSöÄëÊÂð覟yNuE—Ê|ž÷Ïù£ÝÝvæ‰^Ó+¾‹ÖåZúûR·kÚœ[ÙÈȈRÆÄýûÔ¶0ŠZHç: -wÆFe@g¬`!$à ûp±eš„pdwíé,Õ,#ã µ„Œž<€öx4¬¦«=u^‹Ur± ^‰ ÷´º×hIo Ç–mÝÐΧõ -AÖª^§Î±:™µd¸5âDѺL•‚:ÊÔU¨Ôœú-¾L::V -6» f žÑrº^‚7£ö1ë5aj³à¡Ašº£¨nWÉ©!µp© cílov®>ÕªI8©3!ÕÜ/ª˜ JÝÚ?|iî$dØ_ìñ(°/;Åb÷üÒ49Ãï•Û%X¾eÊal¥Øw픋v_Õ̹ûœ@dr®Ëp.álä§<çêÉJt~½ŸM«÷[£†yœ ä™û3¹î[Î{ +ÙÖoc;„j—¶†©~E£Ðª%9.ì¬>XJ‘ٚмD â"e?¤¸€›&¤oLH:]~•n¦X%„lþÞ¹”­ÏôdQ@ãa~M~EAËŸV.U‚›MØ€ª¾")éJŸ¢Y§4Ò›º‘º UËêC9¼¿ù2Îÿò–"\ªºÜ¡w¥Ô1·rlxؽØнtÿùŒaÂAÙ*Ó`¢†ËJpÁ¿Z+—¢TXì]› >:|s‡¬m’ºÍeaëŒØ„Œl¤·½v+¼¿:É‹£²D¾Ý([žÕˆÕÂG–ðúJJ¸£O´½Ä™I¦ªzÌ&ÀEnqšJâýEî†Ýðz€é¦&»CÍýªçx›h“+ ‘r‘nú«n¨»DœgúÃýEôŽ¼aw'À}ô”Ä|Ü ýꜱà$Ý%É—šš ד‘Z—VûgHTC(ûŠI_H‹áæÕ7}$¹%Ž\»yOaÛ‹'Oî¹ÀiÑÈ‘Ëi'Ç%Ïo2ê{´ô«PÉQÝwµåŒCÆX7%SëÃ?sê¯Íß~Õøu'iiqøúì³€þ€šN ÒÌ3w Q÷EŸ:šuÛ廕=šÓÛL•)M¿o¨i\ÉÑd·!Š®Ò<’ž0´ÎZEÑáR’Ë%qa‡´` ³õ°›Çkûâ)V&ìþ‰#û‚ÑÇRÇIœT´xMù‰k'ËeœkðŒ) ø}ƒÄIŽ¶³\ßqG¹#OÚëðv1ƒ;ÒdæpV5Ñ <Ô^¬c× ./#Q…E§UhÌÞZ»µµO÷¦GU˜c‡èeÿ¾5E«Ž„´»I©rô&÷¥¦v1Uçü4˜ÌSNä7èÌpk¤ÃÑøÁVe^qïÉT€†"šÚÄ•>î÷UѪ¯j "*ÚÈÉåá¯}”Iç’˜Ç]‹K•ÐÍÿŽ‡}Ø_p¸MVãÂ|~Ímv¤Ò5Ñ°¸žQ+-0"ô&'ƒ˜OºÖ`¶Âï iåÛmš¦h›¾)6¦k‹Ö¼eyˆj/ÿÛˆ…¨jú»Ýåo<“LÛÌC¹8ž&n°5Ì9È(àBüš,Á]©š;ˆ¼¯A‡4ÉXDÂÿ1Gv³èËJCò2ŒXÜ:.'I˜|ØÐ7ß©OŽãÖ—‹1_r:~‚–¯¿î‰ ŸŸ›8U ÐpÖ¡ýaErºÕ}Có±wú ¦{˱‚…½œ ƒ‡§l[]WóVëz,UŸ+gR訑²£hiLº¥][ƒcm>ǘ™b‰T08›°ÞÙ©ÿVAýx`üéðÇ!øä­ æŒÎw +.rmÏ){ìûz“´´"’ªý‰²5îxÂKå„0è9Ðu¤«@”ÅQ‚jS¾›_‰{•|Û'«¤€/å VF.¸ÄZ¤Á!z‹;ƒj”p.H‘¼R{ˆhBAŽ¡‡+Áiù*‰Ð?IŒ¢P„¸óøòaH‚È…Ár!ã‚~¢oë 2ÚßA9 SOœ}ÏÄž~4'±F§g»Cp’Ý|ºx{—ZÌ(HK›êÕ a[•…ï„,~m€3,£ÇÇñéTi”=¥EXò˜åa†©´}åÓÀËb9û-[Jã"O{R‡…ó1à*¤ã +–èÄÅl¢ë7H¯³.Ü×Hͺ¯ãァÝãÿ+2¾yÞºÊŦßÕkPG…v„´Õ”ƒ‘‹|¤q›+D€µè@i㻽IhJeþßH‘ìüõÍ=oÌu¦?ò6Ð0û+ýÙ¥r¨)têðÓlj»žñùÖR¤¾h§o1Wˆð”ÿLdO(üµb=¡5ÒâèN‡e@³cñ£¶H&Í•ð!™ïüôT¤ +Â)¿ˆWz½‚"˜_®×¶¡Ì‰VfíIfygñEŸYŽ<ºáÝ Q«JU÷»®œÄ$âwx‰SGZë…Éc¶µT* èOÁT¶ÿc bGn5ÀµÞ¦«¤ÞÖÞ2ñTMehXÖ=,Ðp8‚ W.©^ F¸`¸äz@}¶¤4奞3å¦Ò<ñT¿;0"eëø™Dï€Ï_9QÅŸ>  Y©cpìþéÅý‘ô³c÷†Qþa×馦î½m®+—rf]¸kLfzÆ/ÇZ=]n—Ì}Ùx·Ë8ƒìH’þ È›Q;}Ø7¦½ÿÓhG¼ñ šéZÈÇžE×£ÿjLÑ*m]Û´¿gN>ï2\s5ñ¶ÏŠ]k³âi'llG^}žU?fMUJÐìf&ŠŒcvRm¦a0$UnåÆÑ“IàjV;9ÌÛÏIº7¤®B›y`‡C‹Ù–ìgRËjQò‡bž»¢ºá6½0™þ” ·þyñµ› ý®,>vÏèîúÌ}J²b¾¦Xªg¸üPx|<ÂÍ’U%©ÀŸ`çÕ¬Uò.`ÒÙ +Ÿ´×½Yé)ìÕü “C Ã’ÁòýNþÖ‡0FñN… 0¼ý+jI¢‘&Ì +ØË{e~îØ_'ýX^®jRT´:3€Â䘙Ð7Ù9·rÖ—[X0%ŸlOX?VÁ'ãfmøGmZŠlÀ¥±B Òë⺵ˑ1§mêÁf§ÄI¥IêQõ¿yÛçwH(xtçÛY8fûRëžò±äb5j¼¼t‘ ë‹®‚ÉñNŸ;tKQÜ×Ò–_׃HÎ<‹c8GÞ`ŽdP]èj¢|é´Ô‡­j +ŽýÏ&»É¶©Ð¤c¼·•>(_Ö&áæ;óy¦¶ úÔã]À¡Ñè +‘±•·àòçÔª])J×b> w«’33ŠÀÚž¦ÙåŒÎ¾ç`ŠÅ¿„´¥Sçáævžmß¿LÆàoN"¡uÓoq”ÁðÓ–[˜‰cg† †7˜'î÷B–·wGɈ«é +üòUÝ|€$]žŸÔ4b€“©šak½ÌЃÙÄ Õ6ȆÒ[qÄz¶B³ªùv_÷ÿ.ÂÂAvÜô$ϱ€WäÍBÈ®á¼Ì¡ ÂÂÏmûœ÷~ð|Döôä^ʉÃ.ÈçÅ8Ì.Lõ¬ì´‚ü6¢2`dÆ…tû|Á¡]ÁE04ÿê$?ë¬kÔöJ!ÃËXg ^ûÞbÀÓ£JÛˆ ¦ä}3œfü$‰4P-dÔªÃuAz)] ±{'a[—Ï` Bwî~¶v9+„ÏA ^9Wƒ°fÕ¦[®J°8~@%|æñüF,ÈThÕ›Ú +rÆø<÷HÈ“ÙBÁE Lü֥ƟïÂ[`äÌëe—`€ÉÉ;Ì.‚sî4äï{8ùâ«=3bþ©¸1âiÄü‹,uáh¡1¯Çgs+Ã2¯Tpe:e$äýÏÜ­Ä +U;(©Ü3UŒIi”w>gª¤Å3î‹ý +ôîº,RŽâpØ‹¥4eÝH?6«j™Q¨xÔ[ßöÓ-ÆÄ-ظÜÞ÷y‡È¿’'˜„("î‘Ê_TÛ´Ú$Ñ4—ëæ3Ùà/×ò|…,—Îác +©šû“ÓTƒXЄá<±¯©©m5å­žÓ,ßð„ÑÀä‚…ë\×qÐE)CdW ©è0Àe„…Êcm|0Ü@ÂئïM…5^Yn&¶uå´6M—Xÿ¼%òÖ`Èñyƒp'ˆ°iÚffd¢ÜßCVì¿3%¢IK#ôsyöGð´ÞCh> Mú[’ÃèJÙC~Û}Pq_ýyóL+ ™ðŠ¬†Ý¢N~–׈Tr¡ó +]uÍÄ°Eþ)”‚bâçƒE±{‚–†Â=”ëêlžeÎݶ áø— +—;S I×W4Zi*I.á+ìNyqW· .U´>u}²’ ©­˜`l$ŽDSØë­ÁŸì3ÝoFo€3,£ÇÇñéTi”=¥EXò˜ã†tÐ94E%$â^FàÚé!‘}ÕØ7êØS7†ÐkDs#©ŽÛ+€6'1rf*gÒõ¦øoû¥Æ&z* ¸Ì{Nl'{{¬iªÁì5¯,>gJS³¥V~ó]X;©¼”ñ1íÉFÔ!´!üuÁ>ïìc+&Z¼Oqd4IGRiÎŒI‘ Șzw»ü úš’‰†ko|øÂÑi¶{2 [‰¡ÆÜ0B(Þ-àT¶ý½n¨¥/æµÃ¦e /´ÉçÕäôoèJò™–Ú~†ÐúP„IÒ!|‹Ñ!s©ÑdUg~2ØM¢CK”¬àöK…"Kƒî”…&”==~[÷Ëï†Û*±Ó¡ø™$‘Òvbж#šÉþǯhÓR|-‚âàu2 À¦SøôyÿHÌÃîº+uFXÞ±ø°{ž†A'âP7öò&O1UÔÝa´Ì´âOƒ_ˆ†ê.÷éÁesgO3LI¾ìõ×RÛÒÀUMDY’IãZEn&ñrÒ–?”sÝö%Lb?Ñ3Q*Bh{KfrØ岿QŠÊd Ô `uʼn³é~qvb©}´^Z– 0G>Ð…®"ʸœáìiXì»1($j2þjÈïn”›¼GưƃgNEÒÓ^•ÓÄ<ÜSàû“ê[á¸V îŸAý y¹º‡jÿ§hóàäÑ)E=̇ÒKî¹Ü£ñ½æ·ˆØÒ\rê_²9p«o%˜àaØ2áTue)ÝÒÁ$) òs6à ˜Ñ5àÖ×}ÉýTÏ!òÿ3ó'[Wýk"}!,ñ³Å\½·ðž³|Œ¨lœ‰Ù¹+4¢á²c« +Ãm‘»;qà@¿å#$Ï…Ðç n•x'á,Ë~"wEÛá\çš‘Á3+9|×¾õš ØèzŽ‚X‚À) -{¹•˜µt¹|@`qR"ÐÛ¡Œçæ˜#}Õ³|¡Ü®ˆ%ôÆG"hæo’p?OKÊx-"Ø¥ƒwõ±–äÑ\pîbÝ5™ƒyMõixÏ;ˆ°ó³ü77eìϘߒ’ßy^X5Ó‚~Œ9wgpÿ?¶ú^( Þœ±}A§’Hj0UE‰N + +ì8Dǧ›®ª©? Ix ù^{Ûi¨#ЛÒá Á»œ†BûÀ…F\ô%sÈxåáð©/&P\CXg¶ºJ;ѸI ½Þ9ÄYíx–*Њ wóx®¢“‘ ÊYé<`X’ÿ×$ÁöR%½OØKÙ89k3›ØgÓl˜õÄâÙD§¯iöÜ‹Lyp¤À:r Û©ÿá×I·6Ùjƒ9iûW2ÕÚøõ®®b›y/¥D=w3<‰(î¾ë1)Xqr¨}b0ûÇ™AÑDÎG9០+ß +&M›øì°kQ—æ¾4T¸Ê¶ÕòScS/½²O“â«+¤RRf‘%£è"Ž½Mƒ<.è¿b{ùôs¿À|î¿Iñ±/Oe“‹Å&{ú¿¤Íµ; UëC%N†X˜cL·hè]*¹Œ'jL‡pÓæ,~ê—YÓ7¨C~ Y®ž¡öl/ˆÀcUÆ#Lš¼zTË—!Ù±œ£ñÅ‹pÓ†Îñ_‚bzIš—NDÃ5¯ãÜÚ·-þÝl+›&æT.ƒáÈè¯E2U|§£+… +(éïy×òžy?oØõ;!Ÿ/1Ùã”F|Õƒ¯ngý6,¸/±Ì1nZïÈ.,xy°Ùj¦¸@>xl5Ûvðæß²ðɾä"“Ï”asÂÏRW‰Ô”’¿1õSbן6,0]8eÅ +Ã!ZË=µ‚Qo·$l{GáÈk‡Ø„ýyÅ|ž©”èù-ŠzW³‰þÌÜ¿·°ÍnE~#¶Ïs'¨úFpŽÛ^œ?”JÛàm7‚Eùψ™P—:n¬¾s/á€}Èò•úÙÏ묬Z†bÖ|QùðOdÝ¢}xnù­úÔæadWþ¾ûÐÉ/Œé×™UëhNo&›H}n¾fnW¸£übü8…`¬š+ s}³+R‹¥F5P÷§ªâwd˜ +åâ-4çLÇþ3²÷Ä<3väøjSfÃ}2³ÞdLs¾1$…ù}Û´XŽ=Ôøäþ“ëÑ\™ ÔÏV0'Þ– yÿŒçT-·Éåd(3k¸Ô%ÇR`ǺŠZGfîk‡˜ò‹‚º´'Ò³%ŽËxr +Ýžý*dúDâúì)!ºûØhü^Av΋Š˜l;«ê¸À¤â,X#R¢hœåæìÐJ ¦àH›ù!5£”8+È¢©Ky K]ÈR^BÕóï ¨¸<>ÄãÁ5(nÓ²bz){yîÍC¼á&ËF´ÝÍoõòÛs%›6) +–̱ÊOÇÝ|ú›%ã×ù†°#ºŒæ8|õlà%žYšÃniŸÕ7?Àb#@IF"MŒXúù¦ž ‘³êÝó}¶Ækxõ£z¾øÜáºAå)ê&¼#¤iá:»ÒA˜Löàëkip«¦?å? ã)ò` ¯hX)&ªÒ­ÃÐ"k[Œº+ÚÓñ^—pDÂGh61ÞøxÞQ™€—wŠ‰šn,/,·éñlØ Æ–þ[©˜OÇ1’2¹8j“#mGÄ'$¾zptr,ë•4q‘`=vsý»ñ´p’E”…Ú7=p¦HV^VÌF¢Ã[ˆà"áӯϑt¹+R÷õÚ_:9ýï­—è8›æ¶bw=EW ¼`â·q:³âP1qOL- !í˜1«=ë‘ÍO;R²ù‰‹=ÉdCƒ¥õ®6kº°t¯^X­¥›ÒÐPž¥ë‡}W¡yÿ4š8#?RóÊÐèCâºèT§v‰ód‚Ü7G«2Â"õÂ=´àX¶[¬ +%ÃܧRÖK{Ý©I}ˆÕ$óY/eÅÎT˜Èœ9ó{—CÓÕâÕƒ¿[*zQÙ ž(b%¢bÚ>³\†åBÿ#!¶mì/fß=_ŠL+¦C¿êÁZ\…=ÅŠ;à¶8áfÚ6,Çx‰T!ñ¦`=.¯·ÜPkœêxøy„ùp§yq>åFhRyß\Þ˜úÑ{ó t4<ô]lâØŽ'%¹*ÎÖÿ‰€ã}A` p Áz¹·ÛŠñ̤PR‹ iieDö±ñ)¨¤’~€Ÿ¶ÔÂ÷ú†&£Ú´o—Y0¿/ãx¿ê¬HÈØèì,`i¯:…òL¹"ç€eJ™é¹ ç¼w…ì< Ê GG"Ä°c¾]g;qdZ‘*Í<›¯Í³.Û¢óåÛŽi÷‚ÊÌUnMÙ’joƒÞ³áq—‘×ã£èo-ö¹Älÿ$Nc/·Ú7Úäò%ñÍÂÊàÏêiö#“î„;Ÿ‡¿T½’—[‰©}´׫„{¦Ê¾Îæ~B¸ôžÐh”~€­£ÄÊÓP¬67.Åp›Rà½ú Ó%ü”{™Ž¬=gnñ˜d´#ï$(ˆvå´š…æ '`¥HÇtRâù† Iž‘ƒ¸0ùFVÁ¤ð¥Ò²&GÙÁyL‰–FÛbÊ OpäûÁÅxMÊa‹ÁƒÐ6ÅêÎ<½™Où#'g‹P'JÐßr#R—Ì´|ý»bkTVrfê/Êe¼2ÂvyiÖ«â•/™€krÚ|ÃúüŠ÷‡Ž°«1PªLOmx,Á‰ï+Uv­TÉV냗Äâ‹ûÀlÉ!ä•DYÎ;üzªº1u.ÿÜÃÒq>î‚W·;²ItM8M‹ˆ»¡êúTFc÷: +Ø¥`Þó°í­žẆá<ŒV_“Ê>Òe!þSžKЯB '™==dÐHÎ4V ¿Äa⤿wÁ01tÙ%°zÌÌÐüˆÊú™ƒ–*ô€ÃÄÃQÞæsM à âh,ÏdÌl‚šJ®d…&e¼¦´Áƒþ•ïªÞ6vYƒQCBH»„%+"NÛn‹bÕ ædÍN„´Õ*É6Ça6é}°Q¾ý9-o :Õ,Š™GH€™£r±9’9[Âø U/°‡`lñóMTÝÄåt¥Ü[øWB«Õ†ècˆq™¬÷6ý‰¢s<‡ÐÜ@Nþ Aè‰ P.Δ–'F8QHëĆJã–7©M•j÷­¯o2jœðxŠrCÃF!Ë>eH{r¥N N̆Öýz]Òã<üÁJ 1üŠzo}D‘^)¾Ûïõí•Ó æÅN š93yõ2Hèöýb÷¾æÌ™¹½²~»ŒO=R{“oÖñïÿ˾J´ÃÍ3_>‹€Êô ·åYøòÇô­`É8÷s=¸5¯8òÚ­Ž§vUtÜÚýR«þÂ>gúÒi’‚tL(Ë @ññ;²ÄðïZ5Øbëð> +±»¡\‡˜l2ùô)ËíüøÙ/ µ*Jµ[X¾ã¥mц +sÒê‰(>†Ž]7ùc¡[ëdž3»àjŸVcwÒÞ læp¯0d¼œsȹ«@¼ŠžêÁ¹:äëoVÓôo.^ ÄЯ¾Ñ©¹ó%ÏÁ¡“´•–³»ºîšÿâôež: à}F*.Qé׌ «‚sž{–R ÕVbú¡8ŽßÃÌ+p¹sQØßë7ÿ˜ñq–ëYK¨—èªWÔ£ŽéèKðÚâõ®¶šŽë{£Rs¢@Iƒn\‹ÚïYõyµˆÞï¿ yÉAÃ-„JUÞÎ%XInkùGEt +í¥Cn0fœÚet.ÐŽFTÿsŠoX­Û‡Ý ˆ:öUu8P¦7V]Œ¢äœœ¡Í®Ìû,;.ð9l¾nEC¡L4ô×(éè ’y9)Ì=A—l£ð£<Ũ0µÅÕíøRÜûˆ¦ÉlOâï»RCs†ãéIbÍnYÃ$KÈœ)bØ¥0m¡Î\Ò A‰0˜ÜXíÏ9¬Î,{+H±Ú¢®ë¼DlF-¯õ­óxEN¶ñ4ªÉmÁÞÒ4ÃsP=5/0ŒwcТC¦Ï3ÑüW–®iüçn¡‘rÍ÷R͉˜P?‰( r’NAe‹½sÅ=«&Œ¤^5=_≽éZbõÖˆÒU5·ÏÜ¿Â6½Î›×u!“<¶?Œ)%ù‘o‹‚J‘÷ã‘FŠ–+ª|nÆ9¡p€3/èøFÆN7"$ÕP%køÚTé)qÎþè¹$û1œÙûy=ñ}<ù‹=ëÜÇFù®«´A#ðúÈY'ãºÑÌmk2o±h’~I%S&„¤)öSkyö”Á*ë{¬¯CW_²#í~¯[Ü2¯Øû1w¿+n«Ò [·}ÓTX¶b„Õ¢ätSŠRL;…³{h‘< ûíù¾,â¼jA }íЂ, qîYóíÆÉš´ÌP ÑEc‡¥"÷ÂU&ãÕàìŽï¼’™‰33˜'í„OàwóŸm¬£zÙ +£oÕ‚íÎiîÞ% ÅŒœâ©ŠŽæÓ8`Éê»xOç–TçÞ¶Õ–ý¢A61Yˆàw2ö™ó‚¶ÕC×´áN¦\`(ÎÍóË´n(DÕ< +µS‚EòÆ0Ô‹Ï8tV¾î›µÈ} µ¾ —‰ƒ¯ê¯o¶xob÷Biî½&ÿE=ÁM™ð$‘5JþÓ|ÒŠ¦¶¡ÐXkä„…«:ékº´,#±0礧V‘ËO£\Ái¯Öœ„¡é[;¤î.Õ@Í% ±,ܲÿ R& +ÕPoñìT8¯†€×µiY³ÃVy¤Ÿ³˜’Ôéú^ÇÁ,2¹ÿ“Jó ÖàŠo5áÙV¡ŽL¬oíšúsØÕ,°í‘AÑ`¼ Ó<êéYýS°N&îFŸ^I–¹k;ø¿òsCüšN3:…=À3åŸì€„>òEºC…¸Þ¼âPÚ2Ôý¶j¹MÝbè‰}¹Ìé£qpðÀ*œqC×"ÏÀ†wb'¯–fý‘ƒß…ºOKÖ§pDvu9Áý'ëÔ0ôf1be†ýw’yõuþ3>/¥/eË’[Ú\˜éŒ{¥â^áRþuocÀe "ò ¼“ˆð7{CåÜù„¢¸Ýk£JCq¦6#®eÓÊë€A:`D“±ÂMYâ¦õ…XÊ'N@q~šõ={Ó‡¸ýA6q#ς؈½ZÃCUŸ¼rÐSçטA¶Êxñ&³úk'%Y~æ.ø taŒÁÜZU˜CÓèlöÞ¼ð‰ÿ‘3`¶¾a쀅ómq’O¼zD#vÕdÊû_¶*Óî\^æÚ*8¿èÚô¬¼ÆQuÿ2'k¾”Aô¿Ýx¹L=ÍÝpãÃÎ7R¦h„l\YöðAn AÓkHe""] dÄsH )…-Î}mÿ‚ÅæcyýÑKqIrEÌ‘e#•Ä,Û™Gs;Û°K;ÀÔÞ6”o¿ÄÑ„Ë4qùXù‹]A3ÕÌ¢ByŒ«ìˆþ"v$‘yn•Ç“Àð•$B€­‡ eÓù´LŸ7[œù‚Ð<KÌTµÐµ)Ë®JÙ†ˆ?Ùâ³·Ø”À²:—ЀÂd¿ˆzKįÀ ¡6ãEÚ-æXƒOS—CÇÝÁ9˜®ƒÇcæ×E~¨ÍÏ.ªÙ|mšl'tS4àtTbv~ŽŽ¥<˜…>Ú§,)ćB·ËRDz_ÓÂ&_(e;1z‡´‡ßg)|Õ¸Ýò§M K­GÊÕî(ð¯U¤¸d¸"ä½Ë¾)ìS$‡e£Š¨ÅîÝ;GH%XHu\‡$æIùü‡t\,ôF^ñK¨2¶ÞÁ½…˜m ]Eu‚é ·£ëû¨p°øЗÌlŽ¿ê8H Ïe/`.$¥€0 Qɾ|ºb—惇¡a÷,‡PHÆa 1, +“S†}Ú Zâ}è9@+É)“Ý,8HÖUªè`qV)“ª;°kª€5‹ÓŠRóI#çÆ>œ¹â„Ús<¦Ž/ ýeÝÇ/è7\‘ûÙXAvrX¸&Äc«²z¶çFοœè³IÎ~/9Ó½ó“«Õý´ý(¦íAÕù¢XfPé.—jB`á ¸{»~Êh ¦Ä9µ§™^”½ª K’ðÉÆ"r<¬UôÕ#f(s–ØïZÍUÖxÿO‚páÀã6PâO5>B:’TÜNk^¥vŠ©â& +ùQf%Q§½oÇmW Öª¶Ý†rooØŠ¦yã>>ÜD²ŒÎu°ŽÈ]÷í–ú >2ëq<Êê8™]ñ04CÜdÅjæ¸þ¡P9QEO篢œÝï–éQª©¸î³ù·¢®-£iƒ¢—%\Ú™Z0¸JÆ´Iè gº5Æúûý˜Â–‚u4^«»é‡pm"§ 6Žyq*]Ø®’ß÷áá³”ôv×dE´å~ˆ(¶ j°Š=þ˜•'ùè2%–Âyz_†¢í‚Ñ_Ì?Ú—ÕÛ 99.5æ[ó+¥HjU +† å†y“—$=±òüZ‘Z᧲rÐ.=xâSQÀ”K*ßÁ'qÆÐáJRÙFR#³íž&¥Þ·¸“’—N ÂA>œQ"¡p1\9Q¥®ÅÖ¼÷Âé³ +b8v,¡LeØã/sÑ]¨zžQêÒÙ¤ÉÆ´žË¥MÑÄv®µÚûsš+C¿¢6<§O§&hüv¹À¾·MÃ@*¡ÞEü—„»i=¦ÖyÇ ©Ç=ðÇôQïÂ&Ë;ÄAQ‚ôR1ÛA–ÑyÚL\{äïõU³í€M%ïÙr—ņ$ŸãŸAo³üh¹îQüOjLß5/ßÔŽvÅsrSû]*㫽‹ZýC¢õÿ¤G h¥œát<Å=–¹@&¡>,R:ï™}5-i6T SÙçˆö×& J]°À^i¢9õ/XþA‡)=ê–¼p¹œÚ™Y^=f®ÜC3ùç U¶ ~sU>í4ߧšî=ý˜-.†³ËcÉhC•t˜Ì1SU3_¡ñ®éA××H3ËB÷—d+Ë{°½€j”¾nû(s<ãœóØ«ÞJ±Tì‡n +é§$£1 <è4Ç¿ ÈSª´|æÅl bÀª¹2ʾF%6r.•ÿ<)Øp–óNTvZó’øöšé—ØÂ+Uúí#¬Áêó™ßÆu|òÝ„ýRþëqP«›_~YÿC–æ7ÓC`0‡"ÞNÁBJ6RzZñÒý§¸Qoó)Wd] gå7À½aºfMÕǾ¹…. ö+²*:Q¾È»˜ßþ q[Lzk8dfåG`ÿ¥©üÖ{a‡´` ³õ°›Çkûâ)V&í©h +„X%†æ认?ÕEtde +Š_wRƒÎ<ž œ:Õ`»É§*P³Ÿ6z¶ká®RŒu&.öŶ Ø'p”]=ll;ï ŒÑXæû0/Û‡½ëÇqÔêbžTU\úæ„6™äÃ9O­4yºÿ¶†FêªÙúA–bÙZWš¾Vfz›uÈvµçÄò…ù½aJÞh%W!‰¡“­>|CºŒ[–„£Í—m!@¬Æú?`^éåXy´¡ÁG»F‚¦)õ;Â-ŽÁÄÕa³•µT¤J@ßp+E •ËÜ!õ”50XÛÛ‹‘±ÂH¥¿óû³Íèÿ³È!_¡^)LB†[ÏT“PË£ºè`bîÕjÔt™í_¡˜!–@&Ô€QWñÂ¥³kŠ l­©¹ó$jì ú_;«Rĵ’Ÿ#õ«O§@Åèg¿/pÕ³àélqGŸã¤¥ÌØgå5zœ!Ž ܾÅiÙøæÁÂÆ4è;epëŲZf>ŠeÞ¨ì7‘c˜ûŒéUá-Š‰­ë¤tØ™G^SÞu@P×ã6•FêÓ´²ásèFÞB”\¡õ¨ +gÒË_Ó‚™R  +)G1Ÿ±dërsizè0z V݈„H§éŠxù]ìXØGÜeéË7£û´ÔŸ?ÀA€šÍ¤UÆ*vµs Û=;@#Žiî›Îü‚Õ½‹ÆQ½-~èD¹¨îPäÄF«nÊìTQ˜ Òuè—hgGW1¿Ë÷y“²#ĹÀUÔäðÏÔ¥ýÉ„o+Sï{ó¹ Yd|­/|QíÃÏwRoõŸý`a#¤²ž  R}û•d믴)%e{ +©[”8N_¨ß?g£½on/ãÐ</…^é2´FlP›µ¼}f–c>µ÷<ÜC3xÍß›ÿ}7IŠn/ŠÂäÎó?hyÙç:”È*–R 2=ïMÈaùèÐà–¿8˜ê¤Á}SäÏÆ¡}ǘ™ë3Œ˜Àh©–'­Ø3¼Äü£RñA¹৻¹ …un»–µêN„Tˆ·–„„á‹ µª}±÷Œ^R_SRéTŒj3%w],·p¶Fj ¤v_]•ÌYûŒÌ&¨¼ ãZNdšwÉÓB²8§{Ú€ˆ‹¤HLq‘¼ 2ö ÜTnØM#ßèÄOãî€ÀØ•APÛ>,«!J’çÍš¢Çôɉv#µ&’°#ÆYíþ9б¹dZ<XL{ÚðùlÕpÕ™ Ù´Ê—&ÊÅ"L…?")ð³5t°L˜_f›Tl€ÛÉáÝçÄ£4ª»Òde£·ÄP ¶ÅìÚ׺¨ÎÍÕŒôjZÐ"å=ËuÚ#*¸¹ÚB2àå=«V½uŸªv¨ç¯I/cKµDçäºh†ODÅΠ¹Élw,}wQy~D f% 9ã97]`£FŸ÷£ê¾™o^vŸ„îÑXárˆG˜‰[Y.{•ñ÷Ñ9PàÊ…jÛ5·öcŽÅììcônÁF–`V|ÐEGõbDÜh8¹éÈ3‡×øš®æWh&2˜|=™YåŠêï}Tž!¤11Ă}Õ˜ÑÛé}ŽdÐéé/¶ú!”;©ÃªEéRWÔ´A5dXRFV›ÎEÒèº*%•&/Ü$±ç¾Ó…º Í­úŸ-¾Ö Jèä°\äF–‰m?L \vª@ŃÚÆ8<ÓÄÇç5£¬Ëk@⢒MeokÞþyš} ?ÏÖjƒý¡ BE“FÈžá+w¸b¹ñæcçS’z‡Ãü¹r˜Ê=Λެ /6Ào§²b—[1¾Ë¯+có ™B£c ýo&»Ö˜î“:É£ƒ„:8—ÙÒC!Õ°ˆ$Q8–Ÿõ3ˆ}<3`rÛ«8>§C¸æ¿ösIÁã&Ab ¦ÈJÎÙà-žÆæ.iüm³hbÚàË™‡ºqsʼnÚaV¸\Íuu#2eˆ8¹ñ(Ï÷Ÿe‹Äß–¢ØÉ.æõíDà!7ø€Fx‚cÿn  ãU›Rn" f™f´â$d ¾7š¶Ûž›Ýë³ßª]LòÚ-aú®þ ‡¦‡Y LÏU`ɼì"‹>F}?Œî2Z¶ÕiØ£²8ía4CúÈ^&èp…aa”釱çiÁ"Î +&¥–eIï*äXi®ŠÆfKJ™ÛÁ¼ËT‘Ë@9’tûÈÁ×Fžì(Ö±g·qôõÀÞØœ¬ï1Y2בY^þn+1n—È;þïyü¨Þ´9¬#¬~I 9ç ×.I/áב™¤.LÝ¥¾¤ý‚°èJAéæÿÃá-eÑ¢žwùAžN˜šçˆ¢Òø‡ƒpjïo¤5µ‡àµúÚR},D¢ë‚ƒ}cÚG¦K‰ŸO͈]Ká&îæ¸îQ„Ð!éeSV´¼›= +DÎG@|Skµ÷Šl;¸cêÒr Ùgý•He£úÅ<SZù&¡˜ÜBë–Vž€Ü×"—{Ò骴?>0¬“Àw¶ûòÎâ.”ÉàW›ØYŒuKa®ÿË„u×»)S÷pçõª *-³Iföv¢å´9õ÷&–¥ŒZMúPwÂvtê)Ч‹Õ÷R5°Ä6 ¾!?¡{o@«ñ½ŽŽjšâÚ3C†œÃáQtc%;¢¯^œsÏL‡_È{:“ç7û.éRÌêÆj–PyPê!3À±ÍÂ>šËr«f3þ×Y‚éh?ÄN” ¾¨' +ÛOò KDJøä1®ì*C¦÷%\lŸ1ÙHëJÌ3¯Â\’ÆklˆxJ¦ŒÆÅŸPLCô,ùþ­˜Ðêy ÛBa?ÒjÃ’'trú÷ÁGf̉‰-x’Úѽª¡J¸6æ wêeñGÖò1´a"]¾IR³›ëi¹úf¢É=afÙÊõwJ]Ü;¢Þˆ’·CîÒ©±e^S«BëäN}Ô^Mô+Ú‹[$ãäšXõÕJKÒ<Ïä웊ûMGÑzÄTظ'û j—è@61†™¿s¶?–(Úº˜oúü<ù¯4 å¶Wp VÈà.ƒÖxÔú^T­ãÙ@„¡Ü)¢|)È$w}ß}?½)ÁÜ\‚$Z·÷9ïùý­i•†ºMœÆûƽ’M[¤vÒ•¹Ã"8šR}J1·3,–÷C®@ +Ò/ëëZFRÞ¬ NËtZB=‹ü›ô¡Õ&%ßX1Cê €–ú†ñêœwwʳ&Ušã)Òc ‚’á2Ððž¹60ñô*­Þ´! k†Ñ¹OJ³KèÆ£wÊG4ÇKyã´ì5áP&¹=‘Ä£F1¾£º/Ñ2JãÆP…ciÐnýÆ—f"MƒFÌun§¾8VÁí.®¼%uy[i3vNµ#œ;2DZӈàž¾ÈùÎ1ÆoB8Š{áköJºãf€È­¥lÖž&A©Iñ`Ì)›–­ÂV{Ž[6í_Û$J©.¨Ýd®®ËöÇdÎAÉLá;Òy<]Àö eé¹×‘)Í6¸È~8AøÌmÇ9ÆGš¶CT3j[ëÁn7ìGïùÒa¶€º¿.îCD¿^’Ï5Zžóð(\A8æØ'_|Š¹˜@¤–Gù8gj\®Bé-? +"›‘§|gò3› p·B¸!ôÁ”°Ú§›èá)ÆéÆGØ%÷š°ÈJã9œÆlMoÙêœ ‚Z/ÞĮܑӦèÂZeÅ gM<¥ñí›gýâ}ÇU|Áð†”ìlr䳫~¦YÚd}âR•÷Ót”ÎùÝR—(¿ü1§›í•¢_*J§\Ñ-ÄÍyºu[J¥jrA%Z’׋=ÕÐïÿLês5'.ÒΕp\ +çýª–Ç´†ôlÛ\*’ŠO³ä_gŽ7·Jýbû ;lžE˦Oíú~7¼,'ù12¢…w?ÁÛö–d}ò³Æ5÷\ÂY…Ê¢²I(½ˆpmŒ –+_uÍu”­¦9—£cæ1¨{¶æ¨ÓQ·Zê%‚11áA´Ð\JëŠV;`öÿfsæXÌÆ;Î:M4ŠÑ/Þ  }‹íÞ¬œºj~Vè=kj•‘=ЊÉ‚gäÎéž$Ê'Åo»€*C2™_’ž.´´ +ý2ßÎ2&‚0DÙÜŸžæ¹ußb#”Š‹Y©Ó?&žOÄG'‘Ò7¯øE€´iØÍ”ÆÓ'Ð +z-Z)ÏÛýZpâ)¼ÒšrJ䱚jïc…û‚0ñí°D¬˜¡›#+‘kÉÔèÌo{.^›Éì[4Œ—‘âÿîœÔM)ñôv£Z(»ýSªÔã¹é$ksFUñmØØÖn8µpÎfŸeq Qx‘P‰R®ªe™ždÖ•ÿ endstream endobj -1982 0 obj +2232 0 obj << /Length1 1614 -/Length2 21998 +/Length2 22531 /Length3 0 -/Length 23612 +/Length 24145 >> stream %!PS-AdobeFont-1.0: URWPalladioL-Bold 1.05 @@ -27295,7 +32694,7 @@ stream /UnderlinePosition -100 def /UnderlineThickness 50 def end readonly def -/FontName /TVMKYN+URWPalladioL-Bold def +/FontName /BDDEWM+URWPalladioL-Bold def /PaintType 0 def /WMode 0 def /FontBBox {-152 -301 1000 935} readonly def @@ -27309,96 +32708,88 @@ s(kz Ó¿EâLGFô_™îßxÄiÞ¬‚Ç5, +€T üBÁ)þ¼‰äŽÿ©.s†€«ÍLÁUt …µÛ«€Ã±$)Lè¦ä xM¹ó`Ñ0õ„ß^üÓº[—U´[éѸ\Üî²™ P0¢þØ“ 뎇aWºî³¤ê;ü]´Æ ̨۟eÞ&§6È&þ~sD3ÈR¢œ=‰çŠR–Ck Ê‹6ïá>O2‘S;ì—z250Œý"¯²)ŒcèÞd/ArÿÀÙ;òK8+“f^²Ñ9ÀEŽhpFRtgŒ,,zÑf€x0±œ“5¦z êÉl“ʢжˆ6\¨E+PõOCêj“ðˆ6=,¢œ3 .óh­…aD¡ YW0þ„šÉ ‘†…râºGüÆßÇSì£W­=¡ûø˜¶k½¦cnÍÞ0¨7 ?Ý›ú`÷ߌÖD©çì«D}Ò£€Ó$‡!”g *ýV¬µ#ªHô;(ö˜èxSG >Ú|írIúÎx„:[[O£eÉÚ>8‹^tûíÎá8Õ…\-áwÐk(lÙ.9FÌâ&S6¥0J½r‰#D‰’àÂÿ­^b2>PèEoó¡#­C@—Hêg @¢aˆm°1%-½~@RÖ@,DÃÜFXÅP¤c*ûöõ®~G áf ØÁ »»- -kDMu¸‘‹¦Xœêà2+m˜ìQžMý'¹é…õÎõG’¯j*4×<#BL’Š†²l´N‰ ] ´ÛÂK’[ÁŸ^G |ãhLV8œ¤6K[ƒÜÌS¢…>ü‘¥êÓïšÀHÛw[ísýa…jÿ_:@³A"ÓD"ŸZ0­†äf„Ú’æ»ç®¡v¼rãVkÖO¹C5&Òl€<Ku<¾u{ä-›¢z’-ËGFø·ª“+PiêáçCñq@ʳi¾Ž°9h•BZØ´Üch>ݶ¤7¼§ÛÑMÉ -9³ãÀ»Å Lƒ3À¬}1W_^1³†º}R;ÒXÓs)qP¡½¥d¥ˆª@šOPó-r8#ì•h:kûU¼öphôu`p±)=T•·™ÅìÏ/y<¦h>Ф‘«ë@¶À×T­Xûù&©Rt´ûõƱ}…ÞtŸ^}’‘>ñ!I>5?ù/²Èñ•ä>á’) ŸVwǚČt±¬¹¬´¯†7ȱÏi‹†®]x¡Ö´ÃÝ/X˜!U+–_(¹ÇÎTåþcmç^ú*·µ!‘½¯7›Ñ¾©‹Ø˜Ñ 5ÕãÖÛïæ“Òwù?z¼ÖžnkD{xKúr»ù(ñÙ8º·¤°Ÿxl‡uÍgéõ0_dŽ Á%et®ˆ'3y8Vþ»8ñ:¥˜.qkIË;ZPóÉü©’(fœn œiÖO<½SÞÃxw±,ñ„Ê^~Nªå±ÊåY×°$N¼Ì©ÐÇXV+)2O7=eÙ½¨ “Ý¥õ núY’öÛ3;CpKý¶lÎ#OÝÚ>¡¤4÷Dã¶öô’øÓª·â’Ž‡D=oàiuˆ8Š¹ºÜ6V£¬4ó¤ònº.Zó³‹Ó†à¨³ä/|¾Ãfæ An&‹0ß&ü8‡ –VoIÑq'ªyÑwZBqÛ"Ìú%cœý­«²nn_ `5,uZ"uß»>‹r—g;¥HLÆÂ߇ðñǘ‚Ö—€÷Ëå·–ÝðifãÜæHÀiÉð¢ m²ƒéI§Gò°ë˸$eV ÒUõÐ -ÉÈ.cK±?ÇÕwžhY°æW’ £G±t™-¸ÕB¾Æ1È ó¶ó£Ì,ÃЂçMEÝLÍkGÍz´m¢™1pÝkøEMcÖ׸‚² …#HÚ…_ U>¿a‚ñWGMœî -åñ0Š#ºë$kaŒ¾H"^@¨ 7œ–´dl3YMŒáov©>_‚V:÷8 y§PK¿[¦±]W6‚=ŽìПٯ#5ÌÜP+N|ÏQüÝ;ÞUO*CÙ$_œqúýj©¿^†ñíÞF]~Îdb×½0ïGgPêâ(Cfƪò²’|ú)4¹]~eT€}Æ’8“KBƒÂ3ÈtOÕì„K)f÷/Ú#ê:zPûÅĨÿ® k•9#&w@“{Zó·)µ~at·F•2)2Ã0ihª?ÏmÏë[ðò2•Ï°`\¶PËVyÅè a¦Qþ¿Û í¥“]ýõÁVÁÕ 0ƒ¥L9{ö{"ª%º€Úû#€Æ©N3g|›¤s *Œ&ÀéÁ;t“oDû³››vÿÃÆ!¯³A)/™Hý¯Q5²3(@0nÚóA%5Áã -ë A’Q Æ6l`}­î>–½~Œ1ix¶¹ç3㊓fÑ•Q¶×?®pN¯5/Ñ -üðÏ™$ËfE‹ì²±¯fƒå VÒ›JC¯ÁãS㓘]k†*pa…§ÈÉm‚ÙœRÁ€pn2Êt¿Æ_K‰Øº•®)TyonŒ«3Bt æ´mèO‹zäÉäfbXzÝBeq¡Suø©±Ž;Sq‹Ê§n¥ºü´ñ»¹ ;°äo3‰ ¢3kî(}Oû¹Š÷Çkˆ jGª,ƒÀ­#7‹úXª7î ò’Þga°¹â*GÈ \ƒJ³‚SŸ»Ø\âù—å3Íó¾:ž%ÕÆÁoökYב‚Ó²5´V"fA –¾p2ºBÞ?[‘ŽÑåWX|€T­ä±–ÞžaÂY©¼²wÚ$KýȮ왆IC@ÿì€+–°XM þ£8›¨dE ™:v'½&•Ë è ‹íŠ:©ÁS&rÜ”·µ}N÷Ť§ø­æRB€z Ö›\cW0 kçù@^æ²}¶¸´Àý*$h!çWþŸ‘úoóiQT›ºnŸfµ.Ú®¨Ö ×–½ë¬“•GçÐ5‘—ûb®“Þæ@zjyö:]o2>é*£FÊù[fv7s“‰Â]÷°±‹ -:W‰z­ÏzÕº°|ÁçîNù1›ãá‘ÃïƒKÞ^ÓD÷¹uyR±t?ðƘ¨û -yvvýûAæÃ)¾*k4µE~nHXè-$Š n€íÅŒwxUîÁIù\…> -ºIuž„ó®—D»‡D“2š„(WH˜¤C`Àb«m‘%›¤m­@QX´xÞJÍHø H¼2ŽH¨~P>c{äCzÑsþÍKnðéÞ¶uÑD:¸kUZµÒß°ç¦Î‰ê|’àÆõ’·Kô‡Üèz!Ï{ž%–í-ö”è»e/’ëÐõOTª¢ü:Z—¢Á³©2 …… »³Â÷O] Ú×U–*lGc}d+w•ŒK+œÝ‡ö5Ù3¡ü!Òžƒ®Ý³Q¦¶{¬ŒZDÕJhöç÷z]r{þ©))ÿÓÃ#"Qjg,éiÅOݼ -Ëó@ ±ÈÃýÆZ!àe½Yb[¼WÞxí“ã‰êXZw¤nOÈöwUë¢ñ -->Io£¼;fNÓ²%Z3‘ÉÒÔ‡Ð3mø*ĪÇNA—ZAÀ\Dãæ…9HžÆ$ÿDÒl7#9¢¶Æ¢ï?õìnLs´…eÿª -jåw‹*˜P‹”ùÕáR×mžzìQ_É_A™í·+?c ¤¢%¹s4W½¢0¶6âO”5å$~ÓªV”I„hŸðm²P<Ïó':|Íó¶Ø-NŒésÒF4/ì‰ÖO9®½\)Q±mtzûœd‘õS˜¶"T&f—&ӵǩ5¡Ujƒæ—z&.Û1V’Ö¾)KÎêd·³¿ÜÍÎemô²ô>~V\su2±k +é•EÄ8fv_ -ëê>eAž2þ Ïó.$8‚ÈçSM¼UÞµ¢a·ßBha=¸Ò,i–˜?ããÓÙ–‘ytûR'@ÅøVz›kb,Ñ•í«F™»ê›[Šlpõm¿Â™,»ò¢åª¤{é¢zt‰B§;Fé¹$ -1å†+B›û3Ö‹8â¨lH’EhÁ G}ØQˆV_ Œ\ŠEÆàßÓT2¤1™kÔX”¨ïe ©I½xüW7¹ ö;ãö’E‘Tâ«,oM‰z‹èÈCí"HñŒ/$f‰Ö¸z¢÷½]aÉ¢æºOw 4n»¸™ñöŒ²›O¥½ž}`ü’–Ëlyb%¥—g(1†ɫ@ül3'Hঃ¯ì>«rÝŸ¼—ëmÅþ*©H®s‹ñá["Wúô˜ÆòDìƒp%÷팱ïó„B„˜¼m¹2 ×}A~,òŸÂJNzñ÷E`#Ë”ÙïÑ}’@ˆÖ’VaµµÄü-ØcM lÿ)»5mbÙ›“Q3ð €> »ËTJy©KÁ: anYUª©ÚeÖ­¥>3ë*3p(±FâNpŸßß∔Å!tû_ -(&¾åýC4|®–¥iL[9YûªÝkZøAñjÎùŽFi6C+Ù‚¡®k¿7®ãZÔë)éh¿‹§¢ûÒ5äu•ëi†P‹j™'ëdp{SgžÞ <™^{O“ [ß~½¾)üæõo“̧ïÄšæZCãYõ ¥‹O­­N© žÍ;ñ’Ï™o«|ª`le+ædcöxÁhÕW¤¸¼Å2ÒkÎñùÔ’Ö7÷Ü‹ÕØé -MKõDúè¨âˆÕÖUO+’(¹…öˆˆ¨†U ”4bEKq”ÉËP«š-爠¹f¶½é¯"çæ¼o71ai­‡ÚG]Ó}|sï7žly–V«ùÞÚ¥YéµeŽÜxhW¹G4•¬DË•@—I~ë8ð./,ËÖÀ©ðQNE,„pñkÛJD¡5„å9K»þ` ’Iží%`ûh+#‚V£ …øÕYmè4Ò¥¶Ljh€½ÕØ]—†2j%Ú#!þ f¢`*Ž_}‹ îe|'8X‡ûzÏúœîßàÕIOÉ—¢åuÖ`kHuåÊ·N£ñ:Î p‚³GþD‡0ŽÚÜ{Áÿ­O9ueÏ%ˆ›pb°©ëŸF Ê ]¦¢Ê.Ôb’= ÙÛHà®2!5ù?•àTgZYÚû7±=IDw@ÚŽñ ‚É=Üäh¤úµÉ8ÐÇÅ–BÌ è+b&W -9Ö7´Áà{qQs¾b)ÁZˆ>`S÷/¿Ï¬¤ÕaÒ‘w-á4{¬ô…nvŽ=‹Â'}×>ÃÇ¡~6~ßžº€eus#!:„ÄÏ -FÓE®ŠïÑ.ÉÈ?*Ecžše¹ô§ °VjAÜ{#}1/«`„ô›Ô{Vàùþ>L¡tCØÀCݼ‚‘uɦVU§ð¿á+@ˆÔwñØ„³jgð$¥ÄÔ !mµÌ8í)šïJ… -U+žÂçr÷²e¹p4‡o|v2¡‰ÏD % »p~´Ž»§4© Qaû¬C<‘Å{óÜ>l\$r½õÅl+öºB?„ÄiSÑ‚Ï–êñAíÈJæã+X«kVà£Jˆvøø;wÑ•ÎIZlÇÑÿÜC0º9þoù~®$.üç¦9íºdpÅYÍÇìP±`% $“þÙ’WNÆ¡éݦáub|óoJR¡~Év9˹ƒ‰]¨÷陂O•½Ps™c–Ê·DÐÔÏ,݈15ÅÆ¥ãRÆtwÚª Ų$™x£6‘츙s`+%ÜJyxÕ÷°í9"Ä=¶ ÉŠ…-°R¯õ¿¦NÉj–çƒÊ„qæAŸM±p¼±×±•CÌ£Š;À‡ŒM.äøÓv’Þ=ô Eßw\jÖ&I3µ‰®dì)q›$·Q“Ä;…Æ%’÷$Ýgº4;ÕçBTä;V¥¥Ü›yôU0\:­?šz2Pµf&ÄðöŸI^œNq˜’ S;ù:¨M!UºÎÀϺoø‚Yª€¡G4C’º” ŽÈûø‘†"ìM]h⵸D“Tí…=xzoÕóPÆSãîmz¬«wvÝö¬áÂpã¬çYÅ6äYá( ÔzÔŠë=„ ˆÊ³ö ÌESQ1f /˜*ý2N0Gâm_Þ€s€d‡Ì¡ <ãV}FȲúH£Ï£/A0‡0eÖ2bèFkHbñ*ÿu)ÞÕ5SxV"dÚ)Ç"=Ñ@ªfJd²ì½m 0R·åŠ ÌÊæh·Ý·W"Î5ªE¥j²«´ß>òåÍ&«åÚ,Ñï /a©ñ8óN÷À€ôÕqþ;$ÊÂ$ƒFXÈ“Ô–bf^\ã ð„Úƒ+~¸ Üq…ßø|p¹`ã^àv¢ß˜Ì©*É -÷1ÏàÌj…¬"ž,Ú¡&Æqä$?ë‘ -¨® °ÛÂPª˜ÓÐ'^»â¢€Œ pBx•¦Ä"c׳Z¶Ð+æý´¾ZvÚóSM ”½±x˜’o?£bÞu˜¢ÂxZ }·lûÛ7bæŒ%ŸÊÍå3û¸D—bö㣘%»)žÓQÍÌú¦ëû$(6JMØ+¨€§¥@¢M킯2nÏA‡ ã~‘d6‚ªÚNËø‡ÖfŲ̂ß|V5w[†ûÓ¿}:ÑÛ•c€g¾ $ŒŽ²ù@§Ñ† .H/eüØGiÜ3gU;1c *´[‹ÖªÃBB_…Êëø®o3šÊ›>奮œ©í‰‰T™ý&ÙG@:¬½aH…‹<Ÿv]ý™Ö+røˆÒ;ÊZëAҡѪ|ÏÕp(Nîë#w_@#AŽa½we¯@ÔGä[/Š ìúM:X™ëÀ¹Ÿå=ÞB1ÆéÀ:á‡9Åf•ÐZ”‚ÿ­¨Üu„}”­º)R¢".I‘š ÞDk åºò}átCëƒ|X›Lﬕ[ÐOKËYÖýPS†wôiÆ5-eŸ¦óp?Çš -&-ð2Ê ê¾!9Ú_½UܤŽHË˱™VÝòéoÊ…°…ÊÛùÞÝ £×¡N$=ݼ±/&û»œCÝ{÷f]\‡ì]›‚³-sh앉qöÝ]—éïðU°j C á'ŒUBmêN£2ÕM!a¡}ñ8óÙõǯœíBC'¨/A|eâãû­;¼Õ"Îì[|©ìü;iX™±òÌ iÇ]ÚcÿÀr1¦ªÛ ÓµkH\%v‘(T^´É¢@É«æ ÂR¹WßÁ©}R,°ùrÛ–·rùµ%(à—Ö.¼gD¢ÕÊ\ë#@ð&FíÚ¦°E—}#!_§DNÂü¸ñ >=Ü擱fEÓTåNÖÉ~Ôõ‘b6EÉ’qh•ûúÈ— ¾ ·ŽÍ„n½G׎ÇKECõR¹|s‘‚ÇÆ¡Õ,úv˜˜ùOÜF5/cìä°†ä€ûT’úêHe@=ì|)µÃ>ÞÓwìXŠ×[epïtÀ‚YŠÆâv//tN½®“’%š/Óœ/0q)x–ä'r®6‹Fÿp/ý2ˆ}2ÎÖ":Í}ê>`ŒTÚĪo(ËUÄw:B– -UGE.6†köNIó—oæÿ¬Š­L¼ƒšmªÎF¶<”Üçƒw^E§ë…T…šÃXå‚çÂÞåa{¿ìÊïý\û´ŽãÅ=57xÇÜY“î‘MÚÇ6Ÿ;Òp4=™?"Eví’bŽöU ¶·ö»Ú qØ]o7Ò½éáŒJIî{¼‚Øë¦k¦Ë¶æ²;Út¨¹z?#íû‚ȉÝâQŒÚ ÛÕ "ªò°$sÕcej“Ïmkí¹ïðô¼±U‘M9×UgÖuÞRJqF6ty>7D¤«>N"×ïVóå=szÂƾ‹ªé> Ç—¢„µg­œÕ9aêÑ¢hC¤pÇûªŒ-W㉓ˆ~·çí‘ËC#o?T#=eÏŸ‰îð …¢l– ûÞ¯îj ðfçжMÔ±å,¾ï:.I ÷™_l`×R+9†ÐeôÀãM3€i‹c<ŸòótÂö¢;¶-g]©ëzç$¹¹¨Ö~ôÊú&)”>¬¼ ̽…'ð“4"æ&¶äåâÕ}ØÉŸ Â=ÕepÐð‘5iü‡p¤·c†Vj,)D+1åÛH¹°”›|Ýy\Ø<û½5üo<7ðñý»¥.«0ÍaÇDF|¸ê=ê´ï´š§» »jp™w,½J^&*G=Gfr^l"¦óÓÆG(cªi5ÐDÖØÔ‘KŠù£sTÇê«à5ò¡šÚ蘷ðlåfµbÕ²Gq2„h*T€ËÒç1Ù<[p #a;…049]&°þ²f¨.þ%D6öÅòoN(„°…c"µ2E¡>xl4„1‡°º_èwóðÍв¬ÖN<猘gõ¿ÍC~ yÒ½½¦ºœÒ1; øvéš/䥪¨”:½S åœ!ýuŒ´'ñâËTcÓiŽÕ=JÍtb´áµ(¡š.PîÌJ¿ivªM€äû,dÇœN{w÷ÇêÕ…Q¨TýÒ߸"‘Êþ}vpNEâ(‚í´GþÐHÇÚßo/ãò6cÝ=/5¼e¶ Ct˜KŠJåbý5S/Ìe^£…ÞI+²v® ðÓÔ,«#õ”%xDú‚:s;²û¡]ÝðÎgOŽ¸+î݇ñv‡¦á¦˜ÕV–äÚ§ÆAp¥¿%‚O~® - `‰»‘ù=êS¦†ÌÒÔ1ƺ`®¯9'{±;°žh?Š¤¶Qé"à]™5%&i'V (ÔkئÈ󨃫ö 4göªy‰Ô:?ü í=‹7k!ô;Ï.7‘ª§3ÞFüœ ÝRTþ<óVÎѪ¿…¥e&¡S¡½ç°zÀZÎljtx–Mïà³Ïv§Äq·3ă6f“ Ñ’žvîáê˜ko§aÝh„[‡ºøºûl]†Îˆ†4˜BüºjÖ»`"whœ {'ow;`÷8±øÂQg6Ô´û1RÃQå'铧æRv iã‚Öti7ù¨Ã@N>¯…Oà¼Eî­rUâþ~¦0ñ÷£—„<7Ãf"ï…Á8£ÐŒQÁ5`ñBª–\ãæÏ7”šïú¼?Ûâ‘ŸÖÞÖB@‹âw˜Wj(¢ØfúuÍ9­tzéNuîºÒùð‰U3ÅãR´×ÄÁg‰!PCÙf%ÿˆª.uã}ü¹ÿoÛÝH6­Žõ;æŸ;º=ËÈêl Ôí¿@Dðâüü,2»ÜѶËÂŒØ>^6ͯY…Zö[ -’„Þ3ôdýûðŒ‚« =îP…It4[ÉΓ³õóä:ûcøG¦ëŸ—¾®+êFcOñvíšYxÅùrùrǧ‘ò²•’Zç:Ô€!p U${¾Ÿ¦$âÛÿÂ<¸žz*m´¼±¥˜¼} FF主‰ Cúu”rù+ßÙ‹<¨ÆJE²ÔÒ­µÎîØ%ÜÌh}·öpAǽH²¾Zƒûã6ØBÏL×È*V¾”ck3 L,JöSöù\Ú‰o錋Ð1^MDætV@ÞF=y³E‰6¹êÁ}%)¥y&?bÓæÒbùs‰'ë*¼§AµW%h†cBMNƒÓ“  ~æåÊÉjrÈæ'ÂB^fdF]Ÿ¥z†òé9HAaÙƒtõŠpvzçÂTÓ”Q6y7„Ã`dIbfô¯<©/享]0€ËÈlÅÛ&ÖÖGºBä¸H䥭‘9éØ,½)óò£nCp“‡O|<`x{]¥|$K%›73|7Ã}ó*±‹ü'žF=<1ééCøÄŠŠgR(­bÚÙ^iNtΤØõh\Q G™§»»¨É11V[컕ñÞ¿e8&» ©÷a 66¡Ž%µ½Íp`jŠ€p¸q}°î{/ãEðB)C_ËÉWEè‡ïìWð.R¥9â–²°tiˆ(½ƒÆ­ž…Ù¶Ñæ2’Hr7‘n(f™“>ÎHZ*êUaqÄ©b£ºMêW -œ’õp2a°›òWI[‘°‹¶ì˜Ê¯·™;§Ì‰{¥$rtÑÆDâBlÉZÄsˆvöõ\¤:+y\Wª‡>¨7”¤¨rbcÛý–5±iõM7°–!- ˆi £-¬Á@Éäêsé^Щœ¶ÖígØEæ/ÉŠÖê¡œ'›vÝØëBD„»ö1¨¬«ªg·D ƒâ¯N7ÞB )œk(¡p1®/AçöïQA"4™‡=–âÂÅg5%>eÖÕMÀpü9áÕPRÈÇ ìƒ4ízéÆ“%žÿÏ…o2g¯(кü§÷çâ$<2Jðbš&›Ã¾ˆ¼oÕž#¿r(¨P«_tÞ(’*¨íO·¸ èu}UCòq#Ô†'6ÔeNñ"ÌF›9ù[×íÿLÿ;{¼¶ìCOpTq]Õ:ê›?vÂFS-¬È´YóqõöôÑ7¶lZ­Ž©A$½Å†õv{g„APl™_Šsï˜oy*‰8IhhÒñø^ŸnSUT#ÔÃloœýiXXiMô»4Oi•ÿµÖø»ß¬š>¡Ôo1=‚ùúÛ®Tš"UÇzß”emŠx–†Fà v»þïý\ -Hkµ‡ÖeîNÂà…>ñÀ¨J´ ƒ÷MÿfÔÿûOŽÅ~ß9XÎTЗšq>ŒÑ~<óú|ÄÅ©7Ë¢Lÿç©Ã™¢VN—9‹qpÙLßÝñ^2vÆ-ìÈ÷½p‰ æO°*Ê]ó«VUò¤k–/`’AjæÌ( mÊ/¼³¾[NµñjB¦G&ƒ·»„13óˆaòÅ\÷æ}Ý;¾«‘çÁ>ð›%Òx{l7G‡*±1LPòç…hTn‘À‡] <±[ˆ’ÎiA±k°\úT ¨ -JŽÒ³†ã©:Eù8 ù0ÿŸB§ŒÉ®-4‘þ¯SL¥(åUƒÃŸíE÷µ J»~­¾õ@-sûe[›Ä¾¨Øú@Û©-Pܯ„@Þ§ÓØ7 -‰kzxm²+ -™g‘hÁ‚5Ÿkò°~ªââAö| mpˆ ‹,‘½}¡]j ͆jÔ`N…—ÿC\&¾%JçÿÀo¹ió™ép€{ì׫(÷¿2‚Õ$ñÞ5ÝØÕÃÑ-&Ëó,»·êË]6C¿&ÔáO•±•nh§ý‰ÊÅÑVEÿ€nP(4G]xÚA¹ôͧÈÊêOîÛ\vËïòé„ê”Ô9i›á—Û™DØ[‡ùÁΊäzÖ£W'ç}ÞÔºÛ_ á§n8•¬ -ÿ–S¼¼|;€ˆ® ŽÿTŽ™¿6)Q“]éZßâ«÷A€-1ùª@f5VO2 mdn:“\Y·sáóýÇñ›¸®?í’DM`Û•Iß"êé²5êÔÍî–å“¿Üà£é"*\¥0Ò]¯Q¦R6µüjBÁÏp?ªVxš|g«7Yx«[þî1íŽÕO¯ï§ç9îâì†q!—ÂÀ’êo7bW)wRG¦¬eÞûÎeJ"D“òò¡°Ñ?Ý÷æG®(² …Ü®¸ü:¨ß=X†HGvj¹?r­i7Q¶¿Zèº_JáÕ?òõ.K>¬ŒvBÚïgÈ ÎôíWþGåÂøw––ÊWûzù„Ö¼–öô•*¤§© mCœª -aA…D4ñ9Š]yóóqJLÌ/ƒ°ßíkhºr¯†º!äÔdŽGApX¶—N ÉA@@4Zß_ˆè°tuž^÷ QMR N"ÚzLpPyrl/>äÅ7X–TB×gª)éVð*Ó^uÓe2<)ª$"Óv/r}HX†óe—ÿ1¡•yIÏŽÃPFñVoY^d{ 5`àc‘½é %Û›FA¥¶¢=¥F.§Ie¡QèBƒÂÿÉû¨·ùØÝ|¯µŒþY΋n;{6NwÚ0ËùDƒÁ×ÅÎ×W#‘wl8M=©{üsŸ÷ç2Y–kF5ÿ8˜|ÝLÕò´Ú^?1a'a}*Ḉêw"ëB_9g„(‹QŨ \r8«ˆ>>¥™Y£d -9ÌÕÒ ÙÍ ŒØÍ¿£Äs/Î\«N|fÉÎFœ6LiG¾.d¦ÚU¶Ô@¥–ÊmC>LñhX¡ï€äÊŠÒ˜·\îo üû(b¯¬Õú¬×öd×j¯•ÛxLe`=ßÞù,¹É¢_f?j(¢JäMxeÃÐZ!~ÚýØz/ÁZeÃõ˜"ÒåXw`Ïþ¼W9>IØ Y¬›pj=ó.õ»uäÔȨ’þ×¾¯FÂWv\Hûv;®ø¸g ño®ž_ŠbKÉaC-ß° -ÙF˜»ú„°9ÿ¥q=éÄb;˜¸Ó·~•;6 ?¡î¨AåíM8g›îqZVõ½OÖU õYñ½†â³ -ºÓ{b|ñÇIþã.Ò‡ AŠ-Ã>ì¼™q“¾À²žOC°XM(öè ÿ•:Y@àÌÞ2ØÝøý€A©ý ΢ü°]ÒQ™®4N_ ù[}ïcBu{hÕÇZ1?rX É¸©ãˆ44YêAŠÑµXM5¦g7Z ß6Î2Êüg±FÚ’ôÚì{åqÀ0½y‚³&Ö5¢ åXø™|å¶D*6„™±–‡åñµšgëß鵎˓9åE.³lÈ÷Ñî¥Ú©7z¥¢Vöœ”P"E}õ¿ OæBr ¨¡ÌyÓxï¼x„"upœ±/à>‡%¨s×½ÏZà45œ‰Ç;uz“íêÏÒIññcºô럗Ïݦ¦¸R‹Ÿ+¤´ün -·%ôbâô:ÛN]kÀ;aœOÊ\­;a¡]cüY–äQ._R¯Ï‰µÊÒÎÜ‹Raï[Ñx×HÃÎj°Æ¿^졺A•Äp›¹Q²HÎNîz×Eíó›Ñ26{7¸?RßE'æz†ôì\ñá;ÁKÅR¼"$ŒlfÌß3Ô•–Ù€ þ¼7‚LÀü`֛ϠÍÀ^EÕŸó×(3Üp,GŽìØÛŸ;¦¬îÒ‘6 ³if˜g“ÛxÎgËæ’Ò$—ˆÁ[x &ÞÜ°$ß]쫧­ö‡°ŸvÉ\/<>ôBœ^D_wÌDE¤¢ñ°qB‹ëã¿áØò’Š²u|0‡7ÏQÜÚáÄ´È<î‹\¹o=š4‡dTþ¯ìÇ—³ÙdA»°r¤îœHtƒ ¯Ebœ]ͨ4 /Q6kAiÛ¶ÃÞÞ.Ç=äŸ@ Ü~.“u’ŒbY9 ¦›è/;çàyî5U îŒXâZ6¾`¡s“‡W}gÛèð}Nch—ä˜Æ*ð ß7w§ßòéq-øS;+mB®¢Šø¡,NbgÈêm‰`z,|`K‰e±–ÏFÍeÉaߧ©×X -Á[ÈfVl³,?ºšæ°®Mƒhà›ªºÕÞòÀèTîÍ Û 0˜3/ØÎl -9:±½Ë¾"NÃ\CÞ ZOÿ0^ÑTî3º¨íh2+ƹ†œã¾n…Qã:õ UV¿÷Ì„K+q…˜ð‚\ÙtËz*)YÚÊŸŒh|k¾ij¦s-ºœBr8•W•¸µðvõö×s ~öy -÷¶—¦ðz©_ÂX¹ÃÁxf"b²bÿÏ+{k<å &NâÿuÉΈÑ}L0i{€™ò<½Pv'ÚdìPP:øø —Ï%ˆñ¢KÛ8´Ì`dIƒâ&RR!žÝ)˜IBÎ>ZiùŸý÷}…Ô37Yƒ¼„rù¯?ùéCnˆ†­·¨È$#˜ixWh÷£7!õ£ãª/n>ãžÇ¬o·@øîÍ£6%› ÐË?(§Ý˜•H+骖˜¯ô&<`™¶Ó´K6a§”-GAÚFâ–âÒÄMn½MŽT.Ȧó‡9ìncÜK•ÐNS÷‘Y—XËZM$Cë: Ôº¯÷ x¦Â¸˜¯·Kïu›ßͧz`Ý%i¼¹ -„ òcF*U ê’‘Ù2'þc¡9`åj¬EÊOlÑß\ ©Ÿ…Ï—%zXrpð.F[«œJ=g° ¹ÄìY}®<±¼ÝA à…z´W~&™ñZçáþº|áw¸f­{§µ}Ù–[[B–*I“PepücØ.豧ÑùO~ÿŒÃ,M1ƒ’“£¬Ñ’­Ê^ÕÌ«Â˳ַÁ`‰ç^gŽÓ™‘Uä¬\šcYE­gþ7¼slõ=ù­†}Ù;h»ü·x³v]B‘^%çht°xLžðyT'² ƒEî™íF€3yþ­|žÌIòÀríÞÅL!WsÀÒ‚ÜH¾Ò)¨óXf»xTH:lé5«ÉU¸HÈüÓ¶íŽæ=‰Ê±¹#Öp[@ˆŸ7oø?¸¤Œ`<×þŠ…ŒÛ£09‚wwUjLQræÐ×Oô¾¤É8K³·I#E³ZQ>áâåhÝÑ®™¥ºkKqÞ•4ÈGÒñÎÁáPõSÙ¹šbIgïÜnlë€k>l“t¶§—ïiÁy®¨"Ão,) èKgšK-lî}ži̹ª–ç3Öµ¥1Uò( ×R¹ºMQvÐzCF‡þê<‚¿}©ËU¹…³v•¢¾ˆDý`\¶o6BiáÒÄŸ™ê*÷Ñå앺îNéÄ0gŠÖ©fœ‰Me´3{K¿;&«^ž@oÒüVüÛ¦wûç˜92ßòêb%k7îÊZ)r½±!Ã󣂅ÞXAop…†ÇÙÜTF¹~:ø ëM¹QvMÏmÃeȬòïjaam+<祒ô™¹.ѲÝQÞX^ —,Õ‡£pðp¤|D¾¿Î2ɄØÛh U:é,K…uͤLÐÓ€S\¤¿‘‰]ûâ,©³32Oc¥/Ÿf±†ËuáÄäÇ¢Úx¾øeÈ漜„)nu -~Àg ®|ñÉ\ç çë´VŽíCÚ(ájòú$‘p$Üšåž,°Ë•XIzfÎrIs5Cv®÷7JáÔæM°CA§ºt]=óÝ Z°(òꡤ{.qä´ñ¢­/,»Ñ L­7_tøjS¨¶!S?j8èº9àunµ¯Uu"]‹äB3äåLŒUÊwª 5`‹²õÞ£*ÄæˆKþfAÀÓMÅ÷ë7•G±Šs){ò°Š€‡Lð؉(Ga°Ú §×=] £µºâC‚Ë:¶H5¦1qÛ–JøB -zV=y]Žž`õ".sCôª_ìêÈîˆÈƒ RnÙ&MOU¯èªEˆÕÞŸ3DË„LÏð¶Y™õ¿ÛÎ^@qªå ZÍË!c™b™ú%çS•ª¦Â"­mÑ^]ãf<­pÀ´>>Ù“u×uÓˆ'Þ†È@Ô!;ñé`+û!aq,b’ª¨Å}Vùu…‘êÊíŸu4¶) ”¥ì, -ƒù±ŠN+Ù˜/~Òói“9æl¡MnñJ •gF¤ØÄ“¼ã·ÖÅ…÷‡©ŒPù‚'fL8§M’:áÙÃ131Nãêöô’øÓª·aã¡BÙýGÁ%”â“ne¼*4 -•Æº÷ÚÊßD2vɧ©{‹ u]=áqéLÖ¾Ö]Ê4¤¦$[Xò¤‹ÄÔ§Vùƒ$æIùü‡t\¯;<î¨MQæÓ×oëŠ×‘œ;IÃÉMÚRîI³¾œr$šÛ!e†‰­'æÇ/mÅy’9a®\!´ÔÿUg A¾·=Þ¶ŠM ”ÿV0co>„(h—(GÏT)¯¨×¶32±=-™Bú²-fõɈ\ª9l%É£Ú“Ö¾|Çþ}‡8tMˆôvzÝý¦¥ÿn:O¯È)N„»žHí -ÈGÑcFÚP•±^×|b‘±! U4™–?ûà›/¬µT=i?˦[æUÉÀ0C¦ž³XcS.è?®48‚.ñÁ‡i÷è†S³ßÆ“‡—ôÊ´lHáð -o‡ò'kDQ°sÿif½“ ×g3‡ãn¬$sAJ®×þ*Ç%$1o‹UD ì½|—“É£‹ß¼[v«§@|“ØáX Ë3©bt‘ 'aO=®Ÿ’ÕÅUGc)ù­”Mò|ü¯ŽŒa7lüÇ,å+Ûý»ë{ÀZáP{H†ytÒsù’5üŠwõ'4c5^ŒFÜó¼Ë¨ŠŒ„Í@ƒ2ò5~#ÐfÚ3‚·Pš;Šd„ºq>ŒT“èþJdš ä¿jáeŠUä+|œÄ™]k­HÔ_}ÔwçœÝ7ý°q1~¿…áL(7iRsƒ‘Wvœ‹ê}Éö˜AÀf‡µ½—?{ìù B15Ý+Ž8Ã*A½j ²æ„íøìâ¹Lª”ùè \3x'ªEbˆD¥Btÿ.Z‹Äúµ#Óž­oþkÿ¼JéîJð@ÝæØá[)\ -hj:úå¥*ú^_8Võ;àk™"äL–£€FhèJÐ툙ø·±»Qá@¡I-o W¾ -©eϼ£êhäåéù4ŸKן‹5‘ÉϽqk$FŽÛ2¥ÕlEÆ€ù´ƒšÞ#8§²Í]áî ÏØvÈ4ô õ,„S÷‘=Òɪe–40*h˜|{±{ãµç¦:x"«V&$ÆÑ稜eX eŒC½ãß½é’/+¸÷Y;åˆUvÊ{tƒ)N4;×ÁÕDý¢NÑ[¡Q¶F·¬ÊR¨¬<~«=nmÇ÷¶¤C‚®Q.„6ÓΗž› s˜E2&¿{@¬Ä‹€Êf5Hlp[Œ -'}Ò#ôoÛöØs ù×ùŠ¯k#­_Õo÷+ Ÿ€UÒÅ}'ÂL혊#ÒˆÊ43_³#TÝJ¾‹7çR³ÇÅÏþòñøm5¤ šÏ… Thû:ɽôS­Q¯ã}X¿«$î e£ñQ §^÷GŸéÄ›öÄ8c¢|7ÜÉÙ`6Û9T>û´íV‚¯¦G†¥…eHÞtÌvmùÎZ^~ic}læý²ÎÛfcË?njj¥-µ–W’Šºù$o| ™Ü€ýè9 -Ðûÿš¦>,ë>J[Y´£9^C”(ÔÇFµs.»2’˜û~×i Ñåsö¦Æô­FM¼Ãˆ  ïöoݲq £ÂÜEcµRâÅåÖjp­A~rA›ÈTŸ£Íý“Ìi‹Ô¥)êYmåõº$m¬¿ ²™óŽùŽ÷ï¯:ׇ“¾‘ª£Ì_“nE.>/ Ⳙí7f†Sßv÷ÆÛœH -übˆoª³,ƒxÈñ¶yÿŸ‚àÐìë^”ó­ßWĔѽà³ "ëaéíøxýÅ¡ ßúñp>: áêžVUqIUG¬ô‘qÂ+âsa•´ØŠÒ›tåʦ˜zëW…ârÎÖ $ªó¥$RTH5nXÁ4Ѐšo¯†¢I…WÖg€s3´¾4i•åìË™B©7s‘ ¡ŽÂä»MåÅ~Ô { ÜÁ1¥*Ž1‰ˆ˜’¥:x‘ËTýhï]ßd3¡*ù»â©­ŽAQtbQÊ ÁÚŠ/M&Òùž ÿÎdÈü+èψؠ«¶@r~ åv/÷Ñ2 Ê0ÓxCB¿Oð±¥<Ö3zÜOWÌ÷¥p-´ÿ žÄNˆ%3p–ê‹Û`]­éÚÄUÌG³ŠœgðÁ&ŽÃ3ªJ" ûŒ¶H:e+¥î\5]ðàÊ”“¹³=Z¨ê³B)BÑœÇó¿æ~I§èøþÄðº\zwóü®PŸ+Ö´âµkI:¡»® ’"˜¹û„¯’)?ÞTÝ8“í$Ýr‚0qíÇzTÉ m\Q‰W€ZAäh -Ì£œ‰7_ÀX.!¯_èdü}gSTÌùþëR#êî…¸e}Õ8㥂Z£Öx_ùbÁ}Žt Im¿.¾:'éC:FÓ0@µ* ÔØoÜ$9DƒêEÜ^e2Ê–DÙôBü·ù4ÎñÁ¸¿…’‘U‹É[6üýüƒò—„›ÈñV`}@H¾¦Çõ‚…+À׬n¹'ÙœAŸw,xp³%ÑWõ®59 é»ç!œ¾4 ?„#V’!»$Ï~ñÈIØþóo(ÆMn>ϯ¾(“å"¥ÍÒh±ÚÁ¥pÿ‡m¾Î‡’cc\ˆúŒ -[#´óÑŸ~h„ð~倃HÀˆòÇd!,€+{òöîŸs†{ls}¢òf°ïÇ««æÅyŸ7¬ŸÙ¢vß…,w”…bݼ+LQYùÕ+ÅŸ1oœ‚ýgnøœÃþš:º;Û MŠðÄB´‚­} Ä[(EI:‘Ä|¶¥ê%C”~Qj¦µ=@™œ­Û^Œü&”A.MÆÔ™ê8àì·6dX ò›ýñË!±Òþ-öúpÝP -Ú^-³kÎ3Æù§óå®cš1ÑakÉxÅY8$\Ñb<É\¸çDÛÎQ(·½¯H Ðn -q·žK“+‡MyÑþPúþ%£=Mz„KAyÉãŽ.ÝwôwÿÖnñò(ê×¹ªöPyá:YNqó‚Iqй{ƒ€´Gf~§w–Åùðâö‹p„ŸCJOtTŸIâ2ûµyûLD»j}êyfp9x‰ÏQ{ò]Êø‚Óíù· (3¯Ün&N].Úáy\.J;²áBž0嵊Ò]¥3 ª¨¤#í=H#RUÝ[A?«5×gˇ”ê¬Ø‡kŸÙKã¬Zð²K€³&*œÎK§u±!L3æ—\åLoÔ116¹æ‰$¥aõΣRÕ:V™§íŸ€*wisçOø%^ðç-Î\ÝîÇã·6nïòñ i¶égƒŸÄÅé§?4eB~ÈÓ$Ó44œîCõ-³ôÍ®%Ô!déÿýÞi|µþ/+™ô=ÍòÿGÎ&à=Ú!R0q’å=Mž5à‰+9ŽíÁ:+Z纓‹ñ¾ÈÔ™Q.“†\–^>EÄkf,9AòUÌ…êÔ4_Ä R#þþû¯°&Ü‹Mò•pVü‚^øI›³(>üéún¿?„š6š¿“Ye¨Ä¬¯Ï‹ý(3dæÓÑUág#¿îµ¢Úœ†®Vvžˆ?B• ÃoZ:Ú6®ÀuÓdÍ‘p>g«Ïº¼ì¼&v“k¸ˆ5O¸æ4¯ûfãˆüþàðHIÀ¹Œõ~†²ŒŠTW0=1hÆ‚y²¤†O:ÿJGŠ±Ê¢®«ãÌ´_Ð?B×\~2BÐt¢jwË}f»†EpÿÀ¾Wcªjüçä9Ç’PV],G92"~§ !æÏKÒ§¶Ä!»èh;ðºmXÃzžª‡ ¿†Ýxõ]N·ŒGz=?„þT«ˆº“it Ó!^ù™¹SõÖ†ÞÛSµº@Ú*ñ´NªŠà?Þ «íñÉå]”Òï®^òS¶mõ3(AÙ»#δXúpmn÷¤´‚ˆ–Oõå?%ƒ™sù¦Ô¹Eÿçù$ü÷²>t›Òä"š¥à1;å TŒ$ •~k®ÒçX|“*l%ª#ýÿBAX³…®ß @G¢ú»z™˜ö³yè(WÆè÷8©ÄA$€!`ô²,Ö4ŒäÞ—xłɪ«‘"kZh- iOf¾Â¬ñg¦â‰Ñvî:RëWp -—E]÷mk’A¿ÖÃ!«ƒúêÂŽW‚·ñ3MÊÛCÁmÔD“ÕiÍjb™šm^B‹ûz¤[=ÞÎ÷Ž0J;Å+>ˆéÖm7\§²Ñ—yAȆ†™˜ Öc©Pdd)´à®%Jˆ˜ºêÛßCƒì¸D¶ÒU -"Óü)OFF‹3_ÔeѾœóFƒo5ý9ÿ—2H{Ü*¬ZŠ‹<˃£Ps:»; -Ô½Yðt -Š &WxÖ™ _ãqüñR²†Á—oòëqðÁ8p8¤Êiøö¤ŸòªÍ^ë3|î3ìï†ù¢Ÿñïû«cz½™71à^œ‚w{ÞA("S…kÙP2ø=9r¼·Òì@ëóÃöûç ʳZÂÙu -¦Hð@ûc¸ÛË"ˆÍ¿ï ÈûO‹ÖÃQpzýKT¶5žœõøÕÂζ¤LÒÏxýj±Jj R‹¦'‚ âçV–¿Oj:Ú¬+jNvV­Éu_h¨÷—8Û ß½ïÇ>9`Sh¿¿ðï;x_1¬TéþuÇS~Ýb®øX¸çO¨E"Öœoë꒢Р)”L7Wù!VZv¦¤Ç±W®yð«ñ ñÒ⥕dèfÌ^È!ÆÞnÒ’½ËQ¢õfy@uñ°ç?ÓÂTM£|tbñ xHå^’ìÙ¶#¦]Åuþ\ʆêßÜòÃQU9›§g--Ü›Î/zUÏ0/<;;§]2@dÉÖÂyýnií ö'¿­r­ï Â×ĈŸ“qøPÝâ­á>¬pUIǼ5kd õ® GåÆRàâ«î -‚ÆùÜ!›hEñ}¤ttÏ°ê4¡¼C´äœ“'7çÊÅÊ~é—-¤©6Æ>|é¿¿”’nnö³è>í‘FJ㫱ŸJ4XbuìÓä»PDèð99ťȆ–2ZÅC’}?¸^—–ÝQ%‰‰5~ÏØ‹—;~¦¡Æ#“ݸ^'ÆdZî]€Ü ÂOêiöFéuoP´1ì •s½s=Éh[ãÞ‰#ÒþHþ¬¨‡ãµigH—ÚH‹GãùCt¾Kâ}¼j]YŒñª06)8¸˜ošYjKû Ií•Ú€«´`Ì"=€£ÁŽÄP=?Ëþ„¸xfȤž§»Âã™™OvŨ¡^ÖãTÏóCŽÓ#òÞ/‚,G/†Àv·‡ZŠêÿvyÈç H‘O)Rv)Üêð'©•\*¥M‘É09ÂM1uáóFÂÆ°yË_U[ŠØQ|Å4¹‰Ó ,÷×»îÿ51ÒL®'°é”¡Ù—ü,•»3_îÄd_à—¼¢Rª“¢ÿý÷¯Ý¹ç\UÙ71×™œdI°g Çà±iÊòººÒµw,Äç;J…¥Ë”þhõ'hÆÔ¿Xè®v0æxa/û°ôBæšQ,fßL—k¡‚J&£‰€(,qø(ÆÂ=K¬;lâNNö‡^>ò«9*>:¸ÜÙëÅ{ÿ·qEÉǨˆ·èv9àÍâT´¡šÊ¾Þö!QfÃE É”Åh=ˆ ¬Z “MLØvZrò˜ø“E›2Ä—Ô|'’Îf@¡³çé6“‘DüÎz¸ñ’iúÄD‘1 Eȧì"“¶ã™ßyV –u¼ºÑÒ"ŒJ!ç›±‰û½¤žÂWeMî÷¿’:*~ô³ßöR£ñÒî¯â݆§T1…L4;|({+Þt´nÊh%»Ú'àû?J]!b Ü'dÁ„ý&¢áúVi³T3pLËó{Û"§/§{7M˜â\ïµ rgkË0ØDç]™Ü]ðh¿w{ŠÌgÄÕœVOêç]×îöM³ÒI\Š -UÅ­| t·“åfkÍ#²¿Yï%”«7òûÈEd ¤„¥fw¼Çwy}»¤”!|žè9=c©Á1ôåaW£ø©#×cQÀÃZ;€§ ÉaR"/7@ðõmDCZÈØïªùr¢î‘R=s€ü5cØŠHë5l÷åИœ"h´; ‘þ¥Ôïsòm*µÒ;b`~ÌÅÈ–ÜTx=ÞŽ:ÓÓã¦+Ø-R¿å|püįbr´®ÄÛGLj$#‚°ô-Ó|>R‡Ï˜âá5ü*räPM[;aS,@ª83B$|_T¡»„|Pž¯Ç®q^kåf3‚ˆl·<Ç!ÀgmgÝæ½SzîÂ÷Ú™ñ!O~WSjeàWÏçGeyÀë4 ýêÜFây8£ÊHx!øÕÚ²AQƒ³è³†ØÍÕ8aVŸC8³D¨÷?D3h!־σٚþk óýœ³ý/TM².t!¯Ô¸?r¤ 'Qyuë…öë0PwPF×K8[5ÚNQúÿz0ÑÖh¡2"J]5ž‘‰øà y#žÙÂ<á˼nz²`AŸµx¾ÍE2á«©ù 3y‡U­Æ<Ö…*‹ù&ºã±‘ÈjB8½®A’䶲?Pê­ÿ8Mqà£F*AÑænÌy”vŒ:"‡B¾úpXRIµoئê7’+Ývã7ú—Œ“°ÂAÉ7ïGYB0 8!³êZ é‡^f¬’æ¼Î7ç™zS’*jÏp'âçÀ꺵xb¤/VH[ݤQ€¹zˆšÑ]†'MµÜG =µ·û©3ö*Í´0“åáÏ·rJ®kÔŽxÃÂÙsƒÐ©Ò©*²eÂ/kí>0„#§\}à&äÐ+X¤´r“!>d•“­àœvíýùY‘Œç5>…Óív¾Ä¶,ýgmÔ3wØi½yŸ™ØìÄ®În/¾¨qü6Üœn/”äL ³CúJ£ÔÁæ†VDÇZGhXiåÕ™y—åÐq˜ŒÏO(óõ‡}I|)DÓTéédšé§cÍ4jÉULÎTh^Þ/þ×ôþ¹6ÙÏP7Èÿ †¼ ür3Ç\5Š'3ž.Rá_ƒêɦGÀ6CVuÅʾ«/<è>Ya—áÕKˆò·ì¡»ÏæZ\ß{xúçíÄ<é&SéÁ­Ïúø:Ai’oÊ #}»Õ(ÚÓ¬E—-âé…Ì-¹tf_½ MK1ëXIÝ¡¡d*Á´óŠDÁ‰¿»ç€H ã”Ë÷,aÝ„?Âݯ#±:)€57[íf=%^–ÚxÌUþñîû‘ÏÅÔ ÊÉKêáK¬{7J,C!lÜÛùuU¢Æô’A#ti:Z1!Y1Ç|X5¯…ÌËUÑžJÀ“‹Ý¤°IúÌ7ÿÞ›†ö»Gp×IՄȹ×L+„Ït|LG>?yC€¡í¿É ¥’JgSˆÈÇiZK]`# A­ëÏI­0Øâý1Gœ™dƒ,ÆírzÖI¯Î£tYÝÁlQîHpü­Æ?ÉFÊõl(¸0NDï°ù>XVV8µ[þÆžd[î ˆ!¬§.3C™f"ÇÍ h5æ¥øɪçgL*GXü8ÖIÁf®¹·ôi<¢¸gu> X©Õ‘KÖ°o‹à:ü¨ˆ—ø‘: "ûÅQRT SAàl¸ôƸ hÝ£ÿÀÙé%ÍME©)…zJ4Õþ™ð¹«êúX&\6à9ÙÔ%A¯Õb·$ö?gFw/q_èõ$,Kj‹æ0·ö¬„¤ ~ô ì´íR;ª ¯®ÈŸ²G·^GÓÂK‘¬ñŽÄËüwçád-`¾9Wláðå¹»y¶ûa׳ æèŠ4’ZæAVŠ!!.Ñö¡³~ÓñÜ6׎5¬YÙýwéŒM£Ÿ«¶Úê2u0¿»Ãæù±¸Ø&!Gïa˜=Qy*;_²WÍÂÜU7¿_è ªð$²¦õ.Êûwµ-ào·¦µ÷Å [§å‰5¡ë{Á^k~'Ù½nîØé8ÅWÊÚ#2%õ5³1Ì'êAðn½Ð@Ï祫ê×cú9»'1výL-/÷(Èœø*)cí% -=ùG{ùÄ)óêÿíñôIDJæ6ˆ7©ÐÑÊË y@{r‘&m»ŒÖ/EßHP8Mj'Ðj Æži1•N1ý ”²TôYß…-ÆC N1J ~”ƒ]"ùÈãÉðŽÃÂÌu{¤iýu×5MíìØVY!:“ -M -F_¾e@eO23Éß4FQ÷ïèÂFi9'Z’dïÊk7l݃)?ñ4Þ2i¡S"Ç({x]çù_*g ¯&¼ZÇeª—c$¨§µj$Hwÿmìx¬'à$³Î¬XrÊôÁpÒG· ,g¿@ *5YÆ®>u€#Û/¿]çè–ÆÕ¢Ã.zGefÙéð~N7^_‘©šV3{¨.¶vMüãy'½Æ8‰ËÂzüº³Ao%.fxÒß.tžx8VêG‹FE^Î QíŒÑD27”OW…[(e6Yü³èàtK—oü‘¥êÓïšÀHÛw[ísýa…jÿ_:@³A"ÓD"ŸZ0­†äf„Ú’æ»ç®¡v¼rãVkÖO¹C5&Òl€<Ku<¾u{ä-›¢z’-ËGFø·ª“+PiEÅÄB¢Iÿ³†È šš-¨ýu»TŽIˆ¥€Ý“ò„±&ÆáÖ;1ß]Þ'8\Í1D·zë¥çÅïÐ9?âÌm&¨x\±¡¹ETØ"IÜy¾™ëÌà“÷ÄõŠdÊû-µIæÑy¦S´x/¾zñ¡Õ­NmP+ü Nl-d«íRpåÅo&Q„´gö•áðE0pÐ2XÆ*B¤éù6]‘Mé¹eó.0záV¥î´@ aâÇÅÏÿad¦&ÉÝf‚ÑV»+r +ªçJÚrZ»n–€žÑ@~mÐâ%*ë»—þ^AøÆ6%ªNý!“Á_ÄàÌx˜/Nj¶s·ìõêK.‡6»l0M”ÇÏq ¢6 •à›KùÎÌoC +s}Z4IEŸ;•Øi91_ª Ž`~Ÿ]{»þ:U-ðÒ ПÐÔ<_ Ûìyl>Ò’ Ï~F»FC6ÓþÓXUŸ2ÿÉàl©&F_Â’B>ßô…†|^;¡•¿WGy)ˆï÷4Û0J\ß ¹Ÿˆ$›âÎeàÌ_¼‘¼þêh:OÒ4™mFÓN°§#îYsJëÍ|TÛ©øÄqžoÚ]Z÷ð£ +°ïCQÇÓ–‚Î$˜»¡÷nÐl…Ÿ-PýJq”̪àn3 6ý`ú ç° hµ/<oLµàHX€Ä÷Ú8¨U¦Åµ-xÔ#Êe:zaÛ—~ǽÆÜÊgf‚òÑAä´›øì®&–.)OŠðrºcä^ +mÍÜB‚·4Ú„×+¨/tá>IB´ ¿K ·ËÕõ#}ô¾ö˜ú½¬œ…× ŠâíHCöJú È/÷©z©W5…ÐgàÉùYÛOë‚*‰W8¡œÉEv(ŽK³$ñ!!NÓ11™ïwϬj3«Û’|=M‰¾C!gp3 ukZ¸G°C²ñ”Ï7©V¥˜#+OÉ —õ1Œ–ZÒðåo”‚¸¯š?þj`;Fù_~ gœ _à. ùªEbˆD¥BscŠ‡Ñ(bû¶s‹'õÑ›­UÀ¼^ƒP¸ôƒÈúY®è=ZL™2®ŒCñ UÝ'ƒÌ‘@º5ò4Ò¥^ý±'u%¤3Üèýåþ"Uþ¾PÛQ²ÛÂPª˜ÓÐ'%¡Y5‘ºࣔÒB¼°š/¬´M[—m–IùX[H:¡× +q6ïÂ|!mm9IÓõïñnÝJìÞ@Ã#ŽÝ ½§s{·‘Ñðd]ÎlËšmÛƒ$2ÒÄp{êmÛ}§T+¥ˆ9›®ÝEÅÅ,7h3̜ћ°ŠVŸ¸ÒªÛǃ;ý—x%™¿S¥éˆw®»O+z}ŽóU|ØGpG‘ëÐÐ+z;Ž*:E›…2“¢£I‘ÞäCñTAR°‡Ö«ƒ…ý6iVùã + Ái)É>ãA=ÓògöFE·[ïK¾/R?½WŠžm˜î[pºg:­t¦yáUÎx4=üÛÙ•˜tÍb¡!Ëü´éJÙ NÈ6=õÛfâ¡Ùn<…ÿø‰<ÄCTÊ +"EAPüaÿ&­¨¯rÐ9l ±ªá‡r‹ù¹ë?Á ZBdʱ~3Ë»´‹Õß\b¬«Àa ø+â\S?;Ř¾Ó6§?ñ#‡ÅWÖ¶g­º".ÆöäyU +ný<ÍÇŒŽüêN;0¶µý•ès!  .‘~ÃäŠE³86ðo=òËm¤>¡€Ê:ø1.C,]9ð™&ës<×yGÿ#È`ÂL™±Êµy!œ´c2¸Ò”=3,]ÝŽ—»´?ZC †šIö³Ú"‰œŒòÌ7®c[‡g Çùâgj~ù°PºúØ/¡Ç*ëîƒÏÂS6‹ãÅmIxÑ~ë\+à¤2Úg ‘­1Ó’x:ÿÉ2ç(øîôä½÷LS&Žg}ãàìPõåùõV{±ÍqCCþ­…Í°»V›µUñ‘Z^ò=ê,ß¾Ô¼|ËU¨O~¶ÿJŒö„þjÐa¾ëâ ÓÃãe$ !Ç:!î–mÀ +ka·,´D:øm¾ÆGÔ)¨ßãm4†!‹|¡¹ß4²CY1¦sXHü¡f°•µ+ߥyƒälú8áe»ÆÑ*Q2DSðkŠ5$q¥ÍÜ:¯4~ð_÷÷þž¤Ê°èÑWö¾Ó°° Çòn`‘µÔû²Ø×£i™ˆzÔÑ-Úä\ªÿÕ[h©‹S´t\Ý*z|äTFl»è±çêX¹aÇKl•å%,¾­¨që×ÕÏ×òKWv !ÐWYðS¼E}è©:­Q”&ýZºŒ /)ó™“n¡â‡©4‘BÖÃñüNØT‰e,!"ïš bJxœÎ¥«>1qUMáVäB‹¤Êg¶`4S@¦ý-å²’ÙÑà_z¯!$‚,•¯‰1­Ú¤‰…›&8cr§”ðN8®²¾ ³”dùaפGÐ\WªÊ½ƒú¤ÊògÍÚ6CȽA4 öJgðKËË'<ì¿©Ýwx$›÷̵«Üj½è§O_L{´(3‘#çrM/î {¦ +C+cÞk¢¶ÌSÁð¼ðQjDùjžqK¨f`Áœ¨ºïð2õÙ›ûXDËC·7(¶´6h íå²ö„‡§ÌÀÀÕ„›ö/¬/.)ïàqêB¾ËÕꃙøÂüÁ#=~Úmü9Ï&’¸£‰ £èŽëÉÓ³ÃÂdÿjc„AŽÌQ¼hSôÔ—£Žiáçë²ïØÐ¥Û œ6±]ûgCP[Þ”‰ÞXi£Í©gÎ<+³Î¡æ±ÿ–—‚Àm¯V‡)Çï¢ ƒ\Ö‘’Bï˜=IÝ--Ÿ^}»í±¥Üh¦uM‡ŠîàäÊF¤òóžE›¸S^Ï_‹IEýy J˜[¢E¿¹/I{Ù3/´uIWX~:ñx…!=D,u^JxúÁ÷ðÌf¬»žÊ,ñÏÒorï¦Ï\MÔ_³¿PtÞËÚlBýã]ÙËïê Ì£Œ Š32…í´q×Òƒ†c°À>#€XÙÐB€g ðxÝþ×ú­“ÁbéàþâgðÙí]m  :ˆ±¼‡k߸„šõlS2&­ŒÕ¨ï¨¦j'~‘‰‹\lÅ[GGÚ:jØO„»Í=ÖÓu!Ç£¨Ðv®…}™»¤·Bµ­ôLže†¾Þ›AIñÔ·Ò®žžæŒEÃ+5'V»º`—*Gx!”Ðú{„³ÉŠpä4GïÂå +V@ÑÄƈÜÕìG‹bI¡%¾Úò39öö$~‹)QõrFe•òì[úÑ«â{Képc ɯE$$µÇèº$L×ÂÊ<%þž¸;J±.ßÉx³$ze´F«Â0»4#îþ*Go«±‡XJÛ94¾ô|ÊKÍt´­›ð/;%I£Hå4âfHJCÔÙb4Æhð\¬$3vyèX ¶ÈÎœ¾K€åÆRM—IˆìHÔm_^)7É[PÐ TÞ°2ü@¬Až¿é"ÜÒnL ö:5Ww~¡ \xÅ+²†•¸z(0GnÕÙaY›™›úš-L+M$ã³"Öu£ù›VEÔtL$?¢,¸¶ówë®+Íb]O½sò[kvc§Ujé™ùa)Žc(€uÕ®gæFí²”§ÏW0Î'ÆÎÒYÖCØ"%++• ‰h˜bJA¹¶é +£h@&×>rWeQ`ÀSMé´míàe謺v¬ß 5%kªVÓ¸¿nkÁh@öèÅÍ€åOš$U¡¡£]Ïû@ŸÌ¥ÈÔ‘q8–ÑÜ[»zòTGù¸WÚž¹xÈïhÂÃâ ÞEæÆ|i¦h´:ã ¤À[2Æú†ùÆžg‰;¢rßTŽÿ:ºŠb×YºÌƒ¾*í…OÈÈÌÈ@¶ jNô?¢Ó…ÊÓ–sÏ€fœ%Û1w„ +ìsí?ù…›¹L@ãB;DÈcM7ΊÀì™V)ïj“í×ÕKà)š÷+GNÝ_ýð ’• +¯[U@¿†*ÌRo>_Ÿ^”¡Ü×Û¾miz £IÅxÝ0'õÔ›<Fÿ–ÁA˜÷=U¿~ž´3DËjó]*©eï*9=7³Lo-É)<œÃ¾*LþìåÓè‘»ÇÏpÀs3~(@ qjqM†sÅ}™éæøþJ|YìžÐKÐyƒM&ç'Kûór„6ãÑç“S ŽTH(|øõ•ñUZªfœ(´w΀ðk@Ùd#z]ŽD­ê¯®C&"¼Y\Š¹& Y** Sp·?0¦¾ ƘuRú©x“ª²ái‰9Z©QÜ“½®ø@&²ÃPw”¢i9»Ó>L»®x„!ˆ’ôƒ½‹ñ[ã/ËÁ°.¦k&GæŒÙ—¿7õ˜¸WRh´Iì.Cœ>>ƒ\ö ‘¥ —¿¾Ä(Õ&Ö¯ø©¨XÅ$ý6~¦ÞÙŠy±™?ã; K€ÝëWŒsª¦ÜëÅ)ì±Eĸ4+S¹çß<ž¿‘BšÎÔÅìí>Ubq)S6ö×Àî‚J*.¡›²Ð&õÞâÃ$X o´ü4îŒî.Æö µ‹Ö*izµƒ\Qª}qPF§ºƒ9]q£ Á$ Ù‘“2ç%cê‰j]”Ú +;é#î%$6üØ>OïÑP(}ú nœq!$Òý“OXÙöjM/ã„A…Žz^ÐÊ$kq;²’á8¬œ©„ƒßzÎI$š‚ éüÊRwŽTi™öÙXIs[~}Ä,dQü®½ôc*¦@/¸”J ƒÂäî^l¢©Î~ÃÞWA ¡.p6ÂÝ#sDå—ý äoöA™ŒktÝ åþ ©ÎÇÝ¢¼Î¯*Òœ‚f3'Xš~é]kJ€Á¿¨'äñ^b:–úLFyCĨN$=ݼ±/&û»œCÝ{÷f]\„KûvN¤ÖOЫtc5##K2z¸.'p+ø3 Ã¨šÒ¥à6Ìf뢨¥z·òÔ^3—›Ÿçñ1 ª< Å z%šó/LÙ³”pÙ™<¼:½¼Bb¢mgžÔ؉²Óÿˆ®ÆqïY™Ö¬žÖøãœF¯{j- »²`çÕ!7½nå0ã2¡›ñ«8mš4&5Œ•sRHà`†ÐA¿û•å¸$Œ(Gú¯¿ P LãYšKgadŸ[»ŽÓN.³ú8!übÃËÈ!½ó nõ/ #ü@öÉäŸë°ёަ`UîÈ12r½  Âb#Q8ï:“ ÔñA~½‰õyuøåÜË‹3ØÖ_Öi‹+q ÚxÜsúÃMßãkêlÉÉ΋a²`À¹IQâùlÈmÎiûÿJJ÷Ðu½6óÞ*øܘuÂ"ûùÉ-GnÉ8PÎÜ‚ÀTñ—"j2w<tÈ·S½(·×1R5Ïo„õ=¿íÚ-I×$ñBÄ™ÓjéÂyœR@´PÒ +· +‚ó,· ˜Xœy?QHE|®Ü<ÇŠoR1<4êå4áÅ,!¹éñâY§ßk¤ fí¥?#;™Ó»0¡ak:{•#! êJÕøÈ_÷ÊÖa\ÁÚþñøc™d;Ntæå”sî >8Pú«œƒê¬6]õ÷ÌÕaÜz¼8E9BB9ÆñÙ/g#椫Òƒpl×rÓ0Úy.<<©º÷m¿µx=³ÀöÖ4¾YKe×i·äˆœ#¹±Ô©o>¬]âŸôæñN„E…ø>!W›£jÆìÜ_:†rRu8Y‡A¶Ò<3)›ÍÁ˜÷™ìÓ‹°,oG0—¿‘’>„èúýk ¢^ǘâ3â½TÞÎvúíÐÙCˆ¶®³ó öHkE³9&Ž¦\ÏE{o;¨–94çÊòj7Í̓ˆPйÁ((É̯7¤F½¥‹Ä-6Ù†QJøF|ÊÁçåQ'[pû5Cfקvôë-º£`QWiBy}‡Ï0fz1o ¨E2Ü °PVp*–loübÀÚ‹:J¼êmå¹kR¬M+¢OÊvñ0f¨1 2§hô_bDÆ:RÖ†#Ü÷˜¶w^¸5Ñ4¿aM…Ñ*êµR7ÌÝA‡`µ1ª¶ möÚq'±òGÿG<_‘¼N#5{b¨Ô„r5^r»¬%%7ã=y·æÉê}¬Ž5OêœvÑW낉FÉ3©L\õ:zEø“(Õ󯚛šË <Ëü'^sqâ–Sw˃×.áïÞç²6 +¦üg±%ÛãÜ!sKBÔ1Ce>w®¾_êñÕòãÖ¡»¤þXâ¨Æùpä+‚a·4œæîR¤uKAüû_€†¾Ù'J‡ 'ðÃÍ#2%lñ*èy}S«ÐÖ‘›Xù‹Èntw"Kv!.L!¨^Aëc‘0̸ÙIâ]}´>ó,âÌÄ¡ sûŒäÑÿ£¹BÞôÆÇ”SúµšÀíÆ ñI§2Š=åííëõ aÙ pUÇk}aÙÁ1I™tˆ¦b=¦ÉßÖÀ04ŠýxžœÈÒ²8­:I#_ó¸Ë%‰8søKïÓ n}èˆùˆßB<`û{6ä=ÆØwP>[ŸŸâåú÷wÞ{M‰õÔ½‡iÖ]ân¯Å6øäð4÷6*þNÙ'3—Èê‰ÔM;óžÙ6Ö°M„ì'*…bôOÈ”óßþpÄM–1bM‘gî³ÓÜ›þ„«]C~ƒo´çå6œž j •©¿¨Å½û¯1 aƯ^$Œýr¥S§ê)³ˆ¸Ã08IúËoö CŸÒ8ÏÜÓCKc +]=@°BßIÍOö`íÏäIÑ_ŒÓ1Gð®Œeq„Mß*¨öߦE˜åûSø„ˆHƪŠÍ@j†‰ÚÍZ×cÔ¯—UŽ1>@†ÅqBôÈPû©Ú|?.}i¯k÷­õÚ¿þ«£ÀÙlÖ–Ç Ðw`dþû{9ÛI’|á›üã%Œhm€}Ì8Œx‹A™Àâa›Î|ÁºáÈàuR¶oüjc×u§­2e‰‚ƒ4fôVqÜ‚I#Ø'—£>K4Ìhô M‡ª‘øUòbŸŠžÓªï(%ƈÚâºóµÔ§Ÿg{~‹˜ÿóÌLT±{óQ!XŒÃ”ý••ÜXfÑFe5˽؉ħäò0ÀöÀ`{‚›ÑݽèОµ5±‘!ÜïB»÷Á!á©÷†²šjã±xÕ +­üîD±Z¿ŒÏ[LÂâ­?Bg8Ì&ú±¼„!~æ-ùÊoŸ€FÉ)! Òùؘz¯­#H,K[Òšýÿÿăed*eDœ·š®Ïð%@ÿ3Ë™épHݾúzÓÒŠÑn—í®ç»~Þì—"eüA#Ïê†ijz=ÄÛà“véE«ž¶n˜FÆ×»Ú¢£^±?!¾=ˆÒno_h…]I~'.I–»O˜µG(r{Í/¨ê„oá +]mmgf³Ù£ºï\ÛleÉ!5¬cG‹”ƒ´U [d] ô`ÁÓBÉC"îA‘ʸ`ìϲ¸˜,aÆéo'oÃM( ÐØßVÙžÓÛ> +í)×ß=1î’£}¿Ó?uÎùµèa¨ p‹ç©Ú©ùßz%沃ࡠ+ø{oJ¿c?ˆˆiëi’‰ ü˜ ÌÂþìڰ¢Z¦„­wÊ7mORwl"l›Šè¼™@3삽å‹CF÷¸33¾hr‡ ÿkªVmç,«Õ£gÍp1,üb1Cã²c”d©$È´–õA`•`hã.Ë1ë(dKåªUs7#‚©zYVdièk0XÊ»>Ñ"ó➤Ԃÿã[ÌW£}×-ZÍ !hÂÃ:%ýó+–Äò·'ÓÛƳgfä`»ê!wÈrã>7^EÊ|Ž¿C%ìYäc0¬ t'úh/©œûÝZ~~y…AGñ7›W n*È:Ú÷QŸæª.YW‰,.^¹vÏ×ÈÛ)}J ܈è…)ÿÆŽú-ËÀÂWp/MP÷£-©Œ@É"³´¦®AHl¸Àpf09X”Êè­9 ýXØÖ)A} %T*ãPi¢wisÎ<-«'Q­:Ýô ÖœÀ‡8Ì¢Z¥±â°tãÆ4T¯² ){, ¬ç<ª¢‰8•3¡þÿ†.ߪ–ËÌ䌙¦U¨A½Õ[Ç-´ÿ}ÖT*ß›J‚†ay&„ã ²'ùÌa¾_$1æb{U±H#ï†Ìz¯ ¢ªª—Ôf"ùÔ‘Ü~,Ú”ß*Àù-9‡¾ 0—cEtEO™ÚVÕ^í_ºÎ’8”¶\0G“¸.ô08(2³ 4 ä+ç¨âô ™cyJØÎAz*B„×#ªß åÌ™d5 b…@ä a!%ú°M¾‚¸j[§t?l³ð¹ç 3É¥Þ!ÜU{UÓŠ–/á@JÓ‡sK€V+!Æкã"ÒY(c„)*.Ða½"ƒm:²wp_Çš‹òËbí‹ÑÓÊO¶¹û ~\Â^æcò‰¼B1uëÈ$°¾rå%mxÀ’¶øýy$‹µÕ{[ÆîÙãDïP. ÷‘fÅ7› +-[ãµþ^P‰Ö¾?M]åÂP€Ló»ØÃì˱9äý¤Å}Ër8fŸmQK…·¥Š —û¹™gx`ŸQ J»7Ö7X…ÈEv1ûN ß"PK R±!×V¼ãʪիkÀàåì ÌHÅ5חǽ-¸NOÀUÊ8&þ“ØãX;ÛE +BmC":ÚDw|ò™¡²FYVf(÷ZŒfF=b«Ì~C³fw§nꘅãä ðõ&i^«´)H’uWkõüëP`š…KÒ(%9©»´ /:=°œ'œ!/™ß8=JУÿ=¥§9r¼««}<Ž²‹Ò,tMD{p°Ì?äN k·ÜNvò7køª³’õ±{ Bv?³²+ƒhAÞê’Œ=¿£Ëî—JsÀ«€?iHÐé1ŒkV—:Ìnð Ï‘èŽôrн ³â–mÙ.O„FæD‘.lLçò ')aãw¶¬`t¤ˆ¡ìA#;•FæÝ8€~ø%ô„ñHšÀá;×I›AZتý™¨:ý®Íà@n:†Ú(£q';yûÜ‘1j—lóðÜâ BáBq–'ÚÝúÑ܊ﱪýÆzÜ>|Æ÷ÀÍkù‚æ¾Ç=e"ĨåK¯ÕW +ipµÓÁµY-§bJÝÏwpvõS:õ4óøn¸?,b)Dós ‚S®‹¼¼è­GCŠ g°œÆB´n*X]Ø¿€Ö Ô*f q[ï¦}NÎ yZÂì¹uVìÄ@GmÌLϾ¥åô:÷É’0oUý®àb›€k~ÇBA_€ë°Ÿ‘M¾,ƒ–¬˜‡¶8ű‘ë–½¤m6è×WÜ'µ :§¾£R¾ºÅ~ ¶rƒQÕO>é%³*Š]¹eŠ«|x„ë|´T".ïsiX­7”È“¹Î—¿•ÒÄ~Ð1-%·«çÐØηá¿L+ÓÜŠ‡rÒ ¤±ŽôQxpÍÞ)QP9ܬáúì;›§´æ êdÈß÷!-q k× Ø%ÅÑm*c_8pI ÃY!RhÉ¢ˆãËÖÐ,¿öp¥e¹I¢C¢„-$âcŒ¹Ø•Rɠߢ>b Mâ\G0" %²0RAê·&±Âc´OB`)›zÈ£cÓZh4|HF¼ÞFFW­ùø Lô“Ë¡W­×îÍdðLld»Qßéï챈0CQ9(O^½èÉF:ŒÝoíż[(ÌÏuGÝž"q8j +ˆ –íz.F>ip"fûχõ–HÂÓ_A ;–71{òø5h¤Td}³yÄåyÖ`ïـ܌¹©ÙIž•‹ã}TZ6Íð\“öAºÒµw2ˆ•Ã¢º£T,bÁŽçÔG¿"¥Æ­øÆû-ŒÝd!ñ¢èlÑ™OÂýHY˜\e&þ£ÄuÌ=§âùàÏá!(54ùþJLê¤Ò¤þ0°öRüzá^kRU“Ÿ£Þ¢u÷I@Ã,ûÎóí)ëaàáªÓ4ÊÕ‘+{(På4y Q¼®ß…\Êmsyö¾/õ^[iˆê?v@ðé"jÓÄ„ „\Z+ìµ¥ û“/!Ù_=aSú8¯‡¹Ä +¤Ükæ+ǶçV óåiýwíï·ÈÈÑ5UŸük"@!ô½‡ôl6›3Tìõ\Ùí»ñ õR‚ÀŸuŸbÔ¿#;ô¶¬E âT;ûõü¢G§9›5K]Ý“zŠöVVE}£jHÛGô+¬w>ðÝž÷ gFÇàý'î8ð + +eª\Ã6OÐM9Eâ´yIW¬w#û<ËŒ)Ùp%¦¤æŠ¯þq\ÎDs“#¡ÔWô<Ìœèèúõd̨ßHJ«çVkr5›QŽ¶«· ‰ ©£œ_ùç !=;¾2t?!fš÷Óη¿ëÞ¢€%þm®.÷õÕwËÜr¿j2ÇXÜš,Rªm¤Ñ™«%mè•rcâ4ç€2—(2^œAªÙ¸IÒk™¬µrí»ru®¤,G}³¥jЄ)¼î³ó<òˆ|ÓÀVäZXô ¾ë‚SeAAý/öªå„<”™R>ž¤)\:qPš4±…= £a(uê3©1~:ø^ðàZX»Æä+½8iù7_[ú™ÇNòþßDp)„3.©Óƒ¥L®›µLíÞ\ñΗ³}VòÖ"Agýlvâ²33«òáYþþ‰Ênˆô>,±G`ø¹¡ZZÍ +3è¶2šx}]O-iD¦u¦VQD%émÙpØMçMî”ÑÎß-)»C†Jµ·p¡L˜¤6ãÕdÖ1A±í æÿÑØ,äЄ5.—ð•ïdUšS¿Ç̽®-‚1»|јé|u“Ã`óÈî:zåœf\ÖBõñë£F=ªˆ.©jKǪ¯ž«Ö»'+MÚ¢jþ»&õ×æ@ ÜÑ^­¾Þ8uR<ßÿ>¾.ÊŒ°w¸£Qƒb 8S•uÝ€Œ^#TÐ +öO1PZ®ÉÓŒ$|%Ï!¿)OãÎóz¦Y°F1òH/$Lt¥èâ‡MùšßÒá“‚Ìßû%æo·a+¿ã×™ p"D›vz£©ë.\'ø±`+žÀNcéã¥&8‡{ìe‚œo3§Ï¶Ö˯·fuL‰4ù«s‰0ëvTÉbƒ¯Y7^XWG–Bïgc§n[w½µ¾àÄ9´>š§\µT®·vöžÿí'nÓDŸ·Ó ’­qr¿L¼f§ˆÒ Pü*/™±‰»…¤ÏU¸º‘´wˆÈ¡ˆåÉf3 +=£c3oy&˜Ü8¸|1H‹™Ý#g&¼ñÚ49TV]5•Èæö¬8äê‰mÒÚ¹Ù†ü]Ú±MÆF ÜUÃìÁ¸·# f°'[Ö¸´òÂK¥Fˆ‰KÑ©Þ͸W]Ú¸¯“XñÅÒ:"¦¿M=sX¦ ^&Xµeö‡É§uÙjIüPdóX—Tå =Dït œ‰)¶QÈ©ú¦˜»";垎ã«ù†:ò \E:Là—#‘’šEøÊòGXÄÔdvß@ºuúìqÑçùæ”è?—Ó§"áÝSØ•}Ö"ó¬zåÕ—ÙÊo{$i0Ýûé¡Ïˆ4æ/Ô pvÓ”þi혤 y±×¬¤ëG±ø%­¼ͯ:ÞâÜ‚ ØÿƒãH7l#Uÿž úÜ*+¶X*{ò¾§NÐ,ÛÜRh§˜ŽR[‡ê!{´’>î›eÎÀA˜FY"Fªžƒ9ñ“Ÿ•ø¿é®þ±É·²jÞ9Y‰Ã\²Óäö_Nœ'W)Cûdï3²ôB×LzMLN óO…X[`˜®j¶H5¦?¨æ³MR‹ÐðäìËãNîŠÐ;>#G3c;¥)§`UÆvLfioÉxY’õ‰¶&;„)•QܱÁ®G^òýØ"›H¦£ô™™>jHDkº.­”_Æ«Ï>MÁ&:0–‚AhˆýD@CºÈò7¢’æ A›ÍámIŸ¾T\€A×Ñ2‹u¯‡lX6`òçnfmÿÎ{_Ü0´‘(.ÜW—ägcmþI÷ZfƒFan½dÒÅj,SÈ©cyÉUâèЕa`•‹‰a5“]€*Ü,Y\ú57÷©–==´5lN,ŽnÛÕ·-Ðk£JCq¦µ +÷÷éº!á ´ ÏÃ̺¶¬8ØàC‘L'2•£ ¡v³6Æ Ç.ßßÂwŽ”ÈÒ+·¥Ný8ö"‹@gbp}&©¾\kš Ñéî‹9¦Ñs\ðž¿FxϫѦÓû¿ƒWÔÜ'É”úðA(øcHûšQÉÅò¤”Š¸ëPeB½[À»>¼âçÅn sulÍ®¬’¦Ú7"^Ž¡ü\¥V4É8ˬŒÚ‰—W¬ÀÏO)Ã1ÃtÐaDÒPãQ˜;ñF!Œò6'O布î„ýô6­;dmýõgƒÄfYúw1ù`¢=PºM†MWY"hã“¿ Fêù¼=†ã`x2ÐåÜ:ab(™¦Í¡¶ºwÏE%é7ó óùðj¿dm%Ä©¹7úÕÌ0Z6ÅÇ@¼[6ŠŽ}e4<§ºðñ:Zkn¯Ã¼¡Å°þðÜ¢“ðž1,´Ö½enþG¥÷Œ4³Ýø:ä#YL*GÇoGÐè‚üv›![©N»ìë c l_Í2=çu¹ôëø@ãòÙAa“Õï´,y§x’ˆÓ¯p +\¤ëøýô_l7´&Ü$jdŽˆ:B»¿IPåÈ¡qJ+ôµ7Ñ\f¶Àîº .»ì„ÉØ9ÏBbdìbJ^{.B²Á¬œŸ{v˜Žö¥ûVÇÅ3Ä(lH„Å|ìà«œ¸uçô_‰X”™5âNûŠHn‹½‰%üL›÷ϦÀ Ê»·!5Ô”´Ö¾ªØšÐ¾ÈSÁ<ó£m/|£:QJåo>'û(Ê: DX^U¯Y½ÌMÊEõpy‰>Äz‚ã–µœ@€ëH§wØš_†½‘ C UýkºýoZ°ç&ÖÜnðú¶s£?¤ëHrª0=3ÕG‰•µ3†K«ÚX©¡‚ ĶL›ýQÂà? TæëÂá@C6@²OJ¾4]j:%<–{ÑöÐæ΢tÅT¸„ý¦€\m˜1«æÃÈ,uò¬îU ‹€kC%ªôW^(.ãÐãÁ£•‚Š·WÖùÜ¢-!´ +½š|ü AûqOîPïGˆŒ…íèU\a¡}Q[&IqHñ†¨2Þí\Ÿ´&5¹C{;§þºéórÃ}K"bw ›åGºÂòàÅ ÇGiä—ðó Fæp•• Ør®æš«Jïy[¾xÒ±š§óÌYƃْ‰MÙÀuÃÜIL€ê;¤`b#+ŒMúNº±íùŒM$ÒP¬?k÷×ØUêà—ÃQ ÒzaMò|:r±i¨è÷a×ix¹!ªñå"°E'Bi—ÔC@ŽBšù±Üi/…+¶—éÞBâx/íL´ŒÎaaÏ_|‹F·¥pËV1¬ê4êºðÞL"6Ó~Ǧ¸îO/³9e`äV†.‰Až°iY–¯™áR%!e +ïšéñ¿Vxúýz‘ͪc6P„˜2)toŸÃ×0š v;XMâÀ2R’wM Û®ÎÆÙh*Ø¡Ñ{ÒWP‘<íUM3YŸµŠÔuÍK·VÄaÎ9 <Ä•N‰Ú +cä°(É?Ìç#ÁºKô¤o~L4~¡5¥ó^EÄZlã0‡“¢°{ïc6VböD|&ëS^‹;’·o½ù`0´á™™­¢3y€òHÑ â° êñR½HÇî;c%Is>7Ú}v)‚n˜]Q8ÁŒÿ`ô­ö‘Í]ö!$Tá(ût6gù[éòYSÊ4Ï4k-mž³câ[*FÅmi¼«ÈH­w#@™ÂRš÷ž£©Ôo¯N,vžÄÕ6ã`Ké}_PÐâêí8µ`¢ß3¯›ï™––ïU—HE\'=êk*·¥¨KìBŽIjÕá¡“êÌãX%Yú¦ýÃßß‹×=Eëõ[#6•œJ9é°}Nªwß1û4Yåõg‹)­´6ù“Ëu…l(ŽêDW¹8qâ¨qB~8^~^L)®?Y½š6?eö8ä6îºIÊõë©ðÃÔ@ÿ«RÌãÏorqFán¢8íؾ¹d__Ͷô_ÚÙr½J!Zn¹G`~«|ØgòœÅ¢*Ì^åcD>Å-2Ý_ý€â_c„öõˆÍ}òÕ)p™†ú#µŒ<¼U‹z¾!JÐK÷‰@NÿžbÔøòIX²G”ŸJ°TLäÀíuT9F +Ý×(c Ý•ZGkðÉÙ„gÙ\1Ù6‡¾í.f¿üIò8¢ãŽ£’dJ¤ ûg×=Ò8]„Sò5©æ98·+[?KM9MÚ%ã?f#lTðEø ¦¼—ŠQ,j“0bœbü9s§J.¿õð2qìĤ÷Àx!RˆÝ¼ÓæÜÄ0²:e÷âri~¬ìè•ÏWcœÕOéÜ®µ§ãIÄKtå\rU<2—»ÚT­Ä%®,­8¼Ÿ ûìqftÎGéµ/$’¶ÄqØoc!ß]AV웚Y3NÕc`‡L—ºå» ✽õ ¶ ~FJ¸ˆ÷²T€Ž^L õÀoþÇ%Ýʧ9NûRýŽàì3`x¾z²…«£$Œ‘Oá†r&;b“¨¿‹îÄUEÍLÚ<~ôB!ËÏ~½võy8ð æ’ V'ñÜ­˜¿öÓœšÏv„Ys†ãŽ\öË.˜ü4…D=KxŸ;Y8â¦Aù ©ü_zû1ÃIP¤;´S¨l» ÞLi¤}ˆ§Ê/!RŒóúp¤:g¼¢Ÿœå·† ód"Á•Â˜IÍ~•©?è$a:Ë"d£õ}2“žž±tþäE®Lׂ§.d øÑytïa+ïÄ@çlY™.ðêb‘ЯqN Ö@èŽ5¿¯äâòwr†e"¸DÐPX›w¬Yî0m»%2ÌðÔ +5ú»$['·‚Ͳ„c´V"ƒÞ¨à@õŸáøì‡ÞB´QÕm"Šx”U²¿™’„‹Kéa­h†rt]«¥›Õ +.p›˜]&îpO%ýßW¡4Dß}åzà²fîÞNL[ € :û1SSQLv+"DCPžîooñ½:LBygnUÞœÍÙ«×”[®žûúeÐ`Ê;»¯XL5¬–ŽÖp„Ôq-b¼‘€‚²Ðezï,¼xY¹-)…í==£ìø3E~‚a “ûSŒ'NrûL8«ÕÌŽbZá¿Ž¦—9D£t-íÿ2RÛôÍ#p£YÖ9ËÝ«…ݧŠcUÀƒ«ÃãÕEå‚ð¸æ­j²™%0C¯H–ÈÄÂýÐÐÀÏ`s„¦ ++ö3^9zî IÕ×¹Œ¬}èŽn“†;ä}€¢I5>Wuí´æÑÍ„8ÏoÇ+ýa Pˆ0â!ŸM§¸ð þj“ì"Sk/mÇñ’M¿£¯NbÁRäZù&N•ì|8j5ô¾¯E;Á£A*¾‰-›šŽˆî Êpô›èßÈ?Sœ¾ÓW÷ñs*NÔÒ`9Ÿd$dô1ÍLZkЙ± «2ÆÊpñN:®Ædk?›‡ 0fêéÝðÛé4 +rätÌU©[ øþ‘«jC-_¨A›ÓøÙœùxÕ'Ôê»vPµ0çÛÃV±³äÉH06h|œo3‘/rŒ¤ Æ×k¸3åÓ]‚ ßÂÆ$I\ÓꪢóðñåÔ£[Üêwû羪$Ïüâ Õ6\[_%ö!Ã+߇–ïgÛåÌó»– ®î ˲6 ˆ_uª`‡C§õr¶®¿[Û|¼Ÿ;ª{| Ç­ûúú›îÀí7Œ¬QŸè¨÷0û Õäßáhæt”Wzn˜0œOb`E~ñìK7’ 60Ƕq~‹yJ²ÛjÜ€œŸ·#Êö5wŽ{4c²~¦ŸKìKØ#”R1°Oo\+Où~prИ¸ôlà–á&„‚@±Ç?‚tÇoã¿!Õ÷ŽhÃÁòœ {Ú¦CòpÑÒ0Ý„ÔÂÒ­B,ÆÕÍgåÕœ™ +|¡5æø–Qš€ÀÏn™kŽ ÄŸÂ™òûZç e›¹´¥§–.@to½W¥=Ñ’]u3%pÞ°ó8Cxz°Â:‰ Æ÷·a>²É …ßÈl‹s÷«Û¯L²6&CÓ‡âè“”Bú.Ù‹Ii Š¥U9CTƒ#¤( zÝœŸ(K•âá—á8u(ç¼qCS)ŸO6/Åkªúôú«»ÅßuZ*óì6lðqÄsvjЃôk—_ur5OmÙ{(UºòÖT‚wO +Nˆ Ãl¶Iì¿ëF WR}7¿Åù¸±‡¹…Æ[ãCfô0$ðäõñ<Ño…¯eóg­ÈÓyÒñ{p…jü»kx ØÌõjÛ¿K Uæ(–ǵÇ'Äzñ4÷èýëò7—ÿAóoùR§@á@ˆÞÅþÌ9`á¤Z凄Þú / 4˜otñ©?r]êiç|Ïm1«Ø0OÆ›ÒÑ‹ý^u²^}™›¶ºÇ‹|Ç»ÍßKËê¸ái¥ñ,di ™¤üµÓÃ]}ÙE¿H‘$,¤:Â]_³|ÑŒØ?T 3¼ÇETâã&ýV.Øçìù"‹ýüŠø "‘4@·ôÃÏQ§ÕŒT sHß"&e0‡KÍ"|=Ž…Ø7“Ñ®ü‘½°†G'íÜîý”8b"#UÆDL¶ ë)­s|õäÂw>…Ñøº’}3}PþqÕ^‚þ~|#R¾™ZŸm,Ô¦ÁÄB‡à£îÙåâÒ é™Ú¯¯¤¤zÝߥ±–©· èó\JñYz ãçhRϸ«é 62ùâßÉúâ9ùuDˆŠç ‚»ýrÖû#‘^W\T¯˜2ä<ÎÛÏÜ —ßÜ/ +ù¿Ü ú•8f©'¶ùÀóuÚVñ’™©.d§Ú,i ¹ú+HTzw¡ïõqFÎѾ™N¿le "…ºèX¨¤å¡’æZ˜ ’q£ÅK‰iߧÊ"½úJ¦5¶Þ!î¡ì#ü$ ž*+ J×ô•|ï)åÆüQÅËÅvŽH¨¡÷ råÙu˜üŒ ,Fµ Ddÿ`qõÌùܼu„Y½ íž4šs +Ü»ýžo6Û¿A_* +×õüp¦ÅG½´Ra$Ñžl~ð«Þ9éÉGâa‘6l{Í„ÒiM×oª~Y·+‹ f¬F¿KŸjVP3{ÿ”žÃfò¶S£ÞñBSïçP7Ðîd!Õñ-lÔèl;Öá—r³lz¿'—Hhb»ÿøû¤}Èw²ñòj\îgð”@€nÔNà qúÞä» ¼6¿\ ŽmuX"øqßmÔ) 5/H^ì#ìï<”º‚Z^O{DtË+ó‰|ïÿÝcÎÌÒü±nÓþ\3ÛNsñòÕ¶t‰Ã-…˜nöÖÅÂ{ã±bˆ®÷çÓ÷óˆeBý’ÐÀymÊvȉÞÞ#ó¥¡þ‡ö!9Ü«“u¨#x!F¢"†8{Kpv¾Út8áÔ’öã3å%Ü×&43檑9t‡IlPhïb”w}‚.½BÑÏEûKj€Í×°‰ÎG?Ý“õÂÑÜÏÜÒ*ØC„yR,@dØñÛ¬ ªÄI€‚G~ÝŠ toAvAö¶Ö³ìçß<.ì‹ÿA¥úîsúâŠoTµc§µÌ‡Ô†,Ì°"kÀfwá`ù?ŸU­8ÿ·á‡ÔÄÿt&ë½ ùÞ:nAJ`uúdã­êìš'øýuØ`S6©¯Ë)«èÁ¶3£Ö©›ãâ R„‹vç"–ÂÌKE±+2ï*[h!œŸ“…m"Ž5œh:gó@uF æ\‡š¥.X»(øµ¹Ìãû,Ôà•<­ww ý[p–Ê–Ú¢Á‹Zª‰aF >vC%{¸Êÿ*z<$TÐ.Vyñ=K¶âW;"%Â-goà½uÇŠwÎ+´•ÞbQ?Ú,Bö¯1JR¦ïYn±l/;]ñ)5j„é çÜ>m¨îobê¡Še/¹kY„\¤^ÃmæüF. Äåe}ûDÿÆÂ@ƒUî0 ëaë„ü«[˜Ž©¤IÄßô¶d|Vg`ü”ó®PR‰OEÑ°¶4¦qúÌK½cJâ±WM‡Vô@íÖD‰‡±êƒÆ±‘Ñ%5é»ÝcÞÆ°2ûö*¯éŒ«¼˜}úÁ¦ ;å:YxöFJ4¨«ô[ ªô·3‚•Ï’/öÖ%¢Eo™rŠ—µ?=Kå­B b/òlÆ òÖÅ,£¾šq1:ð?„Þ˜ÝÜ(r²áF°’„ßò±±Kþ)îh߈~ÌfÀÀ8ç~/ñHN°æ&ª¼Š?µÿßëq±z¶l©üý,Ö˜$ ¹Ó~>€_Jb߬J_PJ´—tø';^˜ ·*·†¬}½;àÚÈ "ÞówGñdU:d ì´^3[ÑDª\›t‚&þÛOq“Èscy„g2KLSTºÕHKûÃ;91lZ©¤§˜á¿‘þrÉý…òê%¡[ +aU[ ð¼ñ"cÂ8)¿²ï‚\amFñjËHão0.ÜV‹óE‡ŽçWI×Ó&ô±ïUëSj—uXÄãÃqÀö%kE Çíœq5·ÚŸÆ_&Çç,IþDzíýœ3×HÎÔ2„À ^¢%µ’>*ã1H´þÉA[Ú±Z$`NÏŠ§ö±§XmNm|ê#{â€7jû¹[øXä4ë:ÛÕ°¯YŒ6vOO)†Â»›Ÿoó`ØФ©+¬Â¤wU/Ì`»ÙvúÁ¿¾M×Ü WŠŒ§2¹‚/ß×Æuÿö`¥Ç©`1ÂMé >¶Øëòš{»ß„¸ ~Fp‚Q6'‡ü6Ô”ùýÔ³qЛ‘òWE˜:Ú‘daže#Vip †çô’ñfJÞïjs%ã‡E˜¯p½”y½€y Ç·‰yG+ðôó(Â-ö¬0ZZvO:c]ßçSSEZ¿·7®Õ`Ù ® #"’t¬ +xÞîBSÔ·HÖ_f,~·&ßù@þôz]ˆÕØ!/öð-¤j7Ò½éáŒJIp•kíþ ­cìA/çRÕÃ8·ú©Ç•°Ç£ žH:ÜžÓ?Sû#„`%ѤMánóÀÑÑ;’êF8ÿøU7Ú,r†Ÿ@Ûà³==Òò­€ø¦I/³ÅxÖT8)&fþ[ƒ_‰}Y ‡L¤žD9ûÉÏ9­ŸöÂrÏ!hX¡“/w4xz°Ÿ[qXPÅ[< Á\¤Y.Öì#©GcõØ„?`˜°[Q°`¡>ƒúa<Û ¥ïú¸u€~€µg0.!ßÖ7Ã,Ò%á޵ײ+Ùοô$G±oˆcJ+‚m3ëµ;ÞuA&:è@ÒÙ|ŽS)ÞÛ1!,¹ÖSjÜÉqÔµ[ñ:E‹W: ÂDYçe»zê³þmFeg7l™sø]gÍa^C¼&IO÷E¤»àoŒÖ>ž47¥K»m÷üzüŠynThm1úG cg@%‘ +Žψ«"û\Õ©>ö.êÆC’Í"E5#Ëæèkrò.aÓûsáóhÐìî‚ ˜“7UÿÜã´¢p VÜ\Vï¦ÂH'½ ±Î„H¼Æj}lŠà®YI½^~yHׇ®¡3ÌSóXx¿/|Þå7ž[…ËŽN ò¸(FÝLŠ}ø6ŒêŠ¥o9®èîA®€+*†Ä÷½c÷p‹ÕãÑ+¨õíÙ›Ë ^tÃð +ef,éX¢µxdc +~¿H+Ø« Ô˜'rè®c´¿«Xƒ7Š.]¿—ºOì‡q¿”D@QTp %BfË¿lôòÁ75¦Çy’ûÚÀDý6·l›pme¯øŠÆ©`ìb÷õ.×)V㪾5°™鵕Iœ‰é?Eù–AÑîÀüOö-)|µv¸æ½1²_+, +¬ÂëÖ±ð7¶JøǶңâí€æ.›¹§”åmG£«XlÀ ì¸ïzyD.æébÕP/„îÿûœFrP4|ÇÓ~ ñ]®õšfïóÞÞã¸|§§üPbKû>픉ŠÛ5žÔ<‘ ·¸ñW;›…¦fÏaûÌ +ôQ å¢Ä©Z nÑ?ó~ÓªàÆb朤qÀrªÇߪOÓ#ïÆ#éÕríé¹–cà¥8ÆbE‹’í‹Áx¢9eðJÉ{ÀDÎëžÞ™K´hVÃ¥G¯=,qIM„`ÌG€}ÂÆ”õFúp[ö„¶C=y‰R¤Õ¸8­åm.ð(õÀ_B²œßkö}úò“âý{ 2V¸¤â‰x÷]ˆðήA>Vi^ÜóúyíH¿Ñ>Òõ5æôAÌdf/“¸'Vß„EŒNvï×à©PÈÅaF×Gáå¶böEPË'K½™–Pp'¿  Õ,9©QîаBIŠ©H{1¥ñ<žw%*ÞWK³o¹…ÿòÎÿ:rª¡Ìfð! Ì¡–²â(T æ"Étx¥L6nÍ|9‡ƒh¿A‹`‹($ZÏ¿ÿÍZŒºF­êo|ÉvúÐÔ³à_9Ò°ÚŠ$³mUËì=Fh®`¢¿5ðg®c´ŠZ½qŽ‚ž»®r'áÆml¸ŠÔZ‚l¡Æ`šFk; ùœ¿½Ââ0ÏcoêI q?”kÒÇk2™‰°#™«7²Ë¹A0ëXႲzzµ¶S%å7&mùâr!©K¼Z¸y’î¶K8¤ßW=²ÚÚ0¼ªãAÉû<%Ý){ZQ*ktªñ³"~1[‰ÝÜOp¹ôé„ÈÄäÇ›‹µ¿×ÍvT³»(4ZL»Œ+ønUâ(‡0µhDqgÅä.ÿ¤v†ÎÈà.9–Å…ÃNkao”|—å(îËÃÈÉŸË3œJî]¢[YFñE(“}zÑå•XF©ù Ò2,‹<}|s—ÝCGÎb¸6ÚtÛù*lrny]Éüð©÷Åð$-ôɦbnF'1q#…P¬!¯jÀSÑm£Cî@õõEe¿3O3É•ùú=ÀÑ’žÛéÌ]ü•*€©~kæ²f$»zìNGé7v +s‹üs¡!²¼wfbE¨Ý^ìŽP/vV"çæ}£ +þ·"þÅp®®ä€&DxÀ¨0µR+Ð1DÈ<4E{W"†Ñ¸§.ENÖ>71,[ÙúÙhPèWRI‘¯ò¤–„8üÏëý·éUìø"M—È,½þçmˆ P0ñzKz`´>¾µè]}Üû£M?0—j÷’Lçž,ijFf}¡QJ‰RQ0N•-¹@pd[LÄ¥ÃO*½À™ãå2ýmˆÒn"C+.GæX“7%¸RÿÈOŸéæÖÞù:úx‹cÁa%>61jÑ'ÀÛD¯x(yð@†ËhUÝ-fð÷z2wêæ;¯ÕÜçB1ä¾&&Ág92ø<|0/•"×”,"jܒѱÁqmÒ×^7û߸úC™˜³‰c0ð/ÚL€qè¹)¿9lcé‚ aƸ–¿PzNȲÀ)Å®þ„Ah>6V-ÂÁwœU?so$]v¶Ú÷rÝ&¾[î´ž”ÞŒà,‚níYlO·lž5«¥ˆ{ÝzRiõ«N6_™¤doÆ¢SNÐ ž zoú.¨Û4S»vªV „H{Õ>i†9dÞìæ÷GwjVâ*Jß.öÖz2ñCðÌVÞžH[gçeÌÑlÖŸF¥bg’’Šw\7¤êu%1d¥F­ºCÆJ)ô”?x¾AFÀ­¥¶o]ÅÑYÏ_¹ lüÕ:…áí×7´®¶·/0F€Ô(rçí+¬€Âsíw)Â'>èˆ7q‘-i‘ú¢µÚž¨Õh†Qá ?Õ$%LòÔ«fÕ²=Ö¾!4ì !Aþwr¨ô\ ÝNSÓ~ò8œÉð¿JŽiNJ…¾píx¯¶„›X¢Žp ±Â4sÒ‰bÆ;eMnÏBBê£Fº˜Ð&éUù]v¡’g×ÌŒ|z k/•¹3;‚ÁnËøé4N\%8 འþj|.ŽWgÖˆd¾Ì4º£ ¾ÆVÏÈðò#Üž …ªÈÀh>NFV)†5*uQ”ð™v˜ÁUU#,Ú?Ô ƒ~·ÁÞcõ®Â3ÚE$rn›#§È•»AúÑé÷&"ðoR¾Aò*§LÌ)WÛ°A«I Ê@ÛöûÔE ¬koʾ|,]îõAªïò]€"7M_Qc®©Ñ kÊ–6«%7#¹Qãf›Ik $.ŒÖxˆpŒôÊò Àsé+¥tB|¾ ìS@;2‚µc x{baTn¤WZ +êeä¼>ôòf0à(hËh ñÙžð +–¼Æs4[œR‚ƒ[âÛ^ý*FØ™e£^™ŠbÖv†Çþ¤ˆ× °Ÿ„ôj©Ñ«|_¬ó)heS.y>y•*·&6ØæP`ƒèª¸ öíÂ7Ø8öM)IªÃ5&À¶Å1@Ý”±Q¬·˜çµº({8ŠµÉâÉ¶Ì +éØžzBX‡—¦½ƱÂöî´«ï.~K55>=&-†Ú7æŒå.‘qòù«JÇ‹»Æ÷uŒãäiêþþÜ¡ïOzP\'L™½[«©ÓJnOœÅ|Á–“˜Æ6?ÑÈï±IËgA 0mÊPÝq•]c)?çdºС–Ù%¥$J¤lÚqâó?ò›,õk=Ä¢¾Ý·”4RÈâ"ý£PXëcó¦0K\J‚SM_Š´é“¢¨é« +ñëN^Ï¢ÙÒ«=ô½É¨ÜØNÙÇ6(°ñÅêÊY¸.h–Â5Ù‰£][ÿ4ø·ýqwÑÙf ¤Ç"/ÀX•hXd/ìvI»ºm­Ýç¥GfØX÷ +ÿK=ö² ’á(ÛHfUïÎãrÒø¾IQ$W0LÇòÅÕµ8ŽÅX‘k¯ß‰ô¹­JÏMÐz{äPãÀ÷–¢Ô“‹¾>$ yò§ÑéÚ"Éjrº½¾‹Xf¯yÓLEœtnÖy“ÉŽœ‰ö•ƒQ]Ex.ò¢B‚…;®úˆ[„çðÂXánÍUîOýÅ›1Nd É¸&÷ïÎŒþ +endstream +endobj +2234 0 obj << /Length1 1616 -/Length2 23613 +/Length2 24418 /Length3 0 -/Length 25229 +/Length 26034 >> stream %!PS-AdobeFont-1.0: URWPalladioL-Roma 1.05 @@ -27423,7 +32814,7 @@ stream /UnderlinePosition -100 def /UnderlineThickness 50 def end readonly def -/FontName /TCRNJT+URWPalladioL-Roma def +/FontName /GLTUCO+URWPalladioL-Roma def /PaintType 0 def /WMode 0 def /FontBBox {-166 -283 1021 943} readonly def @@ -27434,98 +32825,96 @@ currentdict end currentfile eexec ÙÖoc;„j˜›™t°ŸÆÌD[ÂÀ1Æ…p§³T¤¢€®o¿˜ˆà9«`ü¯….´Î:þ¹yÕêpýäJ*åÈÀl'¿–eî¡}#)Á¢Ý&»7+å‚/^§ ™ëWŒ{ïÔLßZ60VåáÌQR^¦üλ3rï÷)€#v¢€$öp~c—²¼´°£êë&“è÷ö'ÄÉÇÆÅ¿ñÁä+ž %;a~•ê!”ty`rô³cþúv5øb‘/¼W©¹™zî²#gß|fà‡ ËÞSS°CŽ¼3R>›NnÁ'Р(ÒeadÌ/³¾•BÒ¡ ¢ÑnKs­Ò¡¦ï°êß w8H3øETzðÒX‘H£W‘}ÎàæÒó±EpúÔÚÅsg“¾cxHSAÑB±Î]& *Pí>9Ýš.‰ÎTZwŒ½¢^±q$ÛšÎÊS²Ï ¾·QÂgÉX¯ušV,Ð’"ª%_â /w!¦•ÿjßTšƒûaù­¼ÝJç@¢Æâ„Pq[¿vÜõ·ÐM~³ïˆµ­ZIêf ·†îÚèè1÷N4Æ6{ÉAö"‡Ç€ôŽ¤ò<Ãhú8ôØ›Ø)¢²¼˜Ð—Âæ¯xl¸aì®-Etö T"ÿ5ñÚµ^1z=Ê̸õe.°ä`•¥Éâé`̹–å,±âÀl‚Iº þõpïpšäЮ#QçrWJ^URòlUïQ6ë·O {Àµà6OQžD´;•Dn -¹`]NèIHΣ{¦ü+¹à×Î#ß逞(BUndê/’‚¶*ƒ ÓošâÀxq¸Ýð{ïo1Ø1«€Hÿ(Áä虸·ˆãÏ–à ¢Ÿ•[äävå ™‡½½i•ïÓ'¾²òj"E`$•Ì·…ÄИ8K¦°iÈbÚîjë{äÃ0eLáÂUluJP, ãiËÙfÆà'ÛªhŸÉ7 › ³®$6â༊:f‰W½<Û6 -%)p\àÏçèeþjÊïÝa—ß\" ûÿ9!œg‰Ç @GˆLmúÂs0½_JEn|‘&$SÁ…Ÿ·úÐ@È\mèªÿý<؇Áu®Zý=½]¡m€jX “æâ}i·¨]ÌP»Û¸ˆ]…à'`€ùL¥Õû}½ÏY>ªÔ;h÷ñŽõdã¯Èg ¨¡û‚ol^k¨‡ i‚âŽEb¹µËЯ'ÕA´vàÅíYqe}¼±ØnŽ—PÞÔA 5Ñ]«X]a¬î-ÎæñÞãš]3ÎÌP›ïi1{#>â訇6sàIŒæ·•Á„÷$G(ìF´ VèŸ[y•·}/ônÂ÷§òñw"Ħ¬¨Ü"}©y Ú8çUÝók¸n‘éÈ'zØûaâñyÄâ5vóT¦g¤g!ï]®¥`WÙn’mmòº7Á[ï]‡lÀ•’2N>|k6ë—Ù ,Wã:á/lÀþv_*ùpñ¸ -jÛFSÇÊ­h1`Œðwým wèÏ¡>¦,,ÕÂ.$aGª:>kÍc½ÐìÛaF‰½VG¨»bF>#ì-¸]cÓÝÁjRÔ¸úŸâjrÃÜxñËTæ;{ê^è1ÿïÿ €æTão¡ ·)a_c]Î#ƒMÄY¨Ò%ð™"#¸áoyÌâñ4g&Ôö¶²è’)^Õ_8ƒÒô‰üÜ‹KŽ?µÂa‚£•üáB…òÔY¢ù.cñWRó²׫Ä°ü§Wãœg`Àöó ?Sù[U Òh"Ï×Jwc|µò—=¥]T$ÚX‘×{ÄSHNÄ£d«´—ÍŽÏÚ£½gÍy7 íü œcÚÿÿ†&R/4z~TÉi  åÏ?\u(ç£Û­ +· üñÒô/oªà>¥<³uY$éÿKBžYxrMt![I¬<èæ²1†:Ê—Ç+uªnÓ=ëÆ(-ž¼˜»lço¯ÃÕìiŠûî$ФûÊffd¢ÜßCVìÃQ4}2\qæiØx–¸%Z¤ĸèÁQLZl)ëëã⟜n¿ñ'y¹Ý¥q5LÒn83£V­ü<߬ËÙjúÎÎ7Àf>„%6Hèdu[éJú ר@DlîÐôgóë¦q¨Ÿ‰a맪¸.^©óêm B#§ß&7ô¤ÍpNŽ¯ÇWoã.®¿XHà -7Òu,š_ !ñî´DLÈ‚Ù™õ2ìmTÊ»V-íMV‚2Î÷7?„Û þr™GŠe->4paÓ¹Ù2ƒ=ƒü·|Iö ¶ ~FJ¸ñBE=)?Q¡»—ÚXbogèsó“V»=…’É"¸(LìÔÞWþ¢ˆm˜‚I%Y -èoŒ1Ü©imÜû]žiÑâeÛ/R¸R@ö»óÖG«æW>QÅ×9Ú+@ì7?±7ңªZÆ8õ_ê8ÕódG¸%1ÐBå ¶ 0 kC[}Ëdü{žÑ“Z[x’œ)SÓÿ#WÖß³dÄš¥ÿæ MdFÖ,*Ác‰ÿ=uói2D é왥–©)ßcS6Ò:œ>³–èª+5xë¨-´6â”ñ%’k“°Ú0³áu¤šÂÿCÐØ+Ÿ ÷“Ý3Á¡cr?nf®ä hj÷áãoÔ¬D“a#/],mE(SmÔúz¥¾&*›#™$’bmÅÚ0ÆEÉò;×øÅRº?@zx~ÀY樜ª'òÂ/:Ûœ2ar6Jžš]Ãþî[‰àÑeýÿÉ9'‹÷C¶ ΤÆäÍâÁÿOžQkšâ®Lm9r ·6œòé¢@gSù s‚È¿¢[ùçˆ0U7…]:çaª÷°¹§AK h…ËS5©²zöL…mÚ3IŸ ±—©ÔUœrRxM›×¼,åQ©û Õ2òµ¿Ë"ËŽxgDôõ%¿kXS°Ö††äÍF5W@^“jYªÅš80l‰x?ÑRè%@ßõ1œg×±ày _¯¸Í”äÓäœ%ãg¬×gƪÀàD‚Íüîže„8%MÒùö\è§a݉ão.ª³·vT$R'l.1UøX¼²Ê¬ûºlnþ8LÂ!ë3ž_±'”/˜9Iýêñ¤¯–^|‹ŽcÜjx¶ON‘Žø”ÏÔ°’‚QL%"¸“[‘>ÇAñF -Iâé&†¬Q-RÅ Ö Žø‘3©ã žlz{%_> ù{—Sži¡:–ʵ¼’åë” gò­±BœM“î¶ÇÃGiü kEªÑй©ƒÚÑß펎V==ÄážÈÃØè[vû±@ðÚÞ嬽»C0xSøI¤'¥‡4ãhÞÁ¦¸ÕßtZÎà!îˆ$ JÅý¢'?›²9yA¤'òפÜ3UŒIi”r0Àƒß‘å9Â8)”JQˆv‹O0ŸÎ<{>hçá«ò 2CG‡¡+ý>áÛôŽ˜%„EâÓïðÛ<±N1/± -ꃼ”Xp>î¯ýXþH¥™Äž¿'e?¹pµ‡€G(¯ôCc+XŠI¡\æûTÐb“ó´Xø3åZ$V£ |ÜþEsPæ± r¹Dß"euˆÝ(ï´7áÌ ö‘@¯¬†þk¡ -<ÚzW&üíY83LÎOª*¾~ïb¡÷ áʺpÛÝ;¼‚Ÿ¦Î…véMÁP+C -¿.å–³+³ÙËbßÂ<±™‚sê^ønC¸JÎ4Ö7 ðû¤%ú‘‚žAÉ"\Ayý„&7ç;Ÿ ®ù_†3'‡)€j2á1“oÖGáZþ>iJ~®G Â¹wÊ@Ë‘!<˜‘ôï)‚½_?Ž=Ðÿo*›|Ib\§§W$L)µt+Dq—r­3Ö°ÀYl؆5Ú;”á­ˆJ¢ö¡„ä|ˆí5¥9Ü ?oU -‡ÇÃijØ<¡P™ìß:'>Œë2 -‡Ÿ’D°n;Ní»’Z¼µ¬n!¯êïM' ²N¢ çq Aáçý -´K±h94°˜·çˆÞ¡ŸeÙXŸ‹ >ξVŒßw‚ƒBy6¶±µÈ'Ù¶3ý\Ó.D1ô”ö{ýæh]åK¶ù…Ç\ÕX*äØJ7Íämºó Î »QÖ[u«û.žÄ§Âö=ñno‹ÝDD<Ý FYÞ£ƒ aí—äOÌ4õ7gI–yN¤k\Và郵´¥Þª4¶`X~ª›]c»g·¡7 -š»¼EyS°³F`Õ “Ïeä’‚ä•…b¡>ÞÙINtÞ& Û«ñ×qv­è6¨¨~°6òèóŸ·¶{FeDlZSVFB²nRTPŒ¥Z/›èõ º -µz`g­ÆM` -ÔËfØÆIiMæ+•4Øûÿ5€Ÿ j¯ýœ:ÐV¹ÍÈÕ]S_J¯HA‚c¤‘1¨úzw¨¢Ÿ#ôgœò¶Î=›°+Uk²(â¤Ã™é 4í}ç•ö7ŠÖÀM°Êæ^Ÿ[å©\1d´ñ<‹Ï -*½­ê^òºþeu‡ËŠVYlšMœý›Mü;ì -WR¥„ÃŽè°û ½]\ ÒSZ|ÞPÅ?Wu׬X“È÷.ë±î¾µj\ß’hž„S¯WC¹Ï }3¥À¢ ™‡Œž ŠÇÐR5 ¨pÓWà îf"¿Þ#Ègr¹ìjE?9ñÅNdðBl¯µ>M§†ì£‰šú‰ û"¡^®HÊ Båõd&¬³Ú²Ì}nŸ3]ÓkÔñGoõP–¿eŽÜxhW¹»LFåÀÜ*$(]fµŸV\s‘ا/ÓzhUSíéÌÒ}è(㎧õ§õâÞrö¢3Š#:>²G£#{æë;ˆÏe»¤³±MÚÞ¸g¥[ò4º;‘Їf T:¾˜î»û:¾£Tjë˳%¬X?1Z¸ÂLöbÌTsÃdbUçûG1ñ,uµÞq ]WÍ‚´Þ—û¤]¿­ù°‚€|75þé½Ïs;ξùE#„~/AVx³áÚ&ü¼à,ŽÎÊ!ÓÚ+;±¸ÃO™¾Ù6Ó÷p<⪳’Ыÿ·xq±fdãéu³ »[‹›Gf <µQæŸ_$Ð=nûqRüGÕ ß z >pÎFù}Ñ òóÎÎ]¬ð Γd “,ŒKYÍ2s6âäŒÄ‚eÐ&¢Ë|¹6'šÒcœÆs6÷º•¬.’»â×ëk• -œ÷(ûãöJæhnë¡}Kÿ) -–a~ß̧½ù°-þ Œ·LÚ9î'*ß]25RqŸ6gOu–´»‚~I΄NJZW›1›Tú 0*ÞðUgÞ??¿½9¯ÈŸx.>¢AÊÒˆ¢E@Óâ’eYûáØB8ùncç±$ø†Ø¬ˆsí_N›õì]__ñKú«ÖV…º>·wqZÝ,2 ;YÏÔíè«À¢5œöІ‚ÌžÑi³X©FóFxbº×Óçøž ·ÏÏvê,,ì" Á-è²W¬9«ÌÛcf¨šü®X=j*çŸUr~J{<×Ɉå~û ¾N /ÅàÓM¼ÆÓÍ›* tÌ*ášh îøÒJüu3GÚò¶‡ÝŸ× .¸*óû|X`‘D',âjt/ªz&ÔéšõœÒÓö,¿‹J¯Ø“°±®'kØZ¡rÎóQ‰µÃéO®ÃÏödž™Ø,ë ««ÆjWþ:G}PM”‘Šï¼ì"Ókò1qô‹ÜvE¶‘ñLT'扼šº «FŸ,O"ïTN§çscF¬gúÎ\T‚ûøÒ”#äzh[;ÇÞÎÎÅ-n^ß^¸œ=Bî·9<Œ\¡¿O.ðóƒßŸ_0ÖïMðP)7:ž>¸mR§7ZØhКcÆD}|˜{3kFÉ"¬;‰ñ@%æ+t1Â+©ÔªJ2:ìI?— C(§ÎòîV³Úç ùa#û­ùo±ZãU½›J 8ßyX€cú\x€-Úž!7»°Pt~}Q/œg)l·5ÄŠeO5Ž~lì”äÏÕ.f¤ÿx°é$±=È“ŠUŸ [òÚÔAr^☬qÖ7¥hƬ7‘Øå B(p$¥M§:8_šønöãõ!¾#Ï¡tz¯ `¹G0¾äüê˜ 1‰LøÒJ <¼=®F{,0jk&ÇG®j¢·u\Kα…ËöóbÁF¢Ú?ZŽ®–{g8yzü÷~\J2’ͨ!#ä቗xk§.‘º'Yu>–£'¶ÇÌ Ö-½Ê^µ|ƒ-,Å6Jµ©núöEd]'ܵ iÖç8ú´ü¦Ã/¼%¥ž†K¬ÒárqSžT=ý¤'/Ï“î:4Ãððá]]&§/H—ŸŒ<çÖZ5_ÝbùŒªìÖL}„hÚmÏ'g§2~IÓZÉñ8y`û„>²ž;jPf›Ñ¤ƒ†Ù©Ú -h["šð¸X­Ð£EÍQ8,ÑšÌJXxd5i檾5°™é“n›,„ˆ¸¸úmTÉÈJ`§9óÍl‰B¶Ò.LEÜËÑÐ< Âó¿ŠÂäÎó?jÚiS"õåü® ~¢kšL^Š%wk÷¬½Õˆ…:ùß@ä"¶7:˦ð8âþF¢(Ðo.•uàæE_ô%A¹V¢CÊ ÿšN™ÅäPKp–àaVi17.DFr’”7Kî%”a}´¨;¬»È"¸°ÂÊæàщՌ¨LU €ŽŠS(tv/,,å€ xß8¢=Z/ˆmWÁ†hÅú.ˆ‹† -×Cs/%ò„pµ`¢°u7®® ñ ×.®4@,99媦VȪ›Xc‚œ8Ñâc\2‰öñF]Ï3½.›~r ß|ImÃ…9ÔMãÎ@é¶PæE©o£šÁy ¥»/!-]HPIíŒRnãQ%( â= -€`W²‹a"N¿$ÿ}ø$Íít«ª@ì¸[vem Ùgùd&òLÍžE*“Lffp–10<‡,šûµíO‚"ß÷&ý-½uƒ‘ê"ª¤‰¸zHm Û´:B×Õ[@4µ)ãåÒo¼Ù:$¬^”V›¯68À¸ ÑKƒb±õ¶,sà ŒÄ½c>í†Ö-¿¸&bWÝ­f'ÑÜ?âÀÝ•§•Øˆu{5oÜVƒÁôÚHí°ž`s¢çK•äl9ë K7Šæáž8‚)TbNoNÙ(!z¢v:ƒ—ä "ÓuçyîX-ÒÔ½ŽÏàh- ×ËÔFÑe WnÑâ1lÄÖŒË"3‰C•µHNÓw¶¹ŽuUëG4 (|—Âל}´à±í h°ÝÉ'3˜`«3³iëú«ÁÊ-Q4ô])¦³O¨Z¸K²Å=¿i@ä™UÃ3ðÎLx–¥h{Ùó=2vͪ, P‘ÇkŸë츜k~"Æ»]£û‡jd8n… ÝFLfÃnäÏ¢ÐYK²Ïlî”-k$Çh¯‚“YÙX^d9 %Þ¬_ŠG€%Žô„<9íÖMYý”æ˜Ð¦P£ ѳ[k¬×°€¾wÛ/‹œ Ë©%y®r7ö{1=]˘ó“yùæ"GßxË×=aM Ñ‹.¸@áo¡Öû gWeW8$î$é¥c¶ØØ‚À—·¼÷Õ­d -%¿t*tÿŒ;,kîOf6”RuËJF’9?VÃ9_m›Šäý î¬ûW vÐ ò/Mj7D¿[¢äYÑ]ª,mÕŸLµQºãÄ‹ɇ˜Ižc½YjóížáÓ´¤~Z°á¿d9C¸Þ÷ ¥È™|Z|ö}†mÌ’™Ä¨ éªçŸÇþ狵Ó*8pÞù—T¡ »…•š)ú‰u*Á‚ÿ…Éïa‰#ŸR7—­6—K‡!n<¨¶ï‹h+ôEå§< ,·Vl¾æ«€R÷ 4CCÎÒÃЇ1࣠™ø v×T…ÜHÇk¯t"]ç2Ÿ_ÅÐà%ûBÞLbR“Ÿ{QJ÷–¼Ý…”#öSÈyÃ?6¥ã¾)~~<BÓéWe$k7‚ýÜT‚|™F¸lœyS•tÂÄž#ã³æ(]æw‚ôSíÅH)P5iNy˜@Sü›fÁýÐ1Å¢HóÑÑ8Â3þÔÓ¥Æ^.£)ö¦ØšpfœˆJv]}ühàwÒqò}bÝG`ÞoЊº3·+âD!Íöñ«v€Š6ÂÞlb${ ­:â*š£9‚&³«JeWH­Ä¹å<¤ú2 YêÏÈþôJ€i¾—¾;"ž_ôfÇêúðÒ˜fͨ‚˜<¤èþXIöBÀ€ -‘\ ƒI¾Å “ÝP†¡¼‡Œ=‰P¶ÝÖÁ²ùÐ7Ÿê(Üõs`;AÛ⊸4ž2¶Z”y¿U½6dmåì r‹p#ÃïK¸_¢&LUÖ©˜6ˆ`ñ-V Sÿ€CÉuâo€WöšÞÒ(FLÀ†Mú® -çù·8VÁ‹cllJ1 Rí®ÇÖ€MÍË›0µŒÁmõ: -wiÀ6›}›ì;@|:Ñê=ÔÖÓïxí¬"éÌz2ßù rx¥– cmÓlÕiµA^a<ïÖwPæ“pê¿=ÕXZçš.B­d•z{$ì¯'hþxI’'@þìr3[û,èeÈHïq×ÛPßþo–´Øô¢]­é±½\ñ”ÎÛ›*žxt)o ƤPÚ‡gûIOf rö…º¦<ÄÈ{Xp¿®É²¤•î$P N‰ôÍ1©<1.¤gG0Acîùï}kB<¾÷ê;U%™´!?̲a?d/»ü‚ ,j5›¦IEf¬e8þ„ö;–¯7Ìž.ã—Q!öÀóLÝ¥ÆÐÇûŽ±²_P//8Çáy¥ºk™”žË xN,ˆ®÷}Õ¼N™,%&%õ¶ãÔûÖçݸÀ³ÇwN@«`ЀÖŸN÷¼›Qõ¢qèIåþ^<ÍnªÐ¡b‹câ1ÕPh'†8@~¾¥ dˆöJA=±¶ìHø³°ÜÏÿÝÈÒÑ«€´gqÛ -'ø‹—¸7dÍKË=ù÷¤•-¢ãÐ*U(ô`¾ßÝÓ‹üàvÓKØÿûªcAHôb¸¹H`£2@æó¸íÑGƒ¿OFÊy‰Ó+Lå¯-1¤«Ó94Pdeä¡·=DÁùC¶Š¦¯?)«Æg Þ?)§nטQQ -èæ".ÔÕÆ_p.T¬šdFÞcV¬O‰í>Àß‘ÔyT&³ãÉí?±©n´†¬›aÐH±.Àb kÔVÛ5-‹få›å>Ú³Ú}YÌnúWŸŠ7Š 0Ì鍊3óÁ55á|ˆ‡Ò7Fbc ¿À;<¢Sf&^R:üÊgýÌC‰ób•¯švz£©ë.\'ø±>Š®—pñ6Cwå÷^4_=`ØÅUº-ñí¹¦Uî4Ÿ VÛmbhÆÏ °Ù>\¡Xí|›ùaXFî6n2¥fÞJÚ(‡{k†vÕ?ÖÏ#K -X›1aÃH´E±;lÿ•G=0H#õœíÛ †íœÔ¤žFÛDkjÉäEöô½k© Él™®B¸]´˜‹ÃTó™§jV©”þõ‚#û%º‡½à¡—Pʔѕ<ûïùT‹äÔ -À¿.' K?Ë-¸ø|ˆwAÀ¢U¡mêHàŽµp“-D\ÔßÌ„šp#ñ¼Ü™;1Ó :¿.±°êÅÖ ÊOÕVªŸ±ŒÓx¨ßmc=Åvqà vü_?1 q‰™/Ëk9’,+ÃæÜYþ6ãà!ÈÏâ¤Áœ‚mÑ´Õ¡õÓW&¦¡'½«J[2LÕ´ÕR«é«›ŸI´¶¦»(¾_ª} Øå))x=ÍÈ%‡fGo°µ G¼>ÏS^ÅF¨ÍbAfÖÇÁíKÎ@îÇàÈ?ff%h…æìoá}Œ 5WyY×ø¹]ÌŠU¤úÖ?ÝúóQªçK¦û¿31ø{O5t(œÆ*¼EñKEz‘J°wÄvížz~ßv%¶s+å;A…´é:ØA'*ϯëˆâ>ôE…ó#jÏêÒÏE´3Hõ~½8¡ËÞ\¬Äx´Ãö÷T²6k9¶*çÓ7í´f8ëã<±ðCŸÂB~³°àrö±0 -/ qvP;*¾Òk’ã‚ë/¥å³úœò]žFÍ8=8Wñ>„„¡þa TêÛoUw²ÒGtOpwN` ·_[gÕAC Hż¨D%ÞJ€ ÐÊÏ(¥ñ^+ÓPI úÊÎèÜ ˜ÝmÍlRX£\/6ÕŠáQÈ)Âo›ˆ¬œ ó––fü^¿ ‰l× ø „‹ŽS<ܧäT’0¾—Vþ#”[^{£Mk|ø¼ë´iÔ 9§%ìŽñžþO‰Q}LS°èJÔ…Ìçk“7¦°¥£›P¹/ I@ Mš‹êPØ©õAº<C¸tÏ·ÛïÑ›¨½nOY¯×Nr­Í{görìxúãì~Äc2gD€ß¢BuÏD¸@|+O}2M ë4±ã±ú¤,2ÚM$8Cµf˜bRÅ A'æ?ZÕšañ¬Ä¯§Sm“…ô–mÑKÛ­ñÝÌ-(¦ÔÉÐûˆ*À}Ò;›˜‘$é–e+&ìÝÇÒô/áI§p@C›;mFôÀéeQq){åÅÈXxüdž½Níhá§ÿf[ïpçmŸòY±HEµ³AUøS™‡Àƒ -<ºÁ¨Fjýê#÷Eä/÷)¨3\Çcœ(ÅÀŒ†r]dÖOPR2È9´øÏšT }]â(­Wæ¿y(R?“C- Þ{³UŽÇ­BÆîøJC¡æÚTGúP )D|"6Á—rÖ­>ÎËu*0 ˆi~¦rS¥Zé©Ùµ…ùÒ1òø‰¦‹²£ƒ-÷Lî°!VÄ6¡þœ£kF±¤® )>AôÆGü -K©’Yˆ ê©>ñÃ'O0r¶QÆš…WCÔ%ÊÂhÆ» Ó¢ˆi+3yÉŠÌbh¶’̃$ŒÃ‡zmT'¢ªºtôná¹gJã'EJjI\%LúåùôøJÍvb •×P³.Èés =)¢U¤½¢„¡ µ‚Ùfê>”„€åí–ˆÁ‡.at<ÒRí劙 ã7xÊRxQD¼³;j%À_”·®fRI쾦Ž+ÆpDè|¤¦S…ŠV kU«pNTÌW@;Lµô—™¢ Áш֊ê2º÷°O¯¬Ím ´ŽL “é«]ÞÒ/“Š÷ð^b~Bò·RWzYTí°­½Å¤ÊÕ‡æ]"Íùk€#<•‹w¦=œ}¨üØçF2ÿŒ÷9Jµ¶E,ë†t®#C`¶¬²év¬Ó'èGØã2N*Ô§ÿ£HÓ!ú7³\ÐyZß%m$\»Vi2SV3¦à¾UBÉ©q)zf†o–ï %4Þáâ§B¹ðòvM²6„\ì)®"&T|N¡wb­%š°6UèÛžYjžžzëÚÈ}^%:1‹¶¸¯ •-ñÆ…vûDé²²£0H ßR>~“ƒà_´8¬“f½×°ø{àG J·c°ä‰à;ÉÈ€É?XÝ,ÿôCÅ°*ŒS EÛC´Œ#)¢å:¿tÃá&¹‘xÜÊ›F/áø¢ äÕÉ¿0"8t -±Ê”ó‘C]l˜å–ðol½I^†èó¿ØèÞ]Xº“4âtâl›0ªîŠ7»Žëùé HOÈÈÖ7ööÀn°£¶…x0ümì­µKÚÒýé!qä„2»«•Ž¢:ù¹ <ðh—â^ ·YMåøeu…z4ƒµ, ÏrFÛ !.qÚ -]q‘‘ƒ)Åä’™›¢ Ô™Zb+¯~!z.cŠÝΈÎPÌÈ€Óú\l;%H£‚øFÌ Å '®}q÷qðfXßÇýiáfæ¥<ú·+©œÃòö -–ñ‘lNú¬eó‹­ÖålPlð‚Ž›‡8 Ô[Gnm˜šS¢˜6f wEÔ$›ŒÍú‘ˆ‡‚¥$jÊÒæš#',õ™1U„#¥XÓ#Ö (Œ5Õ¸y¡þƒ:¸NçF'â$YNßtk“«–)Jz31ˆà@W]´OˆS5Ë(mïu‹Ê/é}Ž=/’Bv®‰üÀý•>I·[îÛ½›ëIÂUý]Ö{6-H±05>¬Ê—Ôæ$²MDí=?v(Í-Œ§¡±3§EØüì2Ñ')f$!œ½áG!†O)ÎÐÉFÍeÉaߧ©×X -Á[ÈfVl±J;Á5ŽÃú"á_Æóì` VË–>n¨ BÄjq™ÇQ›*Üäùyvwpÿ–ÌÙâ?rãõ -‘Aªüæs;7ó`5å5¼F8öÑg.ã7ˆm €áÊUÞxu´äÈl÷bÈkyQšû¹¹´%‹R5ZBbÉF{–£õ 3ÆÙþL„¤ÁW1¹zߟ–‹O÷]ã]4c/" ÇÂhâvWÏÕNÿÖjõZ¡Jñsõ­;¯‹7ï Kkåg®ÝÞ§)E+eÒ¯ð³iû~—iJúvwOÃúZXïø¬Ñ2Ʋ™ Í=¶§Ñšnõ-(· ëËÕ;!˜—PÂÂNi—ÌÏæ%šõ½Wms`®¦YŠÚÍOº -LÀ œ÷¸FcÕ/R"ÚÔJ-»™¡s|½”W¯Ëš¤*Ã?bÀ1|Ñjçº.f™‘§7y QºÑÔ ¿£í üW5öÌ5qÑ Å } X´¯§6lO(2œ:>";òÜv,…›Ü¨hŒ‹z0ÞE:8¿ãn7Ãœª=§áº¤@þat&s‰©°c¡h±†Ù¬,)›=Ä.ì±1Œ°_\26=ÛMNiÈçO€þºþ\5YKœ½$O­ê‹«sy_øìo9È[¦‚ÏœÆRg#Ÿ¹ý§úê{n5,fZÜ3ƒA<Ö jÚË’é-(¤´M`‰= Mçœ_{¸^£y=w÷R¼º ´5ÁøȶqÚ·eÞ‡”¨Ÿl æçf3Hº‰ñ«¼ä ön¸ÐzûÉ9K©1¿V[«syÐÁYjN(ç8ÐÅWì—ì¤ÁW1¹zߟ–ˆ"M™}àeáŽ(ŒŸ¨Ï{»_ë,K^—QÈÈ9qšÃž.^Ér1’bʦã|ôÚ{³¹Ì9¥a¾¢7 r ê*çÝtô3<áô+%ï+AH`ƒ-d¶¾ -“Tm€þRîu°]fi””±m+~&ÜO$‡!—OƒÏ@Q´y(È«ÜÄ„%|2SÌ=•*éO0À #a;…07L½ºŠÚf•Š ©AvûÏÉíÑ]‘îA•¦[ÀùÏzÅ% ,ÞS@›ÎÊéDAi ÕêÿÃËÀq‡9-"ÿåZI) dDÁ[Ò]Q(==‡…ÝÖ2£‡.ãÇkÙ\½jcóË=UBóÉ–™?ñuü²»Ûó¨iœ¨NpK¼…ÖÚ]§Æ¡Yl*cú:*@ˆ*<ž6CìY9É¡­„Vv0E‡¼Õ~”I0ÜE¶ÈYlAQß}]µ­w‡©b+j6“yžŠˆÒZèÊ 1[a¶¾Ž/s¢#U8·BÏfÈmá°d¢µ›ê—,¸Ê>YŒ¡¸CÏóÊX€͖ʵÑ“Æ]õ™ÿí|sn”_„S«¼ ™êèΠ®Ú¬è4‘#ïãQ=1Ýq ¼§éÝ'ÌÝv»›‹†{ïaæ]y¢Iª>ËP1ßÚÄ‰Ê±å• KÒÅeƒ}ËÏSU=‚”+ý;­Þ)ö#ïPɱ*0 -dC¾ü5 ’1ºHÂ?¤|)QÐêyµëIg´]™$ØñL@<ÜüÇ9 a2H½)¶WÖ+©P!Š:®£Gok¯ÒRj(‰s¡“RÙæÖöe3i‰‹\ê õÃŽàïNZ‰¨G×—¦’ÄK>"(H¯UóRÒ«T)¿vá§SˆÒ…´ä¬0Ñ°žu6‰ŽüÑb6Yœ·ŠçR6—ä åÑ_³Œjæ¶Ç²_ÓÂ&\‡0Ù³¦Eší¶Ç©hÄÍ–šo߸’ÑímÍŸïÆÝÂAO+(£oý‹z†ÿÀH§ÄáþË|¼J›$5Ö ¾¦ÆÇWWn äKRÐÕí.‹rW6ˆOevc¬†^~ö#BÌ'L$¶³¦„Ý5ìT&Í-Ž »Šº ”Z?ÓÉìËÚKÓ¿Øj¨[ý¢Ù&‘×D*wsgÞÐïo’­Lxõ\Mm¨bµŒ,»»…ê2#I¡¦º¯Ä&wO ¬¾Ì‹ž´6+ÞOÉæ½±\ï&¨‘÷‘ê4ª»9‹„ŠËM¶X˜2©K*Ø™´oJ2ü|ë~æ&Å0ÕSèÑ÷WQÑ‘BO5Šïµ‘Ítcgj²WPš†Ê¾&…-V8Á¾zÉEóøTuÅâ¥Ax³é7ËÚ9û‹ÑÙˆÙ"œ6lC»Eü§P‹y±g&Õ>„ ¸þŒaºÔ©ÈÉv°-–]jÉÁ±|¥¼v²ê}AùÅÜí–zçšåM˜RH'#ÆnÆ…zBèSÞ(oK8;õ_˺“Qju±•=wi<2TÍsôÆpDõ÷X `9Âi–ÕÚ«ê…W‡ †’Eh]v&<íÝK ¿ÙNÁv!D{À$µ¾)v$µ‹|fÛÚ2Õèêö§TZÀkv~w+bøM*òø:Ž%úšóèA»ÑrÌæÍ+`á6ùš­˜„ÂÒEKñ=âõ®¶š‡ -”ÌAôÓ›m_Åá˧¼Îß²"hg#$Ÿ\ïá³Áaà–[¡÷1}Rõ¨„V›G¹ëâ×?¦¬ø´~ã蜟»g°E¢.‡Å)qÇëçD¥]ep[_;¾ ÈÛ‰ Ø4iص“ ¬{ð• ù¹.WàxNåö-¤%¤' ]œaꙂgûQÏBî­{°ª@êÜú1Š®¯œ&3ƒiö‚™‡Àûƒ˜D ~` ¨ ß‹K¯å¼Ét¼ÂŽU‘H° øøvíÌO8@Uý7ÅuraY<”¹:L:eèÈ á„+§‰ðUD)å]ôÍ,Ùšå­"óyq(Íñ „U_ŽÙIRwN ü6îpñ…ØT='ȱí·Ä¿üx¿»ŸÛä§âF«kI'$Uv[ÿØ#Žf.Ó” SsVƒ@:ü3‚¬ÑCÀÒ}!ø’¹®”Y@…bYMÿžúºqÛü›a?ÇGn)´Ö}kn=F·JŸ¿½ËÏÏÇ?|®ýÁWœq°‚߃ýãÐR½.Ìé0]MlChÖg– Ê‘`j¾˜Qnë”Bcm2^y÷¿¬á9¸†ÿÔÃâxÝ¢Ô—ðšÜ4”‹hÔkD qB‚¾!x˜Cq'ÎßÂÁ!Ô\`dx¯Ï0À’ñݶöЋ”KG2‹XTD,_¹·Aõ²åþǧu>Fí^ƒÅ]cÔ.`=õ®Pµ_ATÖc8œKZˆ9ùù³mc=rØ«ÜËÁï…2?û'*#>žúÕVvûÉ=“&MÛ¶›¬Ë+Îp{Ó›HžTEÿhÏ ðàzJ2dÉ ‘a鶛-;±c`¡è¸/þ®ºnÞÅþØvº˜ß\´¬Ÿ#ÏU’м~ûÕËŨΟ7N»qïóƒÜXümª1ÄuåÔsÍ\»%)a‘Ø©ð‘a¤¬jr|é£â,n”8äõY1f‹i‹<¾& C¾õŒ.P1îz_38AâÝsR%³¿úêKo@Z“u¿yjõ èïºæÃsú*Òˆ°|)ŸiÊ€‘õè,jÔ9< _ .Ã4¢°üùR…%|B¦í·ŒqYPç -¬Œ®±ž«6Ø µ¨‚(aGV£$}sÆý_»¿ÿå…²GFCõðÔíõã"K''ËúŠ’¿$+È•óšCC6]|6è*L5áòšÅþº…2>#6ðÖ±òm´¾ì׋Šãµ¤I¿“™›U²«€lU¶ÿpHÕûU‡,Ùå[.:â‹Œ_MqûŸóùWcE‚†ß‘á~ï²3ø¹äø'‹ÇÏ3˜:sÞýé©À¹š ‚uñ/ð/pÌ~s¯ääô8jÙ«úN4Kª5H~&Š£Töȼ#°ïÌÈNÏPFlp²6À¡_Tre—ô»É&É*>ðÁÉ’¬™¿Õ¯ -Âb`…¢½³êØlsóÉVÁ#ƒƒM%Bା¿ð­ÐÌ`|ze‹µ¸†mYKû½;3àPø^P„!øã4g„„¡oœ9õA†ž:éÌ{`‡ÉópâC(Ã+•Ç*$ݬÝúØRÞ´¡ò#a ¥W:XÛAfæêY¤½0ù5»W5>¿›WSlú÷Ž`Â(ÍqnG,(ŽßJ¼-ëÕÃÍ×ÓtEÙÜUЦhu†BÎzñe} ‰žèÂÍVfumŹÏ‡»™»“Ï#Ön§œ>Ì\ÝGA/}ðÊÙîop†…ƒ)ªÓDZÛÝ­áŒÈ—Ë0a¥±Þ/ñÙˆïàD´ê¾4îÛMg HF÷óÐгñàIc­áÙB4J7gÂÃ*-¹áœxÄï/Ÿ–Ã/]\›-æ `#f8íK‰n#ÝNgv#¡Ó¬u^0Õ¦1Ô'ùG~úx"º30æì7V´Û¨MõΗºS$7ˆZõWÐ’j|³µg¹‡vù]›ô­h7[úŠë‰’³Ä¢ôèbÞ:ÄN%“Ué4«H§FcôüÛOÛ¤|õ1q8A§I;Ï­b p;¬;«¨Õ^fëÅÕý'-K„4ƒƒÐƒÜ¥HÔÅ"EºÍâK4ý¿Üó¤Ï„Ò*_B¾=Óèw`2±] aŠØ4ÎI eâ¦ÎŽˆËtzˆQÛ‘›A\ÓI¢=ƒ< ‹u)îøsf­_&‡‚+“£ú¼8xÀcI‘m*çhùÈ -Ö©/ê™ú2¡‡îB˜Åà6^AĪĉܴÊ3,-ų=@–û2¿àí /ÑŠÈÄ< A…ÌŠNðµÌ/òXS©dÔ5LUî×zl×õ¸Rëýí9%d"‘ß&Š]ÈóÑR“,}e…Aè$[}…–ž|.g¨È·C©¶˜´ˆaGoqo<_o­>J?*ÿB—œèºPõñL¦ä. á‡úYØDàmž uõô¯#ZÜ¥­9[„ªp€Á¼ -™‰kU'¾ eŠÌ-i4¸³<‚‚àn5ë \Õ}@ru}ØM[G£±±¾ß*Ï4³~Æ葦~>}ƒ=’ ×lØJ¬‚qUÔª õq5ÉvâéÎÄ>‹¸B­M“îxš%U*ØþîD(®><㹃¿ß8Í%ŸtF¶føòÃèѺÇy¡Éh×cÊ/ %o8#rtÀr9/ãÀEUÀc¨ëjU“tb'ø!«—ds¯Ñ2mÁeÇÛ{‚¹Îb. µæÃ!¸ÕgFë‚VyÔR/â!/™?Ëf>I)tItÃÞ^ÛDã~sRg½gpx Ž¢ÚGÖp÷¢2„ÚÝ©±ý<P«p{ᆛè†6á<:Ó¾ÒLaJ«À×HßÂòÐ)[ÇA0"Ì÷ž’¾ eèå~&%«ï)}- Ø:þí%ÏÕv 5˜5Î~®˜Ô;öùq - L¥C{€~é>ôd†ûEÅ:C;”æ§.‚aT/«VÊ/·I½­tŽCf c7y–¤è -qqv~Þ…[R -ß -•€$\GŒÊËŸ~LSÇôßFÕD² Ô2ûR7«NÙ­œ;7½€ÇÏ¡Á.È2(€†±ŽýÀñœ4ɦ—’ÌÞ¢“;d!òêãFªMº€êvÈ®>î¥oˆûæÀfT„h>›+/,³=†|T¥¶bÂë°Ù-ÝJ&ºú—tÓáÅ‹å8ÎZ„lí8§–Àæ £,wÈ$¬A•šŸÍɨ†ÚœÊòÜ7v"HàTr¥nE­^r¿Òç­]…KOÓÉi[áøvÒÓ*‡À¬K0ªFoX;îQŽºrË`Ëâ¿KÆØCóœ¢<–ظ†D¸â˜Nk¯èáSDAôš¦Ïm… ¾\RR@Ô§U¾°4_Ó®÷˜«3ç¾ÁŸÿö&}YbµqøÉÛ§òû ¤ìéS *Õþr¼ QŽœC¼ø@ @9Lß”VËßÇE iLI&T(ŠÏØÆÅu3ÅÁBCÔË­góåòß[Ê­8zI?Í48‹ÜzìÀbÜÎÛb)u€Êæ(G÷TÞR[¿ß_\Ww‡U‘ÛÌß²ÇÁدGq3ùò#óØÖÀwCé3²˜† {ÉFíë°ÖA½¬‹R$¨Œ´zzÊwØl–es? cF+Š\ôqïh€-“áÆ´[šH¦Q8êõyÌSé,ø´‰eí÷ïÈ°‰A"X‹–î ²',Šj¸RM“,›ò¶)0ËÀtýR` ƒy®Û“^õóšS™b’~6¢ä¦Vã6?:ìz»fùÈGýÅ7x9‰*¾ñ™oÀ`ÃïäÑöé 4Âê”d¿­6±úƒkOËqEKÌŽÍÒÑ-À¼][wœh‰f„æ9•È®.eÒîÿ!îëRîÙ*•S1ö芷†1üªŠð`‡]ÖfÁ,zÒH˜Þ“è3:±×΃Rìv¼ï}òññ¨I-C ZKp*I‰4ï7`Ø+ÒߥE§²%ÀZð€ fÜþNú§ÕFVJ¬°Ç¥ƒ ù(d³KI 6s)•OY5#ÝPu¸x¨þ©½¿cØ e®PÚùžÿEu×d­d‚<ä"<âÓsánÛRÿÁÔ‘f„ßu`ƒàÕûF:w5—oá«a©ÂþÓÍ™ÙL 7¸×HæS-“ý‘hD¾;¢;ÖeÑl W#d¶-L¨Qû€»ÝŒ›¯è)\ïòò›…DãÃçRjG”ëvœ¼™¡þk3°ÿÈrÊu;Qdµ°ë ±­¦§½"#eô`[ªÖâÃœ ¬´³ó©JÁ;ÏДs,ûC] › -QeVÕ~ %rÁó¯zK’FÞ—¿||œ.W{¤Íq`S~iï/Èõ_«—©ÏwvêçË6T¡nªÙ™1[aj¦:“%Ö˜<½¥ÑV¡ò:* -v ÄWô(V¥¦ò=3=ía-väTÓÎ.볫»Zü¯ít„q­ƒE˜o‹ŒIZûfþ,)ƒ¬X& /ÏØ Iw½RÀ8¬ŽZ¹$kÚh’[ÉNXÒpyL8e–Gï+Xh¤òW"þýs[åp†pÿ$9·Rÿ°eÒngÞGÌåÙ{¶õîK5­zsÆd‡ ga1ëL7Üd”¶Ìiœî_Jç.°Ç×L§(¥cr¦ô¶Î•”Á@„ëT VƒN5Ó ÷Œ;E"ž½ -ëm³•d6U2ñ‚…M^?o¾ám#"|Íi s„•–Ñ  -¯ôÓ7’Ã9˜®ý"u)B­ÐÖA|ôR}Šyzi—ñÿJL}Ä{úWNËu]¹¬Ò¸n?Ê…+ -ßc‘çÄ»}tõeýgZjI„|¡Úwñ€Þd¨4raäÛÚ æ+ù_]bq KžÕ}‚ਪó?—}ͤ 1»ðÐõOxÇ «f˜jší>LXêÔ³©wêßsPñEµ‚‡"ðúu6ƒ¸èKFýu”ë¯Rë‘¿xo³ 5M…Ç15:„\uå—FòÅâ$´TZL ©NÔx§ØøuÓs 2¿_ÛèaTœ†;²˜Âì/{¸s”u½®­qg]ô²›H² fbÒ(³-p܆l®u®;.y{”¨,Ÿ]Õåòûþú8ÙY -8¦ê¶Ý눡ktðŸ‡Þ‚0ñ±ü‘ĽTÏ@¾jŽˆ·K£©¬i¶¬9€)B†oJÆ e Oì,V}]IŠƒ)Åâ—‚63 þÕÁ -°4®{«'(× (F¸¢SÇ|Ò[Á5º§&¢ ÚæÚžu~–0ÓÐz1fë²Ad|_i ÏAižšîîKƒ×ûú½ÙÛ¤‡rýœåž¾-Ít§œ_ÑûFˮݹW¬vѳ-÷ÂH§[X¸¶ÓIT;ŠÀ§íÁGPèÿ–!¸Gžq~Œ#§p¼îM!ììeEv_vžŒJW–)4œàÁ(šo;à%Ícpºn³xx¡5; ó€c¸ £9ÄŽ‹-<ÃFób­ LÛ²R 0ÔÀû–@&„³”uÓ¿ß¹0ð=-®Q±#Å Óúe°õæ1¶”8jŸ·6¥ûß@„“VŸZÿzβ/ÏêJ($É””Tr§ªR‹° Zìáùp !ãz"JG@jŠa@lÁ%‘O5(¦GfåFÌF¾¬ÑêÛßÃ3jt©ùªÆ@£÷m Æü" ¡cgÈx‘ÎÍé2Æà‡}D?9 (ߊ¬Y@´Œ[êª×ui¦+[÷¾Åˆ½< ÌDIÜ>…¢•Šèáù‹‰(îz¯"hh1a¢Ò›'ŒPÀ™Ÿ|Øpú6ßå|êBÆörýl¿™í?Àd^V'jÆ)÷†p=nÐpî´÷Ô‚ ìÛÏŒ„=Êè¸nÿâu¯ Ä3rD ²N° ǃO¼m¬¨ÒXèw©ççû -þ4žºDp]Šñ`ïZ=áDÔK¶CTœÍuÒ‚®“°dMÿ¥ÂJÎ’à±Ê¨ñ¿Òã`ûHK~gO”ê èönEµ\A¨pC8»gÙO?xŸ/¢dáqúCé¤Ùø&äZCßi7¯^ðºû&ÿtÞ—$CN‚‡òÁÿË™ØgP+ÁRÕü´H+¸þÃçâ{qQßôÌ0€”Üm%]߬8m^çŽþÊ}Â~+Z:-`ãLÓ¾<­6ãî—ÑD—~ö«šlFpzðY¥›Â(üÏÇC7™*fë fš£;&áôµÈ‚™ß?k>ô˜iÀ[ۃ܌Ž(+:3{&ú'òè©Ù%óeÄÑ黋ÞŽÄzõq_µÙÔwH±wyŒëAȪ¿ZyDí3.«‡Ãæº]§ädžªk¼êMâoŠ‰GY­„›ÖöªâÃè -à¨\os é‡SM5E†YÍbü ¤¡H¨'›éа‡Âîjë®ÏJŽ‘ăhP§iÑp|Fƒuˆ¿®Ïѵ¯ ,•;Ü>r -¡•I#“Ä–³3MÌÈóXÓéíþ+À?ƒgÕALÖEXÁóÀM÷‡1ö,”9ü˜¹9|nË­Àç~ps•›£v>An^N6ÙÅz¨ºæ£1*ŒˆVØ—e_9hÜ=n2£•BAÿ×Rêy]žcÁ2ÈgX`M•#X@>êzŸÉýØ2¤éB¼$èÇt—˜Æˆ[àjD5Ÿþ'ç‡_{^íÃh™ë /¿‘-Ž¶I ŠáŸÚÜ:uÑ9¾¦Ø´këÁÜš§‘,”¿k¢Ð‡-"6ß-Zû9:7/ÞSÂqôöjÛ¸`•wKçj{|„\,UíÃÄ˦tcaÆáY‘÷ ¯*5…‹ -Q ÄS!1m†w¿ÖªdT¬!˜|{¨¥˜ÿjküÊ,Bo+;oUH·B¢­=Ô® ÖUûdÖp T‰X§Âúz§k”¬kÈ$ùè˜YÐ<ºslF¼j»‚ÖcØÂ!c.ΪåÈz{PŽøâû§îqÍX½%;³W£˜"K 8Zãù½ƒðƒ¾ÁÓ6€Æ~X«ækZÚç< gõ€Î¶/ôÓƒëo8‹5ÛB€³ši‰ÈÆŽ~¶µF4ÞO{ŽÝW–Ô…')ž_À'Ë6ÿ)Š-`Í/­£®¦íœ)‘%¾ššä0ãàß­2d¬ÙÒ„lL²a'¸ÓiÈq‘ÿ¨u=ÞLYu'~=rW¡©š?ª:Y¢ô&Â%3Xüªo¢Š+_¹e‡†¨|MîŠîÈ(ç†2äÐÜ´=¿Ô|[ðHÐœ¤Ð•…EÏzÜ}Š4ãB34ú×êµ<‡ -Aò%T>³9¥Cë¤=ÔÑ Ú€c…¨ÖÁè+=ËÖýB‚ä†uæ‘Îd5T&<)çÍÕ¿MQÅÈ0T ä×.Go"TàK‰ìhô2‰¼7aŽ®ñb¼9[£‘híßÛ<• ƹ̉‡ý™ßSã}-ÿð‰fY„öäâô‘ä¼ìëCGiÔÑ ­€Ò“³‘ÄÔfºè †B‰ÂDCŠ|Tø¨vÁ‚·b2`6 m{Q'Mˆj:¿ê2XvÓ‚IËR,|µÄ|\~¤t-)Õ#IÈA7 ½Ð´€Ø|ö /ät†eÄáeg—W›óÅÐk²Nè$Ðlo~÷v &^i …庂ÑÁ7»tÜkÅÞF­©,›1ßÑî–l×­=µ}ýÓs#1’uÆÀ=ò÷³ºbÖütˇx—Æ›Ì”Ì AÏuZí·©˜äÆ=Æ U èø/Ãxû’„ IlŸÿ\Ù¸ì¶ïOŒ¶†¯=æ;>Ù+/0ûðŸý¬ r%8(øxF…µ#Q^ÁXˆíõò{cl{ {M^‘=¢°y^Y\¦zÄe±ì×ƈÿÐÍyÖ b_ ¥ É­½Ìâ¨t@ÖOûÿR -º}‡/ÙœfŒ7¤úŸjã-M2ðÀ¿!ÀÑ$rÇ?°§)/2Ø][Š¢Õj1 %Zœÿ‡š »)sS7ùXŽÈ!¡VJ–l”*›kT½0| òò)i€MZˆÌÏÙûØ_Ôý[Ñ$4!hôv+Cd -'kÜ&£X¦ý¼–e›¹OOâïqaJ:“ÔU xXÀ³Þ^BÀ-7H $ ÷nU«¥ -ÙÖqìéÑWÙŸ™±- €4äg,òý°*Ïùà´A%äm«*aRHd2ß+*t¸ã¬õØ rLlöð ‚~´¯ê?=s=ÿâæ5HT -vÌ•£JUYGTy2—›æìLüsƒïðúÕM·ty"-åÉHÐ=]7×B$K´;´±jíëÍ3ÓùN'ïÁ¶”n^°hGv1HŸ ±2×Bå,É{šI ­€là‘ål„}ýa 8|kZ¨~üèYÂP§Ú¦f€Šªg@<ßL\½É¥QdEuÛ„®'YL3—)Å`…ñÚíÛLÛ"vªŸðRn î;‰Ñ-ŒMÍ×Àj6ê€äöÞï/p18gcöù§aPfРŸLÐ HáÚ†ê#:¸u}êjV4Øw¬8×z) ´Ú°Aõ£œüÄmúr½¾ÌÁ0°í`ψš‡ÎFRÞ¯j¡È| #ÔÔüȺr\&´èL ÂT"·s¤®Óé ÷aZìÑxg¹T7ï0|Kì¦mñ¶M±bjþ…³,ŸÍìÞ9”êfrIFà¡ÔØBù(­Üf+ŒOSº³±TÓBÑÿ7ÈóÂÔ#< “V‚Q¶¤<Çô!»Ê8œ‰tõm¿Â™,»òn§¹æäW=ë÷ÁUÍVžI³¬¶Xó컲s¨bϽË7âvz{ôEùH6NÏfÙ*¼wK9íUn*¦0’ó~Øe“fšëô¬,3¡¶Öœ ÃeÕbºNÌÏîy¹}U¨ÐuK¡ÙG0?8ÙŠÝÁ$ Dâªæ=2[ˆ¥ñÏÀ„=ß+s‘‘ƒ©ßüeDæ(ÓÆë‚$‚8tªfŽkÍf~âÞ|L7‡¶'4?=ÊóV«·¿Ei4$ƒ‹K¥ÚF‰nð<‡S'ʲuÿ丸_&©›f@KR±†`ëµè™%'•Z²TJ•NL¤óp?<PªžB;2ò›˜ŽNEÍ?ëòdÜÇCØ(¶-£ü@y¦$ý¢;¤BÈeg-2HÔ¥©«µ¿rª„Yl½uþZv¬<Ñ;V,^áE«—Bµ½k1oÿ|›™§­Àõåôgb¨P%À_ûUüVî‡Å˜õÏÜÇZ3ìh™¤Ø\Û»¬úK猼üvÑ$úÀ…¿EÊh¾Ý=oÏôÌ"ZjâdKDvqΡîósÕz¶H&ö1c$ sJˆ>Ìõ„D-qª9»®¾šÝ{‡ÏkÐŽÊ:âaçéäGb±›÷%!ÚBÂiŽA¡ «Ùp©°OjI¤nÍð…”SŸñƒƒMH»iÊ‹Îkºµé°e[¡óG­NnÊÃû›1ÖÆ%¤Ì†Â„>U–@ˆ1›mJ˜­DIë+} -â×¢W.#]¿ïƒ×°§šg™ÊåÂÜ[L,z¡†Ô×ûÞz¨ÉPV>ú.PsþV1†;2\EŽ¦ïTv»Ü7»uð¨æŒ%vñ1\òTg û~“Ù6 ºå 9ñÕŽ2Q”^r4"g -UƒBO£œE+Ó/’dIƒ‡¶W|Þ«1¹%Òïslpˆµ9c±“h$¼ä|¥ªƾΑãî9ʬfúÇ^€Ë÷Í~Wi-Œh û9G:ŒHä Ca>0'èé—]&{¿ÑÈ;OZärJYLWÍ s_(Š©97¿–ö“CUýÇ›\¦ðµ‰¼îÊ ”/mm¸~fSÑ¢Á_|­×d³ºû舘DÃn çF½1Ó(§úýë¢ o}ùR—÷ƒŒÝ&865IVºŽ™hÌçª B™¬Î -endstream -endobj -1986 0 obj +¹`]NèIHΣ{¦ü+¹à×Î#ß逞(BUndê/’‚¶*ƒ ÓošâÀxq¸Ýð{ïo1Ø1«€Hÿ(Áä虸·ˆãÏ–à ¢Ÿ•[äävå ™‡½½i•ïÓ'¾²òj"E`$•Ì·…ÄИ8K¦°iÈbÚîjë{äÃ0eLáÂUluJP, ãiËÙfÆà'ÛªhŸÉ7 › ³®$6â༊:f‰W½<Û6 -%)p\àÏçèeþjÊïÝa—ß\" ûÿ9!œg‰Ç @GˆLmúÂs0½_JEn|‘&$SÁ…Ÿ·úÐ@È\mèªÿý<؇Áu®Zý=½]¡m€jX “æâ}i·¨]ÌP»Û¸ˆ]…à'`€ùL¥Õû}½ÏY>ªÔ;h÷ñŽõdã¯Èg ¨¡û‚ol^k¨‡ i‚âŽEb¹µËЯ'ÕA´vàÅíYqe}¼±ØnŽ—PÞÔA 5Ñ]«X]a¬î-ÎæñÞãš]3ÎÌP›ïä7úÞ <ÌA +³ó£¸NPl…G6³/†E†ø¾’?¯‚]Ÿ`©#dÄ'ñ¹Û  õƒ†~ÊþxL¸sóÿ%ˆ*4d‰^¯|‘Q™ÉìµÍúX½U¬ú=ƒŽ=Êh-lvŸö_ÃxÙÉ÷…±OJk܇âi Êé¯DÙ‰¼®Ì¬íÄ 4ÇÞò—”ý ¢trBÜœB§=Ä,vÊ© ÒTÙ³0$íŒ <*¦qkp¸`´h1=݈òDy.Ubo\dˆÔŠÉNí„ÎÀí"¾·6>ó”ÀU7FäPö•ðÄÌb Á–ùäBbŒF¶åK×úêPå©°¨»Äœ›oŸ?&D†…×Ðœ 2áE¬´ž´¦FŸLXô§³ÊàÒœÜã'(^˜­´H¤?l‘€þ}ÜDçO(ÑÇ¡¸UI.2 µ'ØmåçëbŽ™ßƒŽ#Xíö„¦²üÁîâ‰ÕærÂÏX¨"b_æ£ïV¶ü`)è­…1#ǵøð±uRÿ?ª%ÇÃ{K·btèí2ø›ÈÓËÔh‘h˜Ðªwr‹Aqhîœ2r÷{ ¸÷…ËÚKÊ^û;8ˆ‘á`“JÑ:;œjâ€Gꊚ1ÔZS ßÚm¢AO4óøS"ÝÒëø ˆ—Mq|E9s-­&þPF þ’zä‹÷é¸k|¨ÑNÕ8̵È}–¹·Û€aLCó³Â‘ f§‘ Å»]´Þ©Ch  ‰àL¸á²á7Zg§Zq†ëÊ‘°d“¬®¸ô;¢æãÈñ•wöäøæµ @I&ytŠ±R€­7X^í=iŽ?+¨Cð«bÓê+©îwVªŠÝ\0“±S´ó ÈÙâÆü œzK|V– ÂÐÐnz ¿…qݼ‡É³i;Þw ½vð@%ŸÜC±(º,$…‘ƒ‘ëiŒg2A‹-ô³CRu‹9à 0ü*o‰ ŸÏ@çè¯%«1Ü­´Âÿ­>Ø'éÇø©¦(fôè×¥Z[T[DùÖ™¢åÛ Ä>b´-¥·\˜‚oU‰Œ­k•÷äÃøÊ#Ê’ÉÀŠý¤pÇûªŒ-TR!šõLÎr«³“ÍŒ^¨ÇvLE·`#â3¤Á½°œŸ jÏhØm# oÞa|•…š[F«¾Ño¹Žo±š05âføŠö´iw¥-(8íŠø/žØ/2Sj²¯·Zés8QI@61Z?eA*ÌÝtêÝØlù –¦+#?ÞñښĒ|“™©fóÙ†>™-`›sjòc½ -"íèØ•ŒJê#å†!í³6? ²w­ÁÖ„¶ºn•‘ð?) Ô(Zжu(Y`Xë¾å`À-–Zvô¨k@Q0Óвu“=½–\œ‚Fà +ù÷gî˜J­çQíƒ0}б“˪~ÒÙü+$b%Þ]ÒïMr¨»ÎñŸ@]Ð)nÕYÐÅÇÒǶ6›;µ=wA=š4vºƒçìg5š«Â!OwãQu7Ž~8ú%PM½ª¡Æuì°Þ0Ú’ +™«;.}0ØñvËm¤ +w–Ð,²˜‹„´o‡(8—Úd½MÄ$Ðz5jŽ äó%!¶ßuÕÀ´¬ 3°Npü#ÖºA ¿4û„ó[<•ÛåwUv5—ÖÌ–TNÃÐæ˜Ø!êìÑšøây{µ, VñÆë-¼146r/¦Êó’^•'ð¹•4ûI;ë§Ú€þf¹tÑœâ9™ÀÓ˜ôHÓ*-÷èZ¢?6ο–È] ÉYÖ„Š(8g¹k‰Œ©FgÙÄùP”ëû,Âj±OÆiæÒ2ÄúAfG=×a¯;6R°T„HbGmäºe¶‹îÎÐòHÁ=NÎ`±ÄBЧãn¼¢-nZV—OIÀf@ñ¨•û\ˆvª”Iä‘Ë]ã$ÍÃrÅï 5Äݯ¸=t8ú>Xe/¨‚z¾"›©”ûJ…ã€Sg˜ ƒ?s†sf ÑÇjWKŸ‡Ùz1—žqë€Ú…¹ª mÅ7ÇÞ‰øW2Wø亄I·šXâË[öºhìZ½âÄý3ëPVL°~€àìõµG¨;#pÞ:ÂlÑP¦ «uA·y—»ö‹%'±Z‡~e¹HõÛš.¢­/:R\Âi¸¥¾ÎqO±«°ï§¼)? :°„×p59I½[Ÿ3aLŽÆ°Û]Œ˜"ˆ«À¬¹&WvtèÕKéí‘dTmÕZÏÆuÕ*îm·REàDD+9 ߆Ê?î™Yø¤\÷ËîÛ‘Û®nP7­šÃ¾úX¼o|X=¥š@ PÝÃׂ"~‘eµ62p» tõ+”›AßæÂs–íz ¢—y0yË«vNâËÐÍy6îz_38BÈ6»ÔÀU§šó@êñÖ ¥)å<æøÀ }f·’#@Ñ=•sߟР•Ðßtè9ê;éÓfÌ©u:ÀÆšàñò¤óC.4èVÄìqú{zÊ!pf;ÐjÒŒö,yv€9ïÏÊØ|>È ¸ý®§cܾîo ]øœ46xØáþ/Á©ÚI©`®Zê&+d¬#qI„”ýDžg¾H™ ÷”è‹*K–ƒgðº)Ùá<ïÞÑ7×6}¥)øüŽÆ ±RÀr}þ49óØtÞMãc±^$KK\ù “ù~®Tr¡ +f€[äLì]Ëøu%×Ò7¨ú]àµÑUšP[Ö¹›ñ'Ü®ëÔÆݺ¨øÞ` z†a^œéMJ‘ÊñyLìårsØ)›*ªzüo±Ÿ‰ùË£ä~ðvj1æÌx³ÒRò…B‘æÿ’}æJ挋±Àáo<¯\;Ù ¤ÊÐdä>P›—ò5ÑJ¿F[nüÓÛcøÅÏf!Â6¯|—ç—ýÚïõt{9µ¡¦ßÀfGDѺ ™¸J'Œœ²é´òNŸÐûÛ“av Òî„1Y¬Ú9·% 1Ò •ŽÁ‚ ˆM…!=BŸ«Q$E<æUÝÆ â¼H¡è‘†õE|Øï†pqapÅê}·vßgœ ×z²N†êûxöŸ/Lb©OŽ=*Ë +8:ð&¸?)HªfKù])ºœÜÖ î†E`å–Bdl·á~éÊ¡ŒÈ?}1Öp’„ÈïóV³—Rñ—‹îã>P ÿF~ß¿‡ÄôÌ0ß ÷óæÙÙ )?'Y`Ð)‹sR¼|–Ï΄WJüG!pzÌEäÜͧjc"ý.å_=5½E +*‰„ýkÈÏÆå(Ùæ‰z–E6ŒÌbäû]q@øù{ˆÆö½Cþ[®/X%u±Hä[Íp[Šíå <Ã2ܶ±ÈfÊ‘ N2 ÒŒ#_ËÌF @hšyw¢Xšå6ÕÝA\‚éDåèš……Š*%-Y»šB¢,G?Þ–ìžó~r¬òÓ*€QUXÍ&^à ®70l¹ûú)¾Ö(„•škRË(‰ªLxöµ†X%yÄÇšŸU½x.Ï™C5ù˜ÙÅÄ!ÚÍ•þñàhùšžJ-ái-_á*¯îd +hA¾:kLÑŸGt‹¥#Ïú‡Ö°Ôš¼ûçøרåð¹á¦bP•±ÿ¦0z/„™¹#¶E\µG·qððãÈ;]-µ"j<ænø)XMŽîI¨w \£×;¶¶[êZ}Š]ÅGáúÀíÜqú~G²AÓ9‰ á#BîÓóh¿ó2§Qûý 2I‹×ÿ/ÑJ(?Mq”DÌ#¢?õý,gðÏ„lå‡ßý?QçÝÙÆB’®^.`†ñ~þAÝÆžC)Ý—¡ÍNÒãßÑš:BÔr½]ƒév^/"ο6!ZÿÁN–DôÍ{~’‹BÜ:ž–ý{6aZ£ÈÏ«œþ?}/ÿªƒ–žT>Æ÷P_ µùÅ‘Ë¥‹Ø7i±«CU´ÙuueÓ!Ú \ì‘“3ëàë²*æô¸S\nE´u§ßvÍL„Ú uiÛ +]’X‡vãu( ü>šÊz˜—Ï.–@º¸éÏߣI–ÊE«0‚³÷ù#ç:õ¬(4ÞaÞW/47-ø"ZÉ4ò:מ„[ÆixHÖc„žBœÍ± …—€ãH[܈¾”C;R £Â¨XÙw›ž\ÌFŽàsšÇ ௹Ê0¢ˆ¹¼MÓ*KÔÚkH‡pahwÕ*¸gÝÞç¼æDQ}õ ë5ôNk)aKËA÷cü Åðb“·wî¸PÇH6žˆù‡˜:ðþWT(šZ[]†îÏÊsÏþ[8¬‡±ý† ¥&§i{Áwÿø(N”eåBv÷7;F’¦jrÑŸàÂ+õXf•úXTœ[\GT0EjêŠNã%7­Ù+z¬c1<²^OÙF «tÿ%TŽ{}A@ÒÍ‹óE Uk¿›ô]Ì9é‡W ´jðî#2ãY߆¼§Æâ†Ú‚ZÞùí "®#"x4Û¢lÏDïRõœÏ0i<’W2Æ¡¯üš öS@“/ͽÜÏqÞè +²8`×\åØi=ñuhº2&fö¿YÆø”ËV¤¶vAx.2˜ÀÎÖ_Ý+ÒߥE§²<€M¹(\-¯F­À±°S2þÓþÍmÿÅ>ésçËì¬ï¬Ä|§%eÂÇó:sL©ÖWáË)Úõ·ÃNÛˆßLq;´UàVßZPOIõ;ã_¶ߥ¤Ç|–}»ð~å&ÕF½é’Uud\ÇìsíšËÙ©ñpL¹+ +„æÓ%°èw[vÌ;e.KgEVÐ,À0¾HÊú­§}íþ²³€®í…±»’Ëõy ~S+‡µ¼úZ(Ý‚ e/Øww^ç·+ùk\¨ÆP<7p;2 foß?”é#ÆÕžW1í† +c ­mxþK‘\©Y^=F3‹0ÍÇB»—‚t÷•½OŸê4x&%–xàX¢F|ƒúΩ¼¢üvsßä_ê5»õ¯fþw.R/z€ùIÊ”U' V$ËÎ÷ÉŠLîp©ERŒ:†^E>ÐaéCó+8KvWJP5Ì]§SG€8‡ÆöÎ%ÉܽVèƒäžäù_*g ¯'SÝž«º;ük] +æ¶ûÂ}ÈÒE}Ú³Ç梷L^„†Â¿æ&´Rûû+§V9[„ö- ‚òÊyÔ£{œðn!5vÌ£èTwa'ý|ôÃ9tª‹É6õ,âÛ¾?f Zâ32¨°ý„k(‚äX=Mâ‘©Q›…Æ]’Ù⻳‹¦2×R!c²‡Ûõ†'p«š•¬CîI×ÁÌ•Ëç?/V3:Ò4H}“Qææ×èJG©…âá`[ãÒò-á}‹"%Ý…7ÅBµW9ŽßÐIôM§É6YýÎë‚Á[Vç7U¸×²’JðM‡;” Õ7t¸ðqÐ^Û¬OLu_H(ŸïNWê~ŸæÉÆò>( –þX‰¹Þ©Z²Q(ôU ”èÄ·ý¸]´ÜP¼õRb†'sµú?ðQµ¹ -NöÊÆ×Qd<±Á-Á”â˲ŠðûšZ–7¾„ZŽ&£|¥Âè¥ì2\&ö0Ž¨Y[å-éÖ¯g~läÝ´s§3îg +Ô„L÷“á1êf° D®r o–èÐ[—óô\û,Òiv™2¡I”É_RÁe¤‘ 'ŒMî­ˆAÜ­ùX¹Äuì8ôE’ Ì¡×KM9ÁÝm°ÄsA–µk7ä“-Ø6¥’=>m¬Ú‘}¹nbÂüZº¼u-¡cs ¾‚`—™È‹yÉRD(›pgUšbô,vb",¾SyUišµàô/o•Þsü‘[”´%VÝñé§mþ ÇÁ `fd“þ*¥«¶Y-ÿ]Ó/OÐF +P$è.Š2_'9Îñµ»_€TcŸoìovZƒE.ÇÑIEu9·oõèüÃÝ$e–ßúnY®é‰ >™~hU~5z¡PßäþÒ–ìÒ‰¥›onI@ߣÑ%#àKâN +-¼SÀ!CLÂýÃZO܈ãeô Wg´Â)Ef6;‚ …{3O&Õ]Òéz¶kà÷xâ_¼~]Z*eÀ}O¼»¹Kà]ÿ+?½ }×5áá†ð~Ëï³´G÷­E fF¤î\©šÔÕùuÍŠÛäj¶¼Å˜š9Ù,’ÀùÔÄYoÁ 1ÌÕ+­¦]¿ ë© )“{ƒÏk߆¹2Ðl?Ù‡¤ š½­‡‰(g±›†Xë +žd½û _Ä}#ûZéM„µºÊýS³µbÙ6n +»GŽ|'Hèr¢³ñÄyTÊl‰ˆ0r¾¥R°üN 7ô¤E?tæB!–6àéøy·”@N`ÌŽ£;R¬%–)|u‰Þs-³ ÷þ@aÆ쇗 ].ì|7 PäÔàØ#J˜‡7ɦXU%ÐæFO?©éº’Š19ú ÍÄž*ÐÝQ{oõéYôhàˆð©¨œÈWÕºaòú˘T÷ìŸÌÏÅ#øMTpý—}A» Ë,‘=v‡ï„GÁ70g™Ä V¬õÞlÅ4R;›×ãCÓo£‰a‚/¡åƒ !¹O´Æ‘kwõ|tÖ•Åð<©YÌÔŽþÝ ‚”‰Á€>I?¨lùØÇâ?ÛO|)9;µ,²8KƒƒHö@.“D)µZè«& +ýŒ]¢ÔÌzÂ’‡uPÞy·ã©……ôÔ¬U¿µ7Ùí{‰SxÌ\ÑGÂø8Ç”Aé@Aøžçƒýç“j£üƒ$ÕýnA¬X¦ºi G > n„~¡”OïÆ2@G0”Å懪+äB\æK¹P€!x"³ Rö™V7Ó` xŒÂûðotݼÙ_÷atœIÌ•æð/ërz.†U’˜+ÆŽïX¯¶j÷ÌÌ€ù«àQ‹¶òí•0ºæ u‡õzäb›ºäŠª:ü:€?7%ߦÜrûµÕsC@uT¼×Á? 1» îüÂ2_‰rÎHàD•Ö*¹×–“ל³Óƒg:KKCÇL §ã&”z‡™æ4"‘F©sz³Ëåo]¢Äq6æ›?C3Q6ûóè+”Ød€F|§À¸gM"3C,+TÕÊåÛÆ"»ëÛ£”¹Ú(ïo—`ÍD×»rYÿ=tP3é†<$¤Rêǘ,?i`µ3üÛ_ü í€±‡ÚIÇÐ~“UFèó)líüiÎé: þÌ÷šôQô\îpõ½´•ïùè^â"Œs6à/(Á#•¿IК2Âòí€â³ +àEé(Otn¼úVsMÜBQ6«Æ‚€÷fËÔ8q$eh1†/ðˆ~•ðĆ|ÝoÐ7ྔîíàwÃh¦è„nS]¸S9V?÷J¹m”^H@¸ÝT`åŒbÔ\©^cÊ<ÓêN¦Œƒ½Á¥I7³=†ù¿¸",Y(G!â¯Vóƒ-ÞÍkölàSK7\Þ<¦ÝqELôª¥±¤–¯iàÂ¥k#ÿ=º$Ð@,ž+öô` ©áÄHß‘M'f­y;Ð;ú0¢á²c« +Ãm‘»;qà@¿å#!¿^3‡öƒ&æÅšžvXQXnŸoC{ç‰&)É…8{ Ü;Ʀ¦ +±Å(N=çÞ‰ÂiíÞ‘Dh]1îw!Ÿ…}ŠA‚˜¿½Fû¥/б§#Ä2´´%ñÿ4ÿÐÅ’'j`Ñ·)$‰Öùqàª: Ë™T¿ѯ2ŒÐ]ž<¸c‘7?gädØ|^ñ>·,®ºtüßN…⦌F¸¦3è./Ú@R&æX ÂáTÇÊÐ8ü›Bù>JÛ°UóAÝÇZÆ«QUôƒ~å† ½£ý&Ø ¯qÏmgråç}j‰¿zÅþìYgrô>ª?±î?CsüvàÐôî¤ è +¾‹ÂÕI"8ê/½Ë©n§6¾VF‹w^]*[äFî À|¾>²IÙU£:&Õ¹ÅwÔV˜£ÈnGËáÓˆÆï„«ˆ4‰¡LŽ®Dá*7i“)T¥ìÉ#-%ì9ZæÚwR@éWå¿B/ÁW)å7Š?m”wû=´"óTë` aã,cNÌý÷yOEoÌ\PG÷Áj¡Í¼Ò­2ÆÞ«A ÃÒŒÀ§7Ç =.ò3?0Jº+;ÒøߌÁ~|úU_¢”¶XBå"(~¦Ø) yO{ëEÆÙÐ¶Ì ™Ÿf5—îž0M3žooD 3œö£ ŸIö¯ ÐSL +C¯ë]ó$""oÁ™Òa£Ò–Š!#\û·M¬p§ÎÚ-í +0êX¬¯3´ j1­šW«©©¹%&AŸsùmâ˜]ºCÎ,\glh©/Ò&GðhÚZ + ì(Âé:è~1ÅÎÔ!bZJ¼xØwLSïݘï'Ü ¬át7ÔeÅHÊÿiÕâÝݼ) wAM9Ö¨2O‚¹áÔ×õ I)õ},=’cÓ¡LA½ºYJ…ª<ýHl…¯ë–íLŠ§"ÚW˜ÙX— ·ž" ^½•?Îœí÷×ÙVððcèyÑ"ôøÓr‘…Ïw +c.áËŠÄ{$Æ4_>–Ø߈B ÍæÚÝáfðF3K€pý?‹å…4a´“1WŽÏkðmÂ%± bgÐOä©>wí+u5n‚^ðfVª;^]ÉÇ÷µ§ƒ5ϯ¹á«8-.Våïû=·‚e_~SŸáø¢ƒ,vÅî©€]AÆÙ²0CG\ 5%ëW)دP[&ùž¬r¨rÃâD¾@út²°ƒTì¾²‘RNI¤d.µY‹ó±ú7C«tHžjÃpdµ?—u’PežPµW[.¸~þ ðƒ0†ß¿>6ª[Á2nˆrn-og#R¡^ß/e>´õrRùUŽcom&ù'Ëè*0úÌ‘ÛÊÏ¿ˆ_Ë¡ãÈ\qS“Äq çé³ý“H"#S>4“H#¦AQc›½\Fx(ìº*Å2MÜ’ï ÄÙ(q—¬…Dí¬êñ9îšüéo ¶×xøüø2[*+/² 鈣†:”„5›ÿ甧[°}TMØ ¹üñ8nwEl±ËP¡bhè~.´ÀWžA®úW«eÄLÀ™ˆá¯’lJ{ 7¯/¹K >ÉQ:ÖÌèŸéí‰J§7›ÏfP.šŽÌySÂC±2€ªÃ /DØûÉÖædj«º©ë§^n⤋2¡h»ÃÔ˜À‘[·Ï?W,úÑT_S|Œ)aFP–K¿ò öˆÏ4*Ñv Œ*Í.üèX+L˜·60¹äØ0‘¯#ߟß,Zð6ÜN¶÷*=僔Oë÷›„‰FìêEfò8G?„V:Ò òâ«ÎTÒƺ¼‰!U$Œ1w<ðxÖfXØ7î +Z_<©:lþöÌyw8†öí²®€Kaßëµuåà•c2Ñ9:ð†#ö®Ý ÙôS¼„äšA›™»³wÅø—«IRVxìèªMƒ„…„ŒCÜ•`äkcÚN“?-8Ý€óÒ&‰bÐv=8àùaRÿꜹÑ÷‰cmã:Òÿ½È‚€O G¡t·‚ÛîúJâ©ñ•aÚbÚüv%ž%† +7-«¢¼œ>µgòé%gŽádY,xĽÜùxFws—7åaË«¾cÚ®Ât×è­£qúåHg¡šî8w­3Ó”ÅtÚ¼™<(ô·F©t%cQeÑ[ác$ïET£-§·éʲ®÷§;ùñwë‘«æJÕV¤áÒÐ.'ÚKÙUµ˜Óèxâ“ÿvuí"÷@òÎýMxZALê`L"[´Ø ±ÛN"~èý3b²w„–X×sdËÓJAºß/šQ½½"Lò xZoFç«d×(7IÀÕ)wÛñ‚®’_/¥ÖÇ'Y4óþ]9Ë/Qfÿa}bß-°ÔußœÚ +’obÖ#k…m5P>cìNˆE;äraŒÚ laÓ´¶°þS¯r)xÁekæʯ} ñßòRè ã\¸héã™ÓõPbù_VÐì‹;ÑðÙÔÁÏÅâÿOFÍc¥&%á©p}OY3ö%" Ž#+sx­b—bïP‚ʵ€cf_wTX;OŸ¼ŽP1e„ßséÖçÀµ²él[qYñ+ÍpÜÆÇtpsË ²0Z’dR‘ +·ó¹}ÙØå +s®»jXª™'MùW! +@E­úÇ! YRg:qU¨3`0O Ó þHJþü|z7®#íÿ8ê ŠPú2ª×¸{2Ÿ‘Ð+`æ‚hÌZ•÷m¹I3®¬YG¼Lغ!}¨Äm ÚÏG3n›ùKј¡:pFµ¥í”º©Ï÷}p^^°‹ÿáÕà,î·"åŽ + ?ëÃYžq•õý«ef¤òsŸÚ™æÆ+æõO=3y(VóïPSÄ.Ïw¥ÉòÄÿß;ø•ÿÍ/¼9´ŠÛïõX¯fÁ[ÑøSü¢6Sás9@;É4b.k ýZuÜI9ÃAŠb _sv­ +].#i}Å\`f0bœdfwJ T/ h÷KÝ0rØxü’°ÙOTγzVýI®¥u’Y|)ûÕlnÛ‡êÛ­ðÕ[ XÐ;= j,i ÎNa°á! %¿5€-ÍãNËhÐ{…ÀÝ1‚¬ÝnqƒÅnu‚ì‰;"FÍgœL{rÓ[Ó?¤):˜|‹\Uô ’îÊÔHÓ6¢†(­¯GxÆ12d¶*zœ×Dùrëúý>¬‹q&‘ë +§˜]§OHP £B_VÂí*LµŽñ裎A 'vvuͽr‚¶ñ4RËR¤Ý@à ¢o’,çÚñ×b”þ>¥yÒkm¾ˆHCÕZèµ Fc•¹ŠgË]—ëîÂÕüBþH”Š/—’QnKfA£"LFŠ»“<“ØÜ·Ÿž¹ž€nä1ˆ € %Fúû¸y\±õr{˜¹anX<É­nÛU³Ð:o¬"Tè ½WÁ¹nO{X4½="\9¦KÞøïR¸û»£^KDZ‘>H¶=Œ³EÚGÝyˆ +N¬Èý„Þq¤ˆ9ÞTxE;ò_æäòÂOšš6¨¶fDÑ!&™Ÿ'™³ÑÂ`óé&®Šë÷1ðe"8ØA…X’2ʺ~> —änpç§1† Z€5©¿£ø²ù€Ìç°RŠ.]íËöû„査@càÚé’@i\fÎ(鈿R–ŸðyŒçRòËë(gÁìÇGÃnëÁè’Bœ¬Ù[;Šç<ç÷Ê]\ í +o9wü½(Nšï¬¯Î§ÙéÚ«*uˆâÉé8ªy-*5òËÉ£czVK<÷ºp^ø17Û_Û3[Øn`¨s=×Y’Ö['@‘W‹È*êöÑT_S|ŒªÍÜW)i†Høë¥%‡"WRE׌´/b),Ò©G[–s¥ZÙ,®e¿]ìÎ÷j–ÅvË:aÍÖdZœ}´/Ý_ùî_©‹/Ö˜ 6SX`R;À9+ ¯ø7Ç嶌˜7„ÌíjK aô·p(A\N‚Òm6n1ú°Nëú¸›M¬p¯´Yòæoý]FPiÕ_–Ò 7;ÞF·8‹6@ G+Ü´¹&uüÁC®lœÍRü¯¦é(µI¸,ªŠ"|«eèöÚ7Ô8š—}ÝEíYBYvvb±IÂ#c½nl¬ü¢Ê +7›_½e¶LZ†o¤"/ߊXO„ò'¶ÊÙâ¶Ûø×|Å2ÏõñÐõÎ]ÒÊH, † ºÞƲ¯*¶Ë9»ˆWnÿ\´—mQêšFÀ´ó¢Ö²Å>0W¬9žü©c|Bà99óS³åÏRÝŽ ¥a#‚וnW^{^ä}¤½d-úعð¹í¬iˆc&i%÷{5¸Áß]'Òírqñlä‚fa[ÄÒ/ºurY¥Ó*­:jéF2 “FRùo7½òÿ¶:‹ŸAX?u¢Ñ3šF<¤?XÜAeMIuqýè¤âø]¡¢øJšoÏ ”X}§©;v =jl…R¿¿>;ìÀ<§óAdfænµ°Q·Öºõ¢Zõaî~ú ÞÐ)Ò'02áȥ͚Åå,Ô;‘ œÀbÞI&f#¯š´»6~—QŒÎdÒ”d÷¦jÀxr˜¢Î°ÿe©¯JÅÆŽ··¤5f1Ê‚U³?"§'¦£-^4yoϲ"Å°ÀôàÙc¾ÜpZÕâóÃ8äÊFéˆZðJáñOS4H¾ëjOÀøSö‚Øv}Ê]HÏõVû¥Êþo§áîàõƒø–Ñ{÷…oë¬Äj‘w×SýÆ5aÎy¦êß$ ucÓ柼̪úáA’=2^°3!#½I$ž¸>R#Bu5ì¢ûËáIöH5#ˆß5ÔÈc)rœ5DG'ð*¦ZfçÓõ·i[/5R7Í÷|ÊSÿ WY½7ëÀÖ¢Ô¾î!&ó©bi¹ŠGx\ï»MÆègrRBÛG|çg™IAé¥×ÚIÈ͆öj¦ Ð%d¸ £òÕ’‹l–Û3)ðoßÊ6¿ê •~O´»<ýHh½¢Fg9 »[±fȶµ#Õ³ªüÀú$\g1">ŸhÑŠ')üZõõ”³ÆöÅ4'ù£Ñ¿ïÔø<ëÀòÙ.vçÿÑ»ÕÖúK¹¡;~šˆ3Øx1±ö*Ul­oCÌl-×GäƤJ5^¾J|µ 62¡¥¬tYñtôúk4%üb´f%)+ø§.s+Ý~U*[á¸ãŸ'b“ëûï þ3§Åî½´LkZݤÉ;mÂy9…q Jª¹§d OÊ kaùx3P+%[sÖÐ8u¹ü4jP7,X!‚ƒg2âЪž–ÑÔ””dÍ›­J©h»à¶CŠ.?äˆMˆo;ëƒÛþ8-stßœ!—o G€¦X|öé¶#Sé{›H¿oOõE˜y‡@ý€Ÿ,pãQ˜êų½y6OEÝž×cmb¥«‰…6 Ã9x T'±¾Ù4™“ŸülÅ;Þ1šbj2¶äIä1ó*ý=lž….OŸÂϺÏÊñÃ![67@oýÜ5»¯5z2ÍN_ì q’%½J°åE¤âM*¹²¥Ù0Í1œ°l"ñ•£¯ðËïYó+GÔƒnÌHÌ®·Û»n×m¼´ Y¨ŒšádCÅç Á’CÖö·¼8U£‰>JoZ3¬4 m®^‹»š[ÖTÿdtv3pîÌž`Y†ëLKË=ð&JK%žK¬5\ÙÑÝ o·DS°ÍÕ®,ü¯:ñE¹¿þTiNëwþÀ!H®™ŠËòfþ·Ú'ÏÀß«Gü§5 ÿ£{lÀ­5üýÛ÷´¶Â¾iß×,×3ðí/¢ÝnÙ)ò3o”øãb†¬†/Å’s “9xý1Q­8þ%§\†ÍjLÚQªu¥›¯†ã9mz…ÓÓu5ëýÈõÙôÅ&[Ò¸|¦žóØ@¶n*FœTÁÜö61âÃç¶áôuõ…²æ™l½ÈKÙê%$p›9fk_=ž·¡RÇxJÆonÇRÔ'§ ^>»Æ­£ç}êܲeý"ˆàØèÂEü•ôH?í€BKÍxly°¡öJö¡EÝâá1—SÀÂÛ«;ÙqÇ"hK”=‰Ö9fü˜ÙÊ2¹y3„*½Ï±=^1}Ó¯DähþÙ|¤R€jí!ªäŸâÒp—°‘Ü •=JÚßÞ)¿(Ò«‡@†¨JR¦ýfò^ä¶þ3€Ìƒãò³Ik!?4†hÝ÷Zý`A#& ecÚj…×ܲA¿f%ŸVß¡uŒ6ûOê4µ‡´#>i0Š=t¶Ýûºy닇œ×/bñ4¶æVͣѺ·b[ä±Ñt¬°’F ¨¹ˆ`ñòcL7…³Kž>l@ŠÛ5Jd‚ŠFhkÜìÓVi¾–(”ËÚ2ÿ+p2«‚(M ‹Ë¯¼jpZœwµ!†õL§ÒàwwöÑíR€…O@¶¾MC©Üt¤óKgÉ—_œŽ2Q”^rù£3û`ëZDT`%%¬ÔçÆdÂjòV–U7ÐJ”‚GÃêwÀ1N5[šN +P¥—NÔ³ǼŸß–§Š°K#>ÚŠÉcn³ÿ°ÛÙ)(<éd!Þíz*`ûßÏÓ. èÏì7ÌÎQR6›-VVŠKÊA8bÅé–¸…dy Ê\»ì™í(Ò´§¶Í—¿‘’†À;n¨v*BâíÅ—´s73] ¿^PÁ(ÿó!ΣåÕÕ5.̶‡Z¾+•J¬6ì•B"´ñëF½k­ë[ C‘Š‡~J¼GXyo0Nâ¤TØC¾$‡\RrƒbP)$'ŒøÖPÓ¶àS$¡#ÃrøMÄ{T–ÀÈŒú)jj&FȤ'+žÝ6nkô˜Abͦï\ÑrÍÇ A ‰}9±ÂŠ„ê-ßDqx×QG’,Áš‰bå˜óî®Wϼê’yIg¥K†Aqhîœ2rtØ +×'•šbÊÏ_¥73w¢¹*] +fM^¨pÞsêàäÁbèfÜÔ‹¶{œáÐ×À ²õ(ûG„FLè¡Iï‘!ˆ)Óû¨3ÝQ¦· '£•ŠfÔr ÛíÀà…{}›´ÃÍ•}L±ÛÏutŠ~ÎOÜÊ?ùÄ8N¿)ˆZiØÕGàŒKZ %_'\Ñ÷a÷‚ÜÒ<†FÆÇ +[•„ˆR +FµÕÖ%£q¥øÜ•Cå3b©éÜÎW¦‡‘ÚÿY™¨ ¢jF”@ÇZK­gï»`àLÝü.›äp™VX?;žï äÔ›£åšÑÌwc‘ÊÏ—µcgïΩܖ^V„tÆ ïÏÐÿ¦±Ì£‘¸÷ ~£šÁy ¥»¬a^d5œÃr¾÷…]Gw`/s-ÌÚ·²b¸¨}ýšãe3ö+NW0©:‹ÄùŽ¥ )“A»º8…,t±Ò|%hL;—ó€4àë ÿð§÷Å.e\éNhôéî]"óXˆRñ€Šêi ®J¥³9Á‡õ&0µ# ƒO&mp%?q×DÚO—jâ8®èâu¦£ÊO|pØ>ÓVž^Ý#Zº!Œ‡çôì¥>sÀÃG#N…}†‡ôÐý(ïb¶lÌb…e4¸$¬Ê<ÕŒzæ>æ=4¦e?WÃÆ *>¯„]ÏÖv´I_•öD°8 £ü’ªš¦ê©Óýà{®¶`²ÒàÉpÑû´Íë_Ù}¡ûúó©$K¨´\¤B°/ÚQUùÃÙÃX´+ÙËÁ@õÎU5ù-¢—‚¡ênš­s¡ƒ›i¯0äþžC¢Š„\º‘)š„‹Á ª¯…`¢MÒIåùù÷ÒixDàýnw®áÖÐÊTC†Ò Ø<.ß·Ù¥íÔ·z^V±É-Æ<ûîX4òÂô‡Ê‹Àiæhê#óÏãõ1îCa/׬õÉC°céäšú)q¼ýÚ ¼Ök ï &†â5b5-c° æNW@$#¿ZTÁÝzñKÒúbX”ŸeŒ½DG'ð*§ʶµž”2bòæï²#²\ð¢'õy¥|´ ë°±œÓÑõ¸h]tÇEMšÝ-FwÍ AÞ¼±üO;(…ÎCq5pv=$êÇÿƒ“þßþH2§:òð‚–'ô¶]…EVWF¸îÿûRÏôƺUÛ¶(ŠÝ‘à|æ,àÉf43l=C +½ÉÄ¥Œ+ì=ï½pÝM#»x¬-ÉÁü·Ög‡Ýí—ý|/Ù|åZÆ¥•«Œ(_" ø09ÑkpùÒ:²“.ïò'æú§ÛÖ–z“mè}¹ýêEñµÌ¦%Ò§éà’¿ðµÈr¸…@êŽÓ)íØÃk×it³s¡žé¸ÐÑ7i4²€Ü~t¡Gn}gÑ­à¡›ër¼ÍqA©½ŽÍÏ…‡í°‡JEÒ’os/W­gg®´™÷Ö !à]&&,Ù½¹ÒZ匾“¼cð´ÓCù+]dÚ?Œ‰j’ä_×å…¤Š,Œ·÷?>Å-4c´hê‰4wþÔ(~•ÜÕ¿¡ƒ¥™ŠÞVÜÖž!Ø ÁñÈ »nê ©uIÓL Š¹ÛN£ŽÑ(JaŸ½’½yqŠB «“.…“L“‡b…ßÐ ”éKÖc“mI‰SÛ>ôؾ >h©Üt©Ò”W†“®ÇZÀªÝ °^*¼õ2ó‰[á»qgú$;û7®BEr¢Nü$WB´?KˆTS}m”R3o…A¶3:–­ ËÑt6¡ŒS¾Ó’äA™gàÕi[Ž/Së&QxäYY½}<ÊiļâÕéì”_ j!zÁa72¾I»Õz¥Ü(^£lØ7§þºéórà ¾˜ÇÏ$ö‡Sg›‹ñ³ó±ï$—MÖ‚JŽ3Z·ærq[ÙsWŽí5ÐiŒË•q",Í)W61EÈÔ—E³‡¶.gz¹LCØM^ï1t—² äw¸U5©5Öõ[€›ƒX+0ù¡L½Ç¤ÑÒx ÕÚ»{Çx?•Ý-ÜÆ4ÓÖ4'Ž½I³ËÜîü¬ r>ã ANì ‚KÓó4ƒ!EL·Y³Ø)ÑùŽo¯cû«8–ê©ŽB챜{(~®b̤ÓöÚ´ní@„]šûT:ã¶Ïts©ª¨—Y¶ú£r¼K…••È<½Û% Ñ¤‰ë ±iØž^|øây,þ[ƒáÒÝ3åø\R6eè›à…q$Oýß²‹ª¿§žI-ÊJS €f?3ø}DsYKÄmÊ…‚ *Kd&*gS /sîÛØéɇU0㊮FÕÝL£›PÞ_oƒiŒË•ð§ÇÞ{Žð°Â2 lÉk°nfeÉŽ0+> îf¡Š1)1®Qâq:{rÖ#~R—"[g°Ô© +üï§öㆰû¼ã`9¥à NLÊ·5šØhñH8Ê’àº5¯F;˜ºJZ²êuÀ›2×ðéó± „u§É¾˜O§|6¥îêÄ|þWZ?Ç€ßí¹‰Z³! Lc‡·›«ð½üÖ&Žf&¸žå™f)ž*Uñüõ~'Ú"íÆGSö‡BüIºLÕÛ6†ÏH®Ïˆú~?ÑÌ?E +§@Ø×8Sö¹õˆ X}Ÿ^.5n=þû¢-´&GËúí¹½5G’¬Ø÷LñzXp`Ô;N]çûÖí‡æ$ÐõqCQñ¬õˡ׈à‚ðTÁ°O+½Œ@^xô÷óª7ù8É[S‡ÝÀן ”Añl™WÝÝ%OÞ»šLÀ™®Dævý®{±”ÿ„ ÙdƒkSêï8s<5'ûüýõî¾ü#”ë7¿™õ¯ò8?Þ6³9†k¬Fýu@n]‰žù/8³Ø¿@ 0‘E¶û¥ð2Šl}‚û÷½ä\†BÖi93*ö¹ÓçÉÕÝøY—4>zÞò‡é‹vôûʘÒeWŸÃUÂÄ8üIà&›õ`~¡z`ž6:9㬟 Ð7;uòsN‘ïK(Ÿ,Z‹ ä'¸-Ü‚-@ºGÜø!ÿjiZ*j(ðñzhÕ»Þ”[nØ­Vù$X½ÀŽßx&šý¿2ç!ã4*o’‡+ ÉÄ>/ýÇ ›Â¯Ôà\²¹€ˆ¤TžkVç`MùFª6°Ùm§¦¾2¥v9ÍGÎÖ؈¡›ý’^ QìÁŠìº¢Åž*æ‘2Þã`ŸLÅ9*ŸnܸÙç ¸/)B”–¯K +J!œ‰×LÇJ!{¨hdqîžÃˆÞ±Q6›ï¶Þ¢ òW±Wo¸mR"Ø÷ ¥‹n£ê#(íý—¤•ºÅÍÉO}Ù-V¹àZõýçãĉaŠj­„ü<®—çf<ÙÍ…8éû¬X<1œy¸í¾GÃáz -Ñk\’lˆbƒ?suÈÝÚÕ.g§Œ¯WilyÝL&Ý=Ùßgž€åæ y +ž¾;ûéh1¢ª¢ÒrÝ°;¡¬1½þIöNÛå&I'9ƒ_øw€€zöRW²;¶ØQ«°•¥l§SÞŸS†4îñ %^ç¢hÞ‰Ð7c-ZwVë_Æ,éž«±mb!Î’Ø®IY¯H‡3ƒß}^ß‚ŒÐL‘^°=%‘* + 8{^¼ßy¤ÿ§! ŸøÏ(š²ˆÏMšÂõÝHéSŽ+ÖÞÞ?%ëá02•¯pfg%@s_AÍ«XPˆ)ˆ,°~¢C*0þ茸Ûô™¬ÌÔÅ@nˆZqtTMùÂZlòÄŠ Ql„l¹LÃÃF-ÃÍ‚««Ú[¿ßË}>œ°Ÿ\‡ÕsËÝ…Ê›¯‘À®²‘ Á—žamξC·æ¹Ût&­<+c„5Ü. +ìveMN:kí«v’ÕGùÍ&~ ¡ *TÓ¹OÊÈÑ?÷梆ÀhÈfF›;Cwî‡&=[·•ÿ]Í`_hεC°w'2¸åWw-p[Šc’ýO)æîVðxN&×KI³¯‹óºÔ¾Z2JóÞÝmU<‹TŸ(¢®Y²^›>0}õgaOzž´ô×U„LŠhØD•üN‹%ÑîÀ›…ƒœµöÁè!é¡u×V¶­«<Ïìc2€_ØÉzvÇ‹»×Y£¼Ç&–Þ@ëi=&ú¸\áDׄQëTC&Õ¶¶¿@û€zÊøa³äuŠèI¨£^B70»Q¼KUʽh¦ ð®k+¢³nÐ[s½=)¦[v¤²ÓKnÞÐoSlra”ÞÝ.ïlv“:"R+òÇO¿X–ÉmMÇbM¸õ²|©Ó¨L+dªóØt7*Æîv´Þ1^ ?«ÿI–ÉV­Å—í/o¼áhWúèȪt#k5ûà3VšÜ/å–-Ãøâ’ŸC¤½ý/04ˆU´‘ïL åNžñpþ0,%G¹“ËÐTŸ +^½Ø¸;i†šݨ ÁAmé|¯òÿ±‘•v©±Ø²‚Rã›øÉda` û"8‚‡¦ßá–”K$i-{ֶ󢀘#w\q$~ùøÔð ÅX8 ÿ%zÂ\N;-50xÿ÷â…¡ÙB)èMÖ½ £²êºFìe @Zœzx©HVa‹7Ë8jÌ^ÒÆKsñO(\ÈY­j:?ÿYîËq›c#ÚÊ;yƒ„ò{÷¡4ÍÜ‹ÑŠP<ˆðŽJ`ºT-XþŒánÉ÷Tý"𦜌vÇI$¼õSJñ—ø%˜ƒ·Àžs™7ÚMM_P¯}”BÉÒ8ú›Çš”„Sñ‡nجï‘h×ùÅ‹IqD…»5gêÁ5‡*­¯¯fCBvwè>lúøÒœdS»bª:TXº2ùºîýŒÂü[/-=JƒÆq ¾uúGáÒ¤wàW¾™™ºÎÂM®o2IÝ\þ‚žµúªèÿ‘šÏÓøzta~½,“E?©P Á½}-ì°¬…G)êÄ€V¹ê†æ1ZÊï Ý®|oÚ-jßEp,@áÂõ©,Ó YA;½y’<ÛGËÊ‹M؆zËH¹Ø”ö‡p«hç×’a¡/!¬ %Þ+P‚5é5—D8¾Øb°$)B0hru@ZÄ]Ë(áÏ0û”‚²5-é™ÑIëuœnò„”¶úP¸§ƒeØéUýê‹ö€Ér;8¦M”¶Ë×oÞéÀwrbè¿Ôñ–‘øây{µ, V©ù1VÜRÌC–æ¹²·5Sÿߥ—@_XqYüÛ± dY±½12G/µ«5e;.©GÅœ¶ÔŒ0éUd½‘à}IȸӿD`ˉ‡JÌ{Ãg/ØEØÜO¬¾u:lÅ'»÷ÈÚñôõÍÇÄ‚}iñÎeÌ|¬±®eñ¾SÖ”oîþ»< py12´ˆi¤€©*N»„´߬A?*½ÛÓÕ¯ÖZD’¯Q]›ç0§‰qmOÀ½!Éá‹|ÂxäÉÅ;’ƒTBtâR‰¯’ÉöíÒ“©nOgfãÁU†sɸ|¹ÙÁ¶Ó\Év:å!žÌ ºÐutÙà™SmÓ'èW®”К›O~,£½Œ mß ˆ” í·jÂóë5Ѷ·ßyc@˜¤Â¢ñ#V—4/62¾<·y™>‘A§r= CŸx«‡Î>]¾[{ÉIrç• +$@ÒW +¶?þ…Ò0㨤 +B¡èþõ+ïví06Å?ŸmOu['kUôS1ð+”ÞõÌâ4/஋±9u7ß$áuc‚Â'\—ËÇóe¼¾€[  †57É–ÔâQÚðw +ã¸YS8HG½1GÍ09 ‘ Ûzö¦ËÈ£÷Gr/nÇã·J]žÓÀÆ®K/+©ÁKŒåƒ¤ËÁ'îÙl^-ðÝ¢ˆÎJ¯þ¼'•@Ek–ˆí®n€Q‘,MšÝyMtc’$)ïôS ¸%실Y ÓUÀF 2 +J·µXnŒ®xÁë”ƬÌáBqåÏv†/Z§>bÛÕCLòÍN`M# ÖZyìz^µCÿˆ¨Cp÷(ØÆ}‹!n9ÀèUÁGÔt™áWµ|!å|?ÜòñmUá.“·é§¶A0)ã_sêô>³A·F‚Üð´s7•@ Ek†wˆ#îÐg÷GNÈ‹_sr9·2wbâ´k Ö +—Òä—a °ö*‘Òë¢gÈ_Ô'IÐH«n”íNŒcbñùÿƒF‡ˆ†´Ô‹–_­“{ „ CâxÜÞù¢ëm®a^¦lT/ÑmUºËß×: ™&S„S”®¤JS¶zÌÖçƒÆV 7¦ùœaqbýƒz-ï¯2Ô|£Jê¥æË­{øŸ–ˆ›‘ÒŽ!åÄ—;f@~æ(+ílø¥ûøt%v?'|½HpH¤3r)+þ‡Ñ»U!&ú•*‡¿òŸjYwÔGnPû¹ pPyyÌj¨¼>åð8˜V¿s×.'_ï˜a  Læ F,‡×É÷±éeÁsàôÅY°V7N­†!R‘\ÜAvgF¶a/%]¿¡$æ©+/Ë +LþÇ8D*û2Ò:ÛIŒ)úuqxaÖœë!¢q3y—Š?›2þ'#*YEî+LžÌ:O«žO|¢X:ñ?¬æ.ñî€÷1žB´ñ¥cÎlŠ/ZýCz9!Rz=ˆ²þ3²`e¤T@ÈàÕÃNÞ´¸0Ê ?2xœk쥖ٵŠh}Ï„#=ß'c~Cu’Ðøô «/üAXSä~$ +5r×»òqyÕò8[Љ†êMôXôÃzÍÉ–i´—ÏÂŽ@„à;8‡#Oªïô¾ä(\Æ™T¡ +™9| @%ä¸ÔÃVb#½>Oè‹«Uõ‡ÛhsDRÈtÎìsî¡¢767ì˜ô¦÷ì(Æ£ú·‚! ·¢(Õš|5D +€ëZ;ã*b.ã è¿N'Üue˜N‚ŽLa%ò' »Î·V0š(2pŽΤißЦ~ä ú»œdc‚`@ç4ø ¯W“"èøÚöM:EgÈ0÷‚MÐ짒$«Cç™d¹sᬬÞlpüàôc»B†á¤ÚÃx¾%á%µS«ñ –r—%œômθÍ(ÅçIÞéôq9.ËÖì“ôŽñ亥͎➀TH{~ú/Õq™ˆ˜O¨(á^­4J0YšE¶XO*_(cª%²µÎ°”ÓÞKœÛ¸¨VL,mÛ¿yD÷oIq—CCv1‚À&~ª±¯¼®$;ßOŠÔ.0t™»‘6½Œ?Çù.VÌ°ÊÈRUœþçû,ÍúrÃÅk‰XóèöF’=“…Úû©†›Ì® Ær>V \½kŒm¤|I¯üS JpOÆx3eBb“ÔΦ§8 þïnó²¡Ã&58‡NÌ!J5øç1Æ^½Ôþ°[ú|+±pG œÉ“‡Oý©±g¾*ӥ ‚Æ«è 8!üëc´S m“«D&»‚ÕkÒS‹yO}ð->ÖÙR¹!Ó£¦¢öÆ Q¶#,Á8ª­½–#€&NdÓ4\Ê>\ÁÏq˜ù6ø6Œê§CÙ¦ìÏ"-$K¹´Jh}Nµéî ÒÜ“½~EUFH+îä"7’¤"…Q¤¹žš]ßM¯®P*@nuÎ* LÞKjði¶6”oÕIýsr¹gWìÚ^,cÃ-LAßÙäŒwïzìØê”JC‡2 §M¶»zkL9l%_°ÌæVÔd•—Š-ptÎÀMgÆvY÷nt&l¹¥cNñ÷rœÛt:Ò-6NY;èD™ øU®·Šu:4r¶âÇJg•'õ›UÜó(?“L ÔgXï`•Á¬Lx~§[|®— j0#^3µ˜ýŽUúûÝTÎüÐD»¨ ƒ‘µ[ÀŽTç° T õ‘ñ‚2Ô„T·2ÞRuª/ø¼dÂìУ¸–9Û;úøSª:OÅ«2>çæîü7R9×0ÒÂJ¡J–Ú6Ö6¨Þó=Vx&NÕ>ºzŠÝ”ÞÁnA)\ÆßVº0jhr®Xì½ZfæM."z{9û”¨î_JÐ gcã¿ûQìOrÀ)ôµåÄVÄ £Û‘ÉÁR@ »‚çé9'šD)²“Yó±¢dy¯Iä™UƒXÙ“©RiÀô|–¾‚ÐˬËëI;®³ÝñŒÈËÿZP-TîÁH`ô<í¬1{Œ’b¾FÀõ.j´²÷\è—ZöØ^‡þØäìfì«·Såä/IÕZÙÂν~xï3d‹ÿÀ=õó}†CTq{õüDQU…‚énÈ€Äñµ礑;çPb…¯RëGñ‚g]#äÛ jçno/áFø“/lÀ©3’?‰Öèí¯zü6®/þ]®úš2´íúPÄî¹›7öEÞ$Æ;u"ÅcgóEΚÊý¾þ/î‹qcúHŠŒYÅ×üVåÅL¨zç«2 +f{[Œ±0Bãàô\ykÈO‹1rÖ†b­Œž¿ü€a·ªð…øs_7ÛOª3_b{Ì«̵Z*Ï~ÇÝ;ˆVo…£†Ü'ŒÑÄוּ…Æ{\¾ŠKš(YÃS¦dOîðx™¢>—HsOZs1¬óûÚ둇@zÚwÀ=zÎmP³ivyDªß6³9ª´§ë žÛ¾æÀô¹ó5†~Aów£Ä7’'trÇÝDŠ§ì·ª3}Ò7Æ Ç>…û£ö=´ Ž›'¶Ž yi\K ÊsA&ŽD~Ê{›·Ã˜IaÂù®¾½ý²VEÂ)×›eúB|9®/ÙŽÆïó…´ß.n ÿ¬åþ1ŒƒÒ!˜ç+&[×}öŠ÷ž'b¢g/sž"±ŠœˆRM%å< +KW1fM[n¸D3„ÎR‹¾ÂæÏt…;“$W¾­˜ ƊЮ™ÃK(:ëPW¶¥·3a/²q÷tñ妳Nãu—fE€)8Í°kSŽNîVi-†è;ˆþžåìBÖC†œçﳼ›`:ª£*îÿN´P-%wœþe°)‰CRÂé¹ß§Âc+9‘¦¾¯+!Êo0 É”N$hŒk¹ (ŸcfÁlRX^‡õ ‡0ûÆ~!;€D–fjÚ m;4ú @¯r{*Ü©òºMºSAÁß®jV|dÿ¬Ê\–Î[!ÆÕÌ°Ù“ =ÐP7· ›—”ß«¨¼H ‘çÝÀ +G±H^.ÿh+î+*àlýñlP¢SÉ-Åo}¶Ý [(¥@¬<¤äLG¦w­7$Ä,¸ûÛÅ9ÕIt”Hß­Ã~–J’ŸPu€—yžÖh)L‚›öõ#û 9:Qæ!ü¸ò×gì¥$Ä„Vùy²¡5ÞüötMI2û¶pˆ¡¾™ÅÒ¤£Ñ¼F±°?Û„»U~'Ž€ë^-\åÉÑ»gœº¾Itcú1nœ!×ó’P%)l=퓈HÊ[µŽ º=C4#@[b_‘æWò’2ƒcÿ µ v*Ê8fú“;úíê$r­~ؤKéU5jÍ«%·†ÚJ8àþäâ½ ívΈ±ô9âü×åB(†\ ìÔYé"¨öí:ŽÖE¥…é½Êd…™¸_YJ)Ùõ2ÓÍ‚| ÍêlKÛË#c­<í%Sa&P”èôï¾m„¦KÊ÷’ñ|—NpòÛ×Â*°ýu?´ÁÚíóZé½$R”SÿyøÌ\6Þ?“ ´pO…(fØY¤®¶¿ÄéÚû<(†bóÅ*± 9à2Ç¢ä¹S3S –×LΫ㱸ߔ žÌß1sºpK©VÂVT\æoObêÆôG255¹' B1Û!V÷dçV¢ÁÚò.[7õ°½‘8ª¦––kNÒzÞ® s?¡(Ñ¿†]CÏrìÂH×e"ý†ÉH¨H%ŒZ|veˆI™É ¤´, ҃ǬŽÏaÕ©Fô}–Àšü…‹ÈkØ­%tÒp€ v ×Ý»ìÍκŒÓzIoŤKJ0"û†¸ö?½æ<¦Í•èì¬ãp-Å ´‰´y{¡:MöüUÍæôÓ¯#`ªÙcô“)ç‹y)+MLECçÍâuÃà’„ÁµGÆ5Qh¸Ãú#£)&‡¬‹‚ˆ+†–è20&èÆŽéTzÒÏâ:„_O€9覅‡ÃøÀzî¦L3X[»ë—(v¬ Ak³ù`t>“knµHçÀ¼W³MU§€L›ý?FFâý·_¹âÖ¿j× Cç@{šb$ #Âb +½2äòMá…: +涡KžAQ‹àÕÕ©×i‡Ž©¬Tñš˜€÷ÅŠ¾½„1b¦¿ú­ŽöÖÝwgJ‰ ¸Ô¿(ý-_æÐ$œ…@Š.5È» º_øÃäGËòT `·1A:GPD®ŽÓÀŠuXS®¸xSÎÿ‰Œ÷‰ÀÂœíÔ:ð$Ý—›µYÒ_1?Hö—$ŒØ–‹ô§¹ÖôÉY]o8å™i:±C÷èh„`µ€-ˆe”©j(Wö`‘¾Þ¯RðųóøQ,”ö±ó wúÝÕ\ÆÚ= •ESd‚Ë1€ ½2@ü#$ƒ埣Òæ—9¥&Üc})k>Ü;’°•cm9àGDIrmEcÚ‡¬òáó-lu%z:˜A ‘,z |Y"•#çÆÕÃðD¦Õ““Â4Þ Ñm2²o|eæQ<û¼š2ª¹3Üf4t+Œ¨Öc»Ñ»´9¦É›Mu Ða^òd¬¨ôJk´£O›Nã6¯{Ö)‰Ì‰ˆ×Ó Q’”üo…ºBm:”R¬ˆš”‘Ðv•ÛD¨f3ó¿q¹‰¤îwyÂ}¤(q²Ðûy\sØ€úFBî6¥ºÂ}¨¢ªârÿ€.çŽ*vÑÉX?ÔïƒÜ®–%òóOÜHä(åøÈÛÊ‘ä<-6ÒUýçÉî@xñ£rbÂÍÓ«CÇéýw»ÜÉrj §ñDYä~—Üø½ ÞçE’ʉýíFŠßîúŽ¤WwÓæ)ügÃýž|…Í ë|t‚†ÛÆ€êŞ׎ï®üßÆ]òä)ÍŒƒØáŸft%sih2¯ª#G£ÁñÍ¥„"Ì€/œÅ`Ad5#¸Æ£¿´a.õé¥e§ûb¬©ò ^ö&XÓ^MÛµëŒÄ@s¾å·¶ªÝTýCtnS ˜"véâ5Ó²†P`ÃÑ +c51Ù;^ezy—ßk þ7Y©AÓr¤Ô¨™¯à%—÷*P }1°ÝDrÚ©ú˵¨°ÍÌÑ4€Ô61NÕ'ÃT¯«qò_‹8zb«þù(D]•‡?•ÑºTjpá÷¼”ήƒ´^ÏÀ†<ˆfœ‘«‡ eÏà$]·{¥Î»í·i˜>„k•AÞ†‰Q¾-9Ï 7ÚÿÙÿß¹F"àùqMü½öŸyÕ¯$uä_:!ý¶KWØïá”f +zùr©=™2&¤4ýª")îÀËm † Ïòê÷± Kë^2‰ x|“WÙˆèOß|r›rXÇbÒc=ù™Ô;JKŠWó-] U bÞCV™§%7îIÒ»ï*Ú±=Õ:îV@Ñî¿zW®ñáÜ2±ªåã!”¶U]ø‰€”ýÖ<À Z«[àZIuó_KþŽ}(6WâÁŽO‘3ÕŸnK’^TÄ›ÃLîk½ u +AÌø2C0ˆ¯5§Œƒñ¸ûoÔ]}‰I(&*㤠½;Ã@ar½’§×@ ž\-@óˆ…Ô|†5J¾ÏZ¬¬Ò¿ÖTÛÆâ¼ .áç1åT–€"aõJk:‘¹bÄ–D»| +endstream +endobj +2236 0 obj << /Length1 1620 -/Length2 17956 +/Length2 18673 /Length3 0 -/Length 19576 +/Length 20293 >> stream %!PS-AdobeFont-1.0: URWPalladioL-Ital 1.05 @@ -27550,7 +32939,7 @@ stream /UnderlinePosition -100 def /UnderlineThickness 50 def end readonly def -/FontName /RUEFYH+URWPalladioL-Ital def +/FontName /LHHPET+URWPalladioL-Ital def /PaintType 0 def /WMode 0 def /FontBBox {-170 -305 1010 941} readonly def @@ -27571,448 +32960,1527 @@ f ¸=0׋kDTOk¤ydÔtãÓÃÔPö{ýVta?lÆx«hÊ»âèp n¬.Â×2V«rœ%v%i§å)à†*–‘¬™ÂDà/çP800…g†œ[Ä'bSÊàJŸŸûäðæzÄ/›·Ò$½“é5r¹ì5¶Ó˜]¥”–0Þø:N™ñiB¼%…Ä¿H)¬ÎeIÄ”€ØhS!ÈOX£Í㤿ìÕO4 ¡¹+¹2­®== `™ŸøÐôæëñ_4ýÙ»N’U–þMÚ“lÆ%ãŽÖõž¼å&°kaŽ÷ ;› ÉБöz–››¶›_vÂùKôt©f1F[Ñ“}á:ßF9>q0(Ñ"ŠÏØÆÅu3TQ á`cìµMŨ”Ø’£<Òó²Ý–ü»°­ýõ¤É¦Ô!¼²pE<"+¼ÚÀnÉLg!:˼ßAb^ïàlO²0®ÿÔÒiS rþû~wÑâÒ˜¥_ú™g@¹“ÇŠŸ…¦‹‚å¼Á‘:nfâ–Í%õ$î77JNÏHIH“VweÊž·%¯.¹ŸÑŒêx ÕãPÓSÓ!·Þ¢49$úÞjúÊ«Ó)@çÖÍ  <¥´Ê,­ÖG—KîÅ5‰ôKlÖfÄ–rÝÙ%LYÚï7“? ™TÄ.¡6¥?~ F 4R¬£u’–4] AÄ%”Jïë ;°âGù?è›òLb?çþãiôµØ¯(á4pÎh@©0…›1·ÞSUœ?>œÛœ–mÚÁ l QÏaêlM¯6’YŠi^¨GY{G`Rse­ JM®×2ÃŽÔâK^Œì~<™i¯§÷ά™¢ Ê5s®É7‘ä~®û÷µ%,tÅ{w[+¥a£¥:èžó‰ªª˜Jlºž““Ú,¯ä[B>Ô=<½ï&N3ËQ&aÚWY†‹•Y|ˆÔÍÈï˜^ŒhfÑÌ£×Ù0ñùè:«0:ÊA!éIeñθ û%ìü¯çÕ…övŠ¤ßÎÞüö¶zl u¯3km%Cñn庞xãotÐ0(sýU$o#j;ËtLïÀ@N™x¥o3lNùìžw¦ÄÞzšô–Tê3(RØ¡ÂàÓ°w‚ỄD&;¢¡ùþÆC¶[óÂáÕxòœe™(÷Ú®ŸCwÛ[ôšH€wï32€Â|xÏ¥“mk”Z.·ÚºÙ«ÈñMÁˆSš‘é$‚>½“ „iF²kÝ!´:¨Ÿo¨¡ðj8½U4afs74?i79 b‰}ƒ4{Qù•NÞÃUׄiÂN5š¹ƒ‚ºÎ ž.ã Á^‹›ÿƒü ¢|úžíõˆLæÇ¥À0³œµì‘kv °z­Oxu3qñ5¡ƒ„fÀšV€!Q'z#Äaœ9$ÑÒ> ü-šeGÐHu„‹4‰Ñ¥U*—Ò"U†ÐøÍÒP" ^É÷.EåðbÿÃ&uB%´ÒªÒŸ[æï˜×È®äfòJ)SC®È"âÏ‘UCö£rí{ÿ½{<9ʼn7 ‹º[3SÚANSí §çÐOø€ã̓ÒaW††[9¦£Ú­07L ”1m‚þ¸µ>)ñüÕ×öµ\в•n+tmþ; UÇ~ŠÉ•:5xÁq>à‰V£b¥dSA陼æB\j gf—CX6_:Æ` ’Í÷o3U6\<(L÷ÈÊT±oÉ´îóÍê7oDDÈ‚LD“˜ŽŒ™‡ÐÞŸ8D}T ¤‚öJЛjœÖ&™1vW `ËúçFE2F¿Ó8ÒEï>Å×Ãþw6«(ô¼Ó™ô}ƒ0ÆÉOöûM@×9àùôI»² qS¾¯ð Ik&\}R»jâmòB´ë=Ûk-óÀw-½7õ7l d–w¢p7BNúDÆ9h~QŠ)¥’•Ûuåå˜,í&¤O1+Ûk›‚ÛRJWþ\0 oArœ@Ê‹þ«›P`ˆÛ<ùœxa:ö T éªÝÎ6yd·±iªg>·‚âuí¿àÄ!º¢ûúÖ(©þEÙŸB:‰û¶î™­΂«OŽ}€ mÆi”Zè†a§Nv‚d ¿Î²i²·D·qBüÊ5šõíÈŒ[$œXÎÕÆËÏc´ý"CäXL„š›ó‰´Íoõø6o6ûv®çÊªí¾˜ÆŸ‰ççtÏÇQ_søg*ë¦Vå´cdžŽJ…7ÓØSÙÔ•koÖ»° üÅÇ g»XgôLÂ/­þ†oX6  -;æ çw$‚’¥n^ »3@|V­c@Ù¦ $þràîHÓ¯N­'õ|ÀøRÖ5O©´@ÆÝ WóQ×HXbÄ@w@_ðW‡ ‰‹wþƒëËMÚ‚ƅO÷ÍáºqŒÑ¡SÊg@Š–ÝÂ…ØßüQ × Ÿ}c’*Ͷã~µV‚m¡QB"]|5ÿ*|P58ùëd»)Ú$Üx¯Øy½vÒ:÷æ2 ÇP6<Í|»ïë™ÿAž;G'ß4oå_ÁÓy“¶ýË5 îÏ-D+ÀäpxpÿràZ8;IK\ênNy;•&v.IˆëŒÖq3fx^¨ lá-N¯j]q¼úïÒldÒ2`*AÕÂÔö½ç$™jm”z„§p´—X„5¦Úfªc¹I| ö²Ù‘Å\®=+\xѼ<÷’¯ýg—Ó -BUù9º¥l˜>Ð;ñR;š`EƒžškÔœ'^—ȉïHC¼NJÝû¯ŒÊ¤9ëlV4ù”¼‡f¤þý3n”dt/µ ˜é>¹§cñº3†àQ»ÁÇû‹\¾=)Z2Ê%Ü·ÀzŸÔ—<©“l|ëÕ^µnBPø¿‘‰Žý$r± ë1ôåŒÇ`º÷>Þ– {ò®èfMjÒ)Ónw`’"ì‹:œFî¦sóŽ®\†/n½tµM¼Œx¸~]¶,†‘íbÐmšÐC¦­²£ýÞ¡Èbɽâ±R„+øþE#^?Í Ú­ÀÖߺúYËæãXÏù›\2xŇÄ<ÕŽAeo{5.$‰`ý ]òÀ€Ðµ¨sv_¬T*Ï;ɈidbMÍ“dБûn2,Q+|΃f%Ôz%žzâ¡"}Ü’=)ßâÚl)«ó˜ÉLÌl–èØ Ý{DliTþ\CÁ-¤¢½‡éœwYãZj/úîT⌠qÏ©g+ßSVtf8ÍÙýhãÝ­ŸSí! ºY¸‡Ÿ÷.£za§ßåNíÃõkÚ–:K -ibh -¥Î¹œ×_GÂOB·¦Y(u÷ùJtLgöÀ"©¦LS‰œ="ÅV–JÌ{<ÞÔ Å s¡…s‘Ô>P]âýß©áZ¾UÈýÌÅЯ¿lÝØ}3R„!íkuòSÔ¦Ì0äÏ÷¢MìO†]ó"®uÑ¥úR‡kjÈ6Ã]€ÝîA½uƲÇÿ .mH§g8‰µ\àzÛ•ú•Ü8+li{Æsölt®—¬³i“~#“o~hêl¿Þoâ¢z„Då®èçð/Ù{ícvðöj¶oƒÈÐ(Ó³„3ÍÙj^éaH -C§Ü¶L|/úe‹²¬pØã‚>5‡A‰¥4¨Øú¤þGä²&‚&›}9;‹GEVæàE…´ÚTê‰)ºÓ‚OÍjú -Öɮ߫1Nµ½w-H­ä5&‡7¾’¶ëÑ­ä­›zÒÌÍ(– éðC%ÇT`Ÿ\]ºSr*úÃ]…=‡³?Ý"-ƒY!•ƒšqRsá¡¡í4Í©lb_A.Ñ3é¼9÷ OÈtE9o¯lh'ø ) ¨‰4Ž2èSfÅ–9rÉ@‹M°N F{5µÉ<3™¼üÔî»ðrÀ[°R…7í¶ì™Ä™}L@¡/*”Ù5^ÌökÆÀGúª¢aÆE?.¨Á1 W<Â8Î#©Z×Ê›Œ^~P`}‹ÁPâ”Úùò‘"†Ód´±Á’̯ËÿàÉ…el3žƒj”wòš„¸ÒŽ–p®V_ù:v'©{–1eéÚ³YaHüŒ.áÖèTõצÛh2–hš í o*´™ó˜ø-a¯¹Òl 6Ó,ÇNÖ7𻙶‰Ùlç=Ôb Ñ<~sÒy’ã·­_ÛƒöíÆݲÐßg^ª¿B>ª'äMžjàšGt³mAŠ=÷ü¡VÔoÊܪ²£vLWÓc‚#ŸS%Ò­.–^xÙ5Ï“¨žŸ oÆ `;nbBš‚ºê†Ö‹µ=ZõØŽ27ž,á7Ÿ6ÛRP˲ÌÌjQò‡bž»¢ºá6½0™þœ8¿Jo©§s¥%8 aN>sàú¾$ÎI|T&Ó¤=s>¶ ±MBÒæÍ"Ø¢ìä§ÑO¡æD i\g,ɦ-2û{W‹»ÌäßضæËÇÑÐö á”'û°Uȼ¦’økUxbùm„Æ¡»2PJIL"!)\ŠCXqEë’‹È¢b–üêî†xnñ¿pÀb¤VC‚NG…wB³sF8Ï^H5”è¦ •)&‚ˆ¸$AÅk >¢jÀLÃø›M¼0'³Ê™ð@:.è!îw!qÖ#q*èc?•©ê¡?I¦oÂñw–ýýy"9º7ˆžõ§ÐÝ%ø9’fy³ÆîÜÁ†yUôóØ3}Dý?¹„EkêÕ@ÌøÚB÷”.Ðá© -åLY/#K]¿Ö¥"—¾‡zýH;z.EÕË° j’Xöžù.:Ⱦ#¾mÒ³bÊŽxi£ˆÀðk%®hM¾BîVèõѤ-@ÌûE¯ÛÙ¹õã°­瘲(u8Ø‹$tVŒ]ä1Vœ -bhM¿$,ÐÇ‹´X:ù,2Cæóe³â±|¼»YJ3¬ ô\†¼åÄÖ.9 ýŒðó`†™¢úÝ>.ÐÍ2qˆoò²%8¡8)BY&ózR¡z™¾…R¤òÇ©9×É»öÄFøZ¦ð÷SÅ•)Ì°ÄÉÔÎ/l?%BîýÌ)X£ÛôžòtÆëìËRYû -oøï<ðÎ vàï?ƒX8ñd|”¦G|ºíwÂm’¦ ÍëÛ£”¹Ú(ê|´˜1ú‘uWJDó[¤0T!!¨8‰Æ¹¶)5ó¤-îáDÆÅò·ñˆ¡;~)ŠÀ1w"Ã9ö¯óí­Éº`s+8Òƒq­Ñ>&<%I$ð•/ÓaòE“)ã¤Ѹޭ)¬6béX].Žr8vŒƒ Ð•hˆÍ"ŸX ûEÅIÄ8]û½p´­˜\ÿ‡¾Š")fD-aŸx¢¾äP 4‹ìMœ3G£øh ‰»ë•$Ž³ôóá¸Üî1:øÓÍ(•2iNLÝBù6‡w„½€ôw¬ -J4¯a_¸— Ãx µ®•”_ßO”îTL–ØsJ8€h‰®¬ÅB7þPæLâ›ÈêØÏù¿kÝKv]ôê›VH=öëuKµÐèï´°O›2DþGí©ÜÉ¿*”šˆ“7-’o>g®Â¼ÄÞ°¦…”ÄbÙz,»?UÈûäp¸ PR‹^±¥Ëø–p_*„שjaº»Ò¨jº0`÷zBí*c3Æé³çeã¼Nzܦ±À½U¬6 ,Ò&XUééÕ ÈÈpYßøÚ ,jHÉcº¤¤ýó’ûs¬3$xç@ ‚ôb“˨?T!=]B* ‡Ì9+ÞÐyïªôsû½€Æ'ž>ˆö‹ ýÔÇ@³³*ÅZ »ÞXM•²®éÒ$‹ -¦I,ŘÙ†úmà5 áŽÐ;+`3&¼Ó„’ÀŒ0D!’éBœâQ˜²…#^(0¼Q„$÷lH’®&bqØóy_gC¯ÇÆ(°|»}‰‰ _îK®TSHݦݣTxUßæPœ0ì¿E>Bçúb°(«÷*¬vš8<Û‡O}$’˜vHŠúŸ{~ÅhR^†àA¦ÞÙGùÇë„NR¨K éa”«8" -FWG†RÝRfAbú°Ÿ †écYî#ÇæxÏ«‹©‘½óE v¯}SŒê®ê{ôbçWZ„áS}œ¿ žD¹¤…˜g:ä__·­Û+®Ï“©Ÿ®ZƒEu²¼&¤>Íœ‰²FZ¿Õ34× gþè+¯ÝKŽ; àc6Æ¢O·0|Fú"Ð$«¤èç fCÜûÃä -‡´¬u¼Ë¯i†@Æ úÕzY{=×Ú—Y -$´–‚G…gº%ÃfÜäÜ~j¤ø$ÝW,;|}ìàôŽ3Y;ýs‰‘Áö`ŨƆôä£ò—#‚D3«ŸCB¿0 5ö½åe†W½ytý1…Ò,òì)5œ×T›­bMšb?*½†²šXɸñX_-©¢@g{ƒ®ØGŽšÏ‘3ü§¢lGÏ>ƒ!üÁü³"^0¡4Dv¦¸%~Ô‹·Íðú§ÈÌïÿ†Ö0cV¦E.c˜Ì € ËКšU2†T²ë;¡n™YC÷Kð>¨¿ê-WÀrÎàÒá{æù®ÛJ «´s' -\ªyS|Cþ29‡%y˜ò­ÛÞYH9dM)Á¹ Ÿrñ —™ãøX“âÑÆ­DKý¶Jœ¶îA/TÞu³ì‰®²–ÙñV®9=™h³ˆØdÆ•´;vÈÖT†8JIfJÓ­·Wph…$à´Ó^QÖ@ÇAÅõM6 ÎYRA¼¹ µJ¥pŽOx5‘ÊYS£6{£Â¶:æedÀ5JA‡^:ÜPôFW“AQ0:3¹ôZeÚeü_›D-ä¦>b³é±™IéàC)0ÏÃNôÿ¨çÈõ\£iÝøH/ø%¶Ò90ÍjÔZ0oï¶lÞ‘±×îX„jÀžëY|6>1\ÕiªD€ÛO+¸¹œºÞgÊ7P‡C¯ °/)4E›Ñœ¿q3.=®JÑfL¤ˆÄîíh£KìŒîȃ¥5F¿µ·ÈyÑ$`J¥Ÿ—* *D¹H‚fu –»e½õ¹ -Ø•¦ù´ê ×þ G­ŽxIÅ@Lèf¤Â·mé¾Ðýø>ßÉp‹A’x$Ô÷ L£k¯óuVSmF,^™@nR9áT€ÿ¥zžYøðA¯@®'1¤×Hr›Ãä¢p‚2gܯpXé¸ìMMî0ŠÂœ®Ã¾3iÆ4žm ›¡«'qŠr¹ª™±&…‡öý*i…mãy©p€©ÌA’h` ‰LœeVV}£Zžõ+ñRÙ¡ÃÌÔ[ÓmÍ®tî?ÇŒœ‹µ5Y¿óAb{ªïû™×Žò9ž†* û%Ä.V SäÂ}B4ÎAcƒZÚPþ¡:}Çú¶—¦ðz©_BíFH9­²—½]5Ùáf'Ý®¿ôôé T¹ÒC»€â™žGâÿq¿ÉÕàAhªÏú]‰õ=îá¥sòÕ+ãÖ³¡»Ùê’ãÝÊXzdPÑÍ“‘B¸m! §Ândí ëôš9½K"_z æ…Á  ‚'j»åòØWQ¥¹WZÌOöÀÌî6ü{õe~–ÜCŸ1Þîz¶¾'<ïô¥“S ùrF,ëkiׯv…-Q7ó;8:nÅÍö¦ ‰újíÍš ˆHã6(PŸÃú9Q™úì =…:›Ýßzóòb´3%j˜‚†€CAêeÔA7È'›pÂN¶÷*=εŒÞ¨Ž…g/Y‡‰Ñî ™ñ‘2icEí“Y*læ 5L-Q¢2T¯"R…‹1‹&†êñ0‚_:þv`†•Ïéz³Æ¯Èõ»‚ï+:ÐÜÜÑÜç€U€z:Ž‚(P‡Øgé5´?î)¦ºøùWbý?õ­ÁŠ¤Å¼JÏY<¨>X–´”wv×O²ì^eâ¥@C+—Íi÷g¦zóºà,“§i’ë&æ ±ëëztvØ1ÒÀ8?9=‹ù?ü‰5~6qòÑñכĊw‹¸¤|‹M3Ÿ8u ¿J®Ã‘CØ\Ó,”Ÿ=–éÙ]o7Ò½éáŒJIî{¼‚Øë¦k¥88áìÌ‘bÖŠÀ1œ’5}ú]Úç®KO1ƒ9ƒÚw¯ l1…‘ÀìªcÃÓ¢g@fÛ­œ<þð…£Û$¹Ý–¡5Þ$´ë–ûk8nÆÈ$à¤Ê¶.§l¼ØÛÚÃJÃgh)™e|TÝÄç6Ì¢*Å46¯ôâ™7?­kýöj=Ê< *i’¿¬  ¦k†:jé^X[þš‘ù>íÒ¥€§¤4ʴПg+ÍMö×YÓ ·- -Hì$(¨BÀMPä.Td¥'÷"²¯&¸Ó%Ÿ–Û;¨2±\ßl©Ú xÁ|©©µ\ûH”TŠZFEŸ×V0ã«Ú³±æéb>‡ß«=ʯ ^8^äòãõþs·Ñããì8õ<¬“ÜýCûVˆÓk ¹„"¹JeÇÚœ„bå¡ ,éGÙW¾¯+ýÁ“OoEŒ}µ«ïJØnŸºå>*ÿÝ/÷Á¥Rµø,PÿÎ@¯Ñcss•6K&Á‰#Wqì……èê5©À¬ŽÃ=!º©à¼8ÆÅ>§–eªÁÉËT{3ê³ÎRÿ€LI"gl„d(Ûu¨E‰¾y¦nl)¬åiqÃ"k<½F‡-§ät‹¼‘7žã0¨…ý_Ë©ú [Ü ¢Z&ãßO¿ŸgÈz{a¯ô˜}>v¼Qoþ»~™“[Øxë£l¾Óû1Íu®ÌË{þîß6N¡ª“Þ‹9·|0”¦tá6l-“ÝtðæܦŒKgÑQfŒørÿŸW¯ÆÙC%{¸Êÿ*±9×uã®$Œ7p©ÅÐòÏyCõþ)ØÆ>í·dQp¦¦GŠæ-´½ fÊ_g¹¸;q0s`Ãý_Óç°‰˜q¸êIŠQäpâbªt‡/H쮺۷ïG®¾Ay¸¤›äøþº°:LSCÅû@êÆ?õYÁKR˜.ézr[}fK³RZÿ']à‘l‚ýn‘iX±B6ªó¥ÉSþ\,Z'·–'Åq¶^‹ ÒiV25 "‚?âD˜]Ã(Ìçâ™È—&?…¦AW#7š â&C€ñ5ÿ¼Ëÿ‘›!º£x˜Ÿ¢+˜zdÐÌ1éWW6øŸHáÙßô8ïŽ ^Ó¿:aH÷úpìöºRK%ŒËü‡‘¡›ÜOïºwLys¦ëC¿¹#sœ©?à@¢lt<”^ˆf"™oò…øëMPÿ²Ó¯ÒðGíwÚSlú÷Ž`ŠôÖ†!zËiC(fÔÁB’ú){™ŒV¾¼ê ¸ „Æ“JYÝøϯJ{8kþ£‹éš¤‹Å7ú¹”õÒhÉ+g©ºDó„‹2TŒwqœøy0?¶÷ú£|q毨¤ÙßJ›Îôc#½­Ž[QT3âÓ—„øÎÀ¶“ äÑŽŒÀšOU¼'QÃåõGâ¾Ií{EX¿_ú7áþ‹Š{%=̨Te‚8¿«Y¶_í„£€$k‘ÚŒ–7Œ«ûïÙSW›Ú~öåe€ÓTÞä•Ñ‰ŸôÔQrQL5n Õî-«ÖÿQ){ôØ©ó·O€‚Ý’‚»ÎhuÚíûutIƒÏ\ƒûûQ»ïENé‡ÞÝs€ØPIA;M^45n …’C õŽ¶Ê³­Xà˜™¼^³ûçšé|Û¥6í¸»ø:LíÀ¿Ïqgœà+ˆðt(Ð;gg;L‰žB»¶=Ö{’w;I/£0ëÔYL}$Ðz6ÂP=ÏÆk`i¶œ}yüø$,rãî þhÅâÖmé̺¯oPzýN\¼ú‰ªÄŒ`C²Ã†íX3¥ƒÜ­îÑY±eTÈÁ1ú¤&3ïx¥W·Ã2û*Ò,Dep˜À\½ÊŠh"\,t›6ÞÑýYØ}ñ9cÚëqFS½'#•ÙÎ-¸)‰)]æ;7)–IUs -øb˜w‚ÏÛÃmöPñoê?ñª¦æ×ÉÌŸ‹òVÛO!¦­_îÏ·­BêµëöѤó›ÕOL b'ê]Ý4öHÈD³±6ÿ) Ò< ärÂaÛìÝAEAF)¦€†êœið®@Š#‘c¢s‚ZöË];^%#ýøhÜiêv’Ú¥±5ý Pu^ÂrÞ™¬t,­tg®ìÀI{û¡¸ü‹h¹®?h‘ÀðCðXá_U!¦ïê2åhg¶›®ò ¨K†ékïöõn7 -åq±søBÂõð—\ºÇߘàçPM²"öDXœ%žÝ…¹C½ÏÜÕ z]œ4˜–³à(ÂöK -©Þ1K!0ÐíoZ¯{ö–bÂ0¯q:ÛË&^/†@ck5 4×Jj%—L^ m£ÎÎj—W{?ÎÆuš†õ‚W꣖ùh£¸·M$3[¿åCdìse¸wJ]$$´&ºÛ€{pyü5-žïUwU_Þ_¬:¯×«ÜèŠâã£ò¨¬š$¦Áööó4–¬ëNs™0S7Í™e×Dnf#›Kgð°±eÝ*ç:xÖÃ[ãÓ ^Ø2ÖWw¸/Õ’ -¼×LògÅÎÓ_ˆl•$6û/ŹkM½ãy^#ÆÝÞ1G!|&0ä "—­–”0Ž! m!uPRMwÔ7TÁ˜îåoC9ð¯GÙ*ŒÄ ôBè“ë㉿š[‚KmI“îA^˜˜…V‘žµöt78:éÕ¢ë•Ö³cÖÍŠ_ƒ·µ`ÜS NA Gmõ\ÿ6xËYaøìûeÎç’½Gœ¼aH•f'T%™]äd–ߟœñ1¬¹íAœwY¾N³lzHÊY7ÏÛËsT×ø˜Œ„›à×c).†.-Fo¿£÷"‘(Û-ìȧ¾à2zÒ)ÚÙVºô¿á®…YsR—;]9©R}ü¥O¥A¸Ž6Ù®uÉ—èñÂÞ•ÅBˆ'•ëñ¾áwS±xÞô;¹û3¹ƒ”¬üAñ£iïc½åll|­|îCêšj`ä=N‘Ë…¼ã°Õ{_TqÓÂJ]¦òò>¿ä#eѵxö½­U×Χq¿'’U.—Œè”N˜óT®~Fò¤áüIðu±¯Räæ -qÒ2פíå'f±+y3ô¯ÿŒâL¯e°Ð9ùÞuÌJ¦³îUßÊà\öŸ† €Rkv‹;…KßÄIíÚ´4¿9°5=>ÄKІpCS>F›ÉTC9B÷¾a0æóüñ× aà0wÒ¡¾Z¦áÔ]ô$¾mÙ=„Ž5IÅY@3¾Ö,º¸l¢té³ü$^hˆMÜ瘑dü?¹‚‹ß«•S´É±tºÐ§Ïê©dÔ4QP\.:ìå(zõ8Οèý“Ð ¯Ô‡½eF22L1fuˆ:×#Ê|Cr;ÿ»×_:«Â5òµ–2Œ¿1Q‹Nì-+™‡³ŠÒˆzZ?o‘†±r …̳Ò ã €ËDc_à-~ìk?Š3,­â¡Ð}ã¬3J5%›w™a˜!¬÷>˜ÃdR÷-öÞ¥StÜù íÅáJ§4ߌG[éÆ¥O‰_MÆiG¢C̾õO9€]‰Îç4™†‹TUOëÃcoÈŽ®±wˆCªwqAÇX(V»=]Ívjào<SHWiôŠq,üuxº ò #7´èOg°¬hn—»þfI.DÒ9½f²éÌÈ.o6£üSß¾Û;àÈÞP²®—BÑØMÁÛ†mÚíC'€0 çp¿ Ã>pÀ7©{kôögg¸þyQ±žÐlôAuØvõÅ%Ó‰–»àwjpA`I诬j7[£éÓ¯ô'¯‡~1Ó³ù‹ˆˆ[gп~ñ*cS½õm0¦d3ËB -mº2òX!lN×7ªîeå†wú\ÁÛ¢“S\2²D?0éÓÔAk òpåí·I“ˆpI§uqˆÖÊÿ:)¾)ü }.ÐŽæx—ü€tæòúLºKöi‘(þ²l|žI¦Ò¤”0€'U"j±—Q1ÓúpÌ …Äà H`¡’bšÞrJ!Õ€+´Àr »t·ægßêÚ ß{6öJHƒp“d”¸t žì— þ:ñö°ä^Kãâ|JìÝm ÆCCþBíxäÓ¨ýN$«þ$P–z1äþÎòv=|/â{? -æ#yUm¢|SÅÒW ²Í¹ÂÕ]^&ºØb{|¦H¬Ó* &x³¥/Ê¥v*KÅ unšHÉz)ß&âkYC,6Šp Ê`QŽdõmÉDögd]ÉËvî0|"³±ÓÄd$Ž‹ÿôT]…ºÊl 1ØGLe–ÖóåL§|ÿ]+r=ãŠ)ötëÑ¢˜ªßŠqàËw­z)ÜÚ9ºÑölDÏa>[æ 6ˆÐ´ãµp9ûˆ»ôÍu÷n~ ;“ÓjêZöú­wáÛ^Âêþ ˆÔN§£Ç¸i÷0±¢'Õp™°#=cdÄÙ[—äsñ+Üà‹Y8T5´@hÑi§žâ§ÈIò€Ç¿F|¢*òóÍÌÜ D#`Á䇰sTAH5_€ŠÛàEBÛØŒ®2!ü'¼I5¶''³Â¥`Cgzõ£×«ø À1jËi°0úÔŒ»êA<=LÖAä«f -Ñ<ÝlxËVóE"“³°”mG;á50xX÷»&£5#gfâ‚T0Ù¼cK¯½.ûäL²kô»A~ÑU=/õ%ÒNÕ^-”¥@¢›Œü\¡Øbläv²’þ§&“ 7ÌC‹f Ivš‡o~öš$oSΖ¤h4x˜ê«GÅ".b'þC ‘4hüÇùˆ®î.U÷å/Ø8æbÊݺ2%duR#‡NWK–,nç¹2<ÕuÌI×щb£µ‘ëBbÃjÝ=ÐÁµ‰‘}¶°Š­¯>\~&ÙhûxQüž­tË_AHF¼ñv èêÁÄ6®7rÈ*Àí€ê²•d¥ÚiöÁaÅJösPµ;lL¾ž¼vÏ9 ¦ï•Å‡d4@Ûáf·¯8éu¾<Â`ôùüpÃÕÄ„¾Ü£%AŠ>bѨ…c>ö7ùXÎwÂnëìÈ‚Y@cJÍä`z ‰|Ë˳…Xšò –<=þ?Öj©uw鎎’»ì2¥éBõg0$×+ž âEËí ámòlÓ¢MÒrŸd7R…¬h>팿O˜%PD¦Âö6‡‚‰~e;WÐâ—Iÿíâ# `õuª¨²sí.aCÃq¸W­‡+¿ ¢„ñ€á3xþ¹ài¡­¶@Ìe¼ %L-)a…½Ï²nêùó’dÙ¸1YPéðÝЪ«¨¶¾ÉKÞ¤Ô–zT¼ì¶} ©œ^ÚS²–+Ù+ƒ:‚Ô–8 V¿oʯ ã­#œ0›Lïõ¦"¢—xGµgmØŸØò¸ÿE&Ò4ÜÚ™Yu>Æ»v²ôm)bg¶ò/¨ŽÂJºŠ1ÃØHÒŠ‘ù²âè~™²f4!µzbVÌŒ·øXgZ:Ö澧:óc…TPœBrq5-¢º|Ôàru¤`vZ|„.’‘<&5„—R^LÊ(^¶(½ú/ÇÜÜû5ŽÇÖîNU¿)7—y0€ü¼¶äR…îf’qOw‘4WŒ£‚Ò†ŸÏ¯ŸÆÙ|–Ÿþ„·±»”Gk/V“»càïtt+vΗ$F÷ ÷>DîÃiÆ¢’VžêŠlÀU(æ/ùý4̶Ýg4!t¹Ùæ[MfðWW2&<Ä-á Í; £A·OÉRqÛ8`0ìÅ_0(¼§mîŽp³ß^’tŒLÖ?èZõó&ÓÎÃóšî!ô… ˜bã{Îö†š5½A y›«©¾bšDÚwÚ¶ZwõÎQ2y4¼¶ Óˆ+µO‚æ|€šªBh³1Íhy‘´ÂôØ©9²ö伫üòW;Æ£âuêáªmïqÆÏ1‚5i¦þ¹{ë\½‚ÎÁÉ{XšËØ¿zIÎz€²¤¾¶öÛÿ‚ ñÙ3P…Aõ5 úË_£ÚxêÀ÷Ò׸µÊ¥š‡¡Q…ùÿ³”‹)¢÷épY7æj™Î¬BÈ»¼ÇQTÿìÓvyžì+à_mk—ÁJâ›ÿ¡Ã®wRš€#Rî¹·§£U ŒWäů­ %û -0¥ÓÙŽ÷Å÷N&Ç¡¹4#päF¢è]Ö@/P¦ -}79B8cîÆŠ -V >Ÿ&²V±~ Hà eÔJò³šc9ú"â“‹­¸ïHZVK¼Ä-!0,+\a§¤†´"ç·pÙövšpáW8j|X3$œÄ×D‡_<Â7è‰lÇMxÅ]æ£D/6b5Cvf «Ê£¬|îMÉÒd°ìÐÒ½ÄyóîÖ[ç9d!uRnb Ù죊*›^O:¼³xE¹•{sŠDcÑlÅ«ÈÅú&×Yæ3Û²Ÿ.Œ¡Gösuß!1÷³ùIïrcìüÁ8ïf DÁE]P¹ÒØš©ljüœg¿SCºàcé¨2çÛ‹ä;%âÆ/ìõÛ8 ÁÔEçáHk’¬°ló§ÙýíŽôˆ^§,ÙJó©°X»ÎlÈæ&bnÕóŠW‡‡z¬‡,ÇhX« èÜG-iXwÔ‰­t †Ú°FyãúÎ6ŒŸã—õ-˜³èG)qêâ,C `µöœ :ä1*?¸Ûšh‹MM2ò÷ÀdöY"¿¬éJ˜cˆkÛÑ[*¬mÅá‚dGÎó7ã¹Q˜†É2B®UP†e}P[ºŠG|q¦XÃŽ{ż潴ñíó¹ÃÖU°\uïÉ$Ô§tvÃë¨ïÆéqËv\z°ÒØè ‡c­sõŽH¬e~M)àu~¬RÍç ¸]ÕÍ i°Ólø)âÈûî½oxÁ#§+MõƒÍÀ± kLO€Ãè^ô]±eÜqy7wo Å‰ë¿Ñ¶h5Ó1¨âi D 塤îõÅF4ÍÌ«Ç!dòÖÙ #ÆâXI¦ -qçyUûN¼ e M*%€Ø9LSÎKö¡KŸ1Þ×ñM{á¦qÎØè‚Ÿµ Y€¾@ùª>Šµ?êÿq[mR^|ŸöŸè½)äŠïy»rƒÎxàä’ðh1‚V§õ÷A-šUÁÒÂù…~ëª ôê}f~Oš¿y(糺pÚülHYvp-WÄB4ËÊzŠ1ÉÎ÷øpßP.¿…¨® رo…vLü¬{‚ƒ‡ÌµXv~dh…Ð*DzÚY£jö< ìúkñ~Ñ´öR\ƒ%§µê¹µªD‘P57xH·?Æ‘žr2í°Ìræ®g<0µðWiXEBz͸h-´8û| ×aÔ8+‰ŸV÷Ô‡+*‰Þ完•½°¢cA_v?É+-#ª|Ú‡ï×pÁ¼Ö~ÖêóB¾FÐ\Úšã麘d)$“¯…ŽÚª»G«4QV‚`†Š’5ž¼/"¬[z›°Î‘—XÁ¨Süaïõ#ëØÇÎj˜ñmR=aò.‡ ì`!ªxE»ÃVl‹{ÑôdР借\¯˜„3e¿I öß÷„uå"mŒ£©h²e3jn[E˜ÜÇ–N50ÅE¢Þ»n¬¯’jÎJ×-ÈPﶸc{™‹â)n?oj¥‚5o<Ê(G›sŠààô¼™âDÔõW“®‰Ü+P’J¡†³ ìÄq¾Ð²<RV={t¨ Ú‡(¡ÿº.Æ}·n7ĉ Î ö‡KÑ`* š7²k‰/²È™­¾<ŬÌÞœ20²–u -:öcžž£p¿4©Ü°û ßSò’¬Øj;;)º~„?Á7$SÈÎ2õ`.õÕ5>Þü)ÕîðàÉP›â×Î ßVQ³cÂõŸï· - JEô/Ùâxµf?“ÇÇîÚ8Öª9Ÿb*t9憴Sóƒ±Ą́2‘NîP@=i•Ò¼2$@sƒÉM0xÀÑg‹˜^[2PÔi:Ëø‘…®8EÏzF¬Ç,~T‡Øí±; -}Œ)•¸Ï -Y”ŸÁýUczƒšZ uô#ñÜl‚˜Ohï -†0:ƒäålø=ºB2‘¦&«ÙùéjÙØÙ¼÷ZŽSÓ\iÖ$åÖE£Ñt>¸æP4äO† hP±Wç-Í1Ä]­¼·µÀ{†ÊpÌ檭㽲á0íT -²´Ö—ör±Ó„7ÊP“KNfÞÔõÓcñ:-Þù•#ŒÊí`F øÔ{I‡˜ï-ó+ŒIÜ›“hðlkêåÇþ.þ‘Ħx±ÓÞÖX_WpYþ! —M'űÖCÇÙ3âw:ÿ@9!7¥Ë%0®=mÓ%7âJÍåÔÅ1Û_h(;TYòÏApËw´ß#˱ì?\Hâ9ÙḼu2 -!ÚªXöïl­°ª®s?2>¢~Ȩ¼è•ˆ¬IŸ'÷⤢ϱ‚Äðüeò -]X»%ë&Øûcg›8ò³¯À}¸Ü$ë…‘ÉYÕÆ -endstream -endobj -1891 0 obj +~Æ8“mIÉ–ðé&¡+«Q*¸X]œs´Ÿ/¥¥¦µà×üh!à#ÄšÝ;¹]cö#õo ò™6B%ëÌ·™¿¨ôO1žÅ¾º$˜-—×MȈAµ˜ŠçŒ²i6N‚Õ¤ÝYvBo¾_€â:RÈç_Ñ!Sïu  Ã?»6³Œèa–¨ƒˆ)ªrçÚ³QU1H] ë4ä÷Œ¿é4²–À#‰"mWSñSªÄØRmQ·.ôÓ€"TX“±LÑ(`ù1p=¶›ì©NóW8átýXþuºã6>’•Ûuåå˜,í&¤O1+Ûk›‚ÛRJWþ\0 oArœ@Ê‹þ«›P`ˆÛ<ùœxa:ö T éªÝÎ6yd·±iªg>·‚âuí¿àÄ!º¢ûúÖ(©þEÙŸB:‰û¶î™­΂«OŽ}€ mÆi”Zè†a§Nv‚d ¿Î²i²·D·qBüÊ5šõíÈŒ[$œXÎÕÆËÏc´ý"CäXHY’Û(¾Q^Ûz~|˜x1¾ó"dVêZx\Ý2nXþ%¦I ™ +ˆ‘¾_jøc‡9…bâæã‚ã~Ðîªq‰ÓáÉñ…JŽ?O~Ý¡8RÃÒ{Ûa{ ·ýWA<ª8[¸]u˜/ÿ»S{!VÀh×¾m¯áóÒÆÂb8@÷Bn]'5¦Òþ—¸ï6«½póÌošìïd4¿¶õusñU¬T0mÉþ¤tü]XÂÀ9¦{ƒk2u~Q«½6Λ@Ö0$_b5û3!ËkÇÓSHÂA:¯“³ÀÆ .Í/ßæ€Ä¿éãäðLR&i9±€6^ËÀ”Šç3õz–y°A1¹»ÎQæ +\s†}Ö˹Óz +YÞÈ£Ô_ÛY‹ +XÏá ÑÓª¸Õ&*õl@<éZH~ŠBÏõc¬]67Vw8¤j/¹L‹nƒ{sÞ$Ékü|h`z€ +ïùíëÂe¡^lî‘áö÷/’¶‘W84ó ×*¡Ös_#í+ö: ÑKt3Õ*Œ¡ëÄès‡¬âôÓ Nò ³••eŠ´)îõ5o|ÍäòýógZXЩ= +bnna„ø¡x)JÄƳHÓÞ¤»áÑœIãv?Ú¢F‚Å•W¡ãe{ ¿:Ôùuбãçžs7«—²ŒæAôV˜5ÁWªÞv>Á”å€"è<@ö¿D&ODTè-Ê ·LƒÌ/d ¨o)OWÆôS«­sx¨Ê`‘õê|¦IN–µßÉ¥—‘…©¾ä˨hÓR­Á +VNoÕ2-ì9ÐÝs®?&p'3¤Nµ³³…ÝLîçLöÇ4ZXËÖ+µYŒ’P¢ÂjNæ'.Áó›·à\6Í æB%KróZ”"†Ë Ø” ×.ËUþør¡P‹y±g&Õ½i…âkÕ +>tëZn¸yô§ôøA%…K™ÄèV¸sß–Y›I앇pjSµÕÝBDrn±|úˆ–ãi¢‹´€ ŠtÛq¸ +Ÿ/Ôf¶ˆÄë[35ñ +nudJRÁ8ûг£’†@ù{nfТU{juÕnú;èaË`”ž› s˜E2‚)…}Uex@ ;ÁÿÓÑãÈzȧmY4^ó TÕ¹®2Í@&ôkê÷ƽkÒÄ>Gï*ÿkå~'äp\f±…z]Uf_27a± kò/]†žitÉ71ÝÛú + <Èvt¥’ÙœßÐxaup;5(­fV»eºWôˆ·cŸž‚û³ÕL)ôÁf÷ôã +Î[pÿ6B¨1B*ˆwDrÛÒ|/)t­t”â+qÓÙ‹jõ)Mˆñ"{uTÚð}=ÝÞ@©Öméè‚xÖ±.fN#3¦»’›}$uϱœIá^¼jiVGõ‹±¼•w èJ‡²éA]Äàmíur˜ v…@¨»Ó¯É ¥ÇíèÜ,ÌV4buaLûÂûž]Ï +ÚFýÓmæsa-ÓŒ°Pf$í&²ü~iM Ùî I¯wlwšF¨§»r8X'U%Ž~ýÉÂ1¶úzÃ!ýœ.T[Ô¡Lã5ÝŸ½Yq¹U‚8‡6ýól´"’ù#$NðIïAý8*öîyxÿñ<[ýx™ïðý…“׸=Òlõ-׃1+3˜õ/éyò12Á&ä•êZƒ”*°¤›ò/ém‰Ü+ŠµåmLÞßSbz ¢É +ö+Òû”·Ú%+K«ï,aY3à$rçPu--ï¬f´tö¤¹6†ÃI©pGjŽŽµ‡!ªÂð ô–£ì›`¡€µûÙuk"³:A$â&@'sèØß/«í6¸‘½+5Û·ÎR¯­{åÙͤ¥}U4¶—=x„èsw¤Ò]ìwêp‰”ä1”nfªàk£)±ÑÜè[ŠÜü*GëJåÖý™..fÔDTe*"«ÃêÙ„Î ÜÂyäßXGÍó„WiJ-ôÏïÏcZ0Wn½2ÁV Á=ÀcÙyïFÂ(óÏ}ˆ~}á=ÜßCí2±¢Ê°èÑWö¾Ó1›]Ü1ÈoŽÌ7\˜‘Õ32A§4¢7ë7M4Æ¥*‚VoÉg¬?JºNo[,<0[ð²Õ¿£â¼ü|Ði&>BB)\Ì5ks¹ð'$rÛîKȺ8ä#Ë¥ý‰r–0ðY—üS‡•¢´afOíþGupzqÌ%‡ÈîÀzÊà ¨ÑÙ¦]‡Éw‹?‘À+oâBÅ«È@%’6*„¦:k"¹¬TÚ O‚Xª£U? æô¸LÀbž†cÕ|…_³ç&¾ƒ•FúgRÃÅ<¥·JòA^ÄÉ%«6´ÚþìX£µéO겞ゕa[QF‡Çr'aD ð±èêPkèç\6¯“H\\qξæÞZ-¼~ŽXª@ñ§ñ*ºÇÃN,þõˆ‡]±ô6üÁ[yÛ˜Öéúz¯[îy š¨¼ûIèÂQ…–ÓùåÆI»*0 €º»Â^<àXn/ófœ,Ò…Ós0¶ŒaÖâe™W¡ëjqHòmÇÿ§=kM6KbL*)Üà„µû+öG·œE +ªÐP6 uoäW5é8"pÄuHÍ¿}=žkUÚQ¬I¨å\´ñìWÝñ”¶DsYKÄmʇ`7]*½»nkº°ä +®~€Ì}•.„!Fƒ![¶ižX•2aÖ:A¢¢Ç÷ˆ"ß_Èåü(|­\ V»B‘‹-X¹f„³òºt¬§ÎÞïF‚åb ëýêáÈ„æWg6‰?’»÷?\‡Hô‡z_ÄY‡*HɽAÁ,¤=åõ»áÓ+Ó¡SaµFµ¹,i-ùvÀà1‘$xñk§ªK{gµîvfŒ“¼¤zÚWü2òo“ªU…¿§‡nR¿s7ô©–¬Äb j‰‚‹ +º\É`,ÛGôèÜ +,®-ãL4B¹h{o• Tâ!qgÆ™$ D³Záô”ÂïÚÖ¨Âr¹­\¹m0Ãg`»";°Ëͨ7>oæ¡ÜÿP ++W¥)8÷´`Gx{Æôê«Ò~Úm_`ÚË{á¼û› jªùþæ·HàE'ÙU]Ž‡ô à%Ý™´­]É…ñ ÁϨú:¢Ð¿b ¥°«ø–úâ{*–ìpäûEì¬ôëŠÜniÈaψkšk°†V´ÿ5icj^‡g‹.æÁ>šÚŸù +WQî¡aRD§ÃWG¨×í ÿùg§p‘ý€ý•ùApV=ƒüŸc%Z…¸€;bjvY²¯à¬Í~-ó¯²ÓøöG¿fÀü¯Ü.K.JÚÅzÄèƒN¥á÷tqqeºŒÿ0½ÉqŽ¨áÿí¢OÔ ÿzkkYR-Éß+eÃ;òUÂE+èC4;CÅÉ©ÿâd¼w] + ˆ“À¢gÀ¼.ª…Ëük…¥œÝãÜŽÄä&ö³{Ç}z<Þ=膃,ž•}*:GS !åX‡-CÊך1eÞ Í"gYª-¿ËˆßØ(‡kÕ?¦<̤¬•–'g³wX;¬ËA† ¶†BŒj€Â|b⺽hKþÉÓ1™+$GkpBjÊ°×#G¬ÇoAñÀâ°¨¹8£g T¨¢Üõ´3ôXŠ"‰œ±ç×e…a”3¾ŽKó~?*WøUÈEÓ;&º.™ÌzØFo™Onò"¸ÕÊ Ñ0;™×Ç*¨ƒ|ÅèPO"ŸB‚—ÊëðÌ烃Hj4ÎæÏŸïÿ.RZÅç™F¬îc›‘*]‚{׉¾Y—?#,5fùÍ'®Á@ À‰åRõ3ýærijµ•>,\ϵjŒgÿœÂ]~²5;$÷PQɨ§çß­Sæ +r+‹eŽ@ñRZ +»îñÈYÅþÞÚv’ˆªŽ¯¸âiJdt˜«\`ª¦ž$BÐN„ö–¶„g—FÓâÅèšvŽZ~Þ\PNÈ#pÞ:ÂiMEA81$eiHrÒc|@Ï–™-JžX"ÀÓfÐQ÷D½éçI/ÄGXÛdÞÀ9A¹ý@F™b‡ÜЇ ¹}…óûïî!+èÆ*¿ow?ÿÙŠg;Âàk0¾~$·!yŽ5ç-a)ÎHÀ%@†rKL¥xÂÛ#µK¹»·KÕ9¥c.O;Å:R(óN½¿?Å*ꌘG×’§á©`=ø9Ì:i¾H0^Xê`,„ØPô÷åó£³[îÒK~ã,ed?!rH­2ͯ‹Òvßé…EWÚC<®Þ—¿÷õöꀞ‡ñêS™Dvq›Ö­8åÒƒQ~ð?‚ Z;0Â|ÇPyŸÄz.¨Ëñ±_æ#ˆƒ +Ú{Ïï‚8 (á¾5LÜÛÍ< +ñ¿+ôZ™VÕô»mJã{ÅWxOCدfÓŠíBÒå˜ãúÿHH8ÁêZ·\-ö¾ÑÖö9EÆ`»ÑMùIÄ5°•F÷In8ÓŸƒbGÌa@N äÖ²„Ñó#ºƒ±³Rãõ½ ÃbɘÀ(õÈ¢­ªJ7ÂFÁƒJÅwý“ôOÛw:}žÙ3æi¸y!)¿É.°]¯1.ƒ(¨¦Y˜l«øÄSŸ0Q¼Fo€ÌÅDñÔ ÛWSíÖu“![/#´˜© Â(ÂËrÉI’IºŒS&ªäA¸Só§xOø³Vb‘~Èô[©×BŒUªùÃÆ!j%Ïm3ÇL$8”õ,¼¥}èlðvgõ÷ôã>ýûP––඙Owtp%œÝ!\å´+y$péVñP­V $så g& Þì{€a¶·UÌÁ>û çloÑ5Ó¢[¦E¦½÷» /( ]ìÏjËýP•K:a6﹪C2KÈ­§ *w-äXWhš½@.A´’ŸìÎò»c7”  NÚPÿó„©i(¡)ýÀêä8s-a-?p•ÿu‘ç„Ù£E>,}k’ó·#CO¼rÃÔ´ÁîHgŠ5’8>¢þÝD"ŒÆ?±À“jð¸¡:…fÙyïFÂ(óÏ}ˆ~}á=ÜßD£,Au¶¿²yN®×!¦ÅF!ñk>F¹Âßyð‹æ±¥ê+ù…™/êÑŸB’Ž&?ÂŒ©bº¹ +˜A”&Ûèæ·›Aè饘YlÀ¿Ýw[´¦úmêšíHÁEÑErÀ¬áå64¥C|û?ûžÉ„!ªÈÇ%~ê„àSwÜDSbÇéöLÉrŸ~#zžÃ,„‘Ë¥îä–ï³Óy +%ªrðí®ØQgÞB +ŒÈKÀ<æ‹`hûž{õ¼"F7#Ù°š¸&6t1Jø +äÂ)+¸‹êœið®Ðx·ß –\X3…I#Ö ~3ók ”Ëwf襪¢Îß ¸Få%«%çp¼ü[ƒ&ý½;Ó~¦ +¦Þ¨‰ÊÆ16=Or ™Ïœ´ÁÅ 94w2›†Áy›yó;°œ¶wc~ ê´ ®®îz¬Ö>b3ü>FžgU0à‹c(û‘4`²Ø’4/#Ã×Îuo²:ïm´0B|X•Ptl£²n¨2…Œ2]æËYZ»vX£í<ªwË}ö#bã$Q¶› |¾ûU•ÿÉ;Ë·îÛ´3ók¦pÔî5ãô5*ŠºO깿F¡>ÌYG>Öœ…–Ú' tE͸}pÕûŽßHÂ¥+1Àå ¹š£6,ÅC&±Fcã>¥ßc¨ÑR.“ ¬Œèš:Ç-?àß2ñ§|²Nõýü¨¾8TšfÖQK"É”»þg±sqíaã+j 1Ý«=yD +ÙB½Âã:¾¬E «"´<žvcÎÔM¾@Ú„õðñƒ4»¬ @µÂ™‘ÖwÁ/RCú¿|«Ñh–%µ3ä*ó*ù‡µlßUÿWá[_õyŽem‚JnïŠýƒÿµ`ƹlÌZO(F8µgÕ5Ê€rݽíÑùñä \d—M‘?œ6¥C²pRÄtY†+?’ìÍ8F3ÃX={(’éD«¸÷¼‡Kì="ý )¶†MÀÖRWÒTc;½–ßÇã8Œ•sRHà`†P„¼=¸,íºü÷ëN0á°‰Jz:dV–›ÿ¥ô ˜¼¤·úª¼¹9>Þ–F²Ùù0á@¡ á +%¨Ž^Á•u°ÇK  ÜÒ¶N#F:3ý=¼^ä­¬t¶môñ^b RƒÇÇÆzTkX0ìi6kâfwš“{$~Dñ¶ãkæ\úæ ZRïysã:õÍ¿%ùFòŠGmJ›Ý*ÆÏèi5½kÇ$Di3è‡ÍŒh® ?p|cˆîF3(ú7 ÜþΑdÊ2uâVŒˆñe˜¸5 ¹>¶á=£–HmÌúI¬ e´Ì¸sÝc¡¿0뾯Ÿì,Z÷ÒEŽÕû¹J²®É[: Sí|(¯+ë¤s«E ºèìÕ²£Y24æ—îñV%ëvDá«5ú5õ¾~ÏI}ÑàÓIEb, o·Ë}!ˆ¶ÎÆ»Â,ëÕÃÍ×ÓtEÙÜUЦhu†BÌ”¼žFíŠ'ím…õL)Õ$ö¼ùÓ&•$a p&+ VUpc?Üå’')‡](=õÓí‚Eo_ðÝ©Û—)†Î4hxå±N,®½¾§“凙š¼{ÕÀ'D@‚J•¶ B~lü¼18$oó±¸ê€ÿÖ“aø¯g~lïû8‹mÒ¹¨ö†ÖªI,Úˆà=0cû}›r°°{GR®¬fû6gþB%,èëpiÉæ~–ê‚^»©.Æ! Oa=³ê½.mK!’B¹#`?z¤AL×SDòÜ®iàþ/Ãæ Øwí¶‡ü´Ç$¼þÙnh™éØd¾=ÜŒB0÷è÷|*D†]—î¹ïÔ«|AZå…5(;Ò²#¤œÚé„—ôú|PHÀ†!@ùÎV0ækëÈ7%¿ãHBh5>¶¤¶‰É/ÅHÏ–]õ.Ü£Æ_»¦± ß];1Q}Lrâ¾ß©æãåL’ßþP®Å8âÆòäa˜4þ±âB–pxÂö?µ†¿ß\|ûÍrëÉ™sÕWQÓ]e +G¬Î÷±‚ûÐÐTbj…àG +Rg6¹h›_ÇîüMåäu]ƒ¾Â¦m7‰oâT·¸,UF›’6ª³HŽKá2:Ö_y|=²Ã„[[ÚÙbˆÛ>uÚKŠÍL˜ Ê º×þ‰Ö¨K!&ÓëHwºp¥‹âsIÝ”ÀxÎL©ºj]¬×ÐKúáUïáíßI¬†håƒd½‰Ñ>$çK• ·ŸûÓ È–ûÆÓ£|D¥‹c¯å˜Ãfá(ásºÛ³d8¹MõbCqFµ™m !è˜rýVJË6ËðX¦ÇfÔJ/½ÄíÚÌÁݵ.mÞxÁtl&¢£D«fA˜S,Ëéþx¼äÔ%AÒ0ùušÌÑKfÉÒ ¶1óú(±ÆµPÝTÁßÃä@ø‚Ï€9MÙHm²>6ª[Á2nˆrn-og#R¡])ö™l;nº1Ý=“Õ0’µQè¹ØÓh4èGžhwžZ$»‚°$â^ Ê¤Ã¨8—‰¶ºn‹Ýòµ8¼$µ&ìÜ +ß +AzÍÞüö¶zl öóä‘èŽS yÆBwœMS¸ïóGF}u«Ÿ–—?7Ážä\û¬?›<šS´LÕ]47€€:OóÔc­° x‚àŒpP²à·ì›~$k<äNeðÄ h$ßÀû'Ÿ†®l î ñä5q“V•Úå³®Ó@0ßRi´Mª+ü«ª¨è~÷E’¶û4¿¥ËoT€e[ÂÝcÒ¿-¼éÆ£‡®LpAR›GëbÍož uõô¯#ZýJòb-¶à¿c~F3ÝKú¥öÀõÿjzØ·pÑP}nIx>ŸBºù0«6Án!ÈiY QÇÓ–‚Î"?•è&èÃdúq0QúTƒ#dŒ áì÷p8#Yª1b2 4 sZ† Ž­ÒÉßÔ,¬Zò¼D#òüMíjÓÒ²Ù§B"ÙǸê!S¾žæLÄÒTJcj€M/ÖàÖ8,6ò‡ÇàŠM¬ õ™@æÕç¬NÍB<Õâ¢ûOìá~CìxôA»áÁ=~®ÏB™yüÌ+s|̺ƒ:Ãmný³æ åO ¼Op´½ãàùàš®’•äRLŠè}ù©º­¹°ȼð˜t¦Ô­ +qOÅÜá¯s•ðžnrÇZ'ùD­#Tµ—ê©®kVB3ê&¹Äç´R;;óa«sµëPxVu¿{ÚúbYÞ9L°Œ|–ˆ! }eÞÂnÞÊ7iÃÞøX„ç#wlë4W3˜L6Uª€ÈK¥Ù¼æUZ-î^È¿APW¾¬ÊæQJH”ŸòêPƒIò“a }bþ‡;™^ˆn‘âx³ËúåR¶"¹-.È_µ§†nÞG¦]~0JšËVÌ P{ 4°JÊìÏâñkø/j"í'{£ã¨¶zê(’‘VŒÛ”àkp¯mׂZl½V‚»- Sˆ$Wú#ý×]d¸M>mÍî^NÙ˜ÙVçù3ð·ÅU #t†Ð/«„ÑšQ-”}dºÞRpã}ñø?V­íQBbOHWÖV(bÿÜÅm&Ò|¾¥–_Ùš‚ßL€ævìÐõ$íðBîmŸ”jwú„ød¡ñ{1Ø8xœ²$zA¤8ÈО±Z#9F>†õgûÔ?é05`ZÔ1WâNÚ¨áÚ#?Ìøpó¯A‡1ŒNI„ËÃH‰€¸j)^J‘ÿÔÝÆñû_?¥ôÑŽ]ÈQMK¼3âÂ+8›0AesÆrM{x²ÃÛÔC)u‰$ZK©’RÿÚ¨[ȹ“!·3ùÜþDÕûþÊà_(t¸œ¿5è’UO<. s¢±mÃz6º ZôDšõ + º}¦]ûkÔý>#“ùPó¡Þ¾ƒ2ºu 3vŠBI,ìt䢅BÔQº´vC³±½»0ÎxmQP½D\ÍÿdÒRØò(¶L?‹± GÉå.åðVV›“ +ÉŸG@vð}W©¥¢‘žDºåø–‚lÑß¿/©"fÙ뺨Ì5,ÄùEë™X×{çÂ÷»6ðìÞÝú=1+ØUxáÖ?Ž[ð/™£d·šÁᙟáFd—¤UD¢u_“9±‡KojÆE½§#Ù…¤ÿ6 ð`WÂêÛ¹9×· ‰¤{Ð0ˆú_RT*‰ï ™v.§ÝŸ ¬Ò·•7˜ +§Èèv%.JÉZÍ0ÇÔF= Šu +öÌfcÖw•C!!}«¢LaGxÑ`G +¼xP÷xšòLñUí=?¯S0ã®S&Þ^¯Ú›® õ¦HóЧ›¤\©ëÖMd¤§éx%‰ÿÝ땇îÏŽ•Q´÷E’Å”ªã÷mH âfîÿ»»ô°zm(qþñ6.Xê(ò|ºi3ž“>'›tÆ'EÔ¦3OîƇMÏë‰%·/ð˜íäôä¢.£n†ûýÊút™¦ôűìA &9p†„gµ>‹·l¨F¥Eò¹[äš[šMàÉN5]Œ$ÿ»– b„gí ª†5‘V…½hãÜðî)/Ò8#—Íl;ÓöòçC!Òì5O«Þ“IˆK³<\Åÿ ®SÌYø,nÂÞ¶|lס5!š’Ô· `Ö—7ZåÛk¤Ä;Ñù²'µh=œ^K9|ÙQ¢©ò÷ NÐŒy@®’áx¿LG﮼zÀMØC~rÞ +Ë%5&˜Vð>føXÒŽgÈ_…8eÖ&[©údWgê Â5ŸÜBrP2Ò„ŒÐi ¹yïkOfc¿Îæ­†ù»Ü]t¾3f f„ÓÜ,}UÖÙÇYã. ,Ãâið‘¤úÞ=ÅÕOG=gªNûpùD´FóƒgÜ…`Í2§¸àêK7}æ§R‰XѲÿ!œt#V/›Ÿ!mü‘¬.Â*5nçkSGåt¼q!ò8…èmŒ-kU²º­ ýzªðN½°>Æ7’BTpùWÓâ '\Ü…h˜¼íÅUøÏ>r>Ä$ôc‘÷rš†x¶HsʬÓˆZŠDåD6‹ÓÎpÀž -%ü„YÕ¢‚Íä›í8 Ä“È[¹«Þ}G*Aí³yEf2çýɈ*Î)U´¿“Ggšv@zP¬”ë—Œù£fFÀ¦'ãøh4 {ît¶Vä›sË*ñÄï;Ò1¬ÿÙim©±Ó+þÒ®úåE‚Éß]¿üßĨýŽ£´]g¸_jB¢¿°½u§Ô«±õè™óûû‹;AR¦6Íø@¿ñ: zÒåQ}¯“×!Èæ´™ÈôuB£þ€ysÔæ´Õfdù,¶ðçz[þ’!•½‚ Q|À…Ì¢Õ×D9¢z%ø˜øvŒ§Õô²fbó©cÇ”YFzA«rÈša†0¡Â6NYOïþE–#¶‡ŒËÝnotœ³Ì-W£Úγ>Ñ7Ûy|Îò™(:Úã ÛÕ@a +&~=íÂÓ"M;¾µ vŠëv9Œ4%«*r×AŠìÛƒýkåcgrú›6ÏÓ™‡OlË"ò–ŠpxA‹>ü½9t|^Æù‰Ä~ô´9³ÐTÂy½…•Êj?Ó°¯ñ’¾ì§ûÆ«@€TP 0_ûóVx %äÞu68[âÑ/yá&Û1ÓaÙt?^ xO&ä‡Oµ©t® nú`Õ„˜&Èfûûª$_òÄ¢‘:§> + n'æîÑ>(‚^å?Q-ñ€bwÒÌbo²ú»Ñé¼[˜”º=ñH²ÆvÉÿþ‹ÙžÜ«p ºV¨k°VÖêåþrs*ÛP°ÎfÝ1=)Ö ­ãlz×ê…%‡ ¢A"L£9#Éò8Hï³jÿ3×ñ:5å#:Yfv‚j¹OƒÅ&áÒþìLÔSƒB¬"è¦Aæãz€¾¢Ù¤||<´Ü%àÚûQ¡AÒ?ÂÇ‹WaßË0Ù“šcá¸^LábŽ÷ñ(äÐ.ò ×?¾dHª¤ôøðäÐ&ƒ‘Ÿò ¦å+3ÊòFÚÊ’Éw¶“{vŒîÙ4%t-—Ì¡ƒéÙh)A…xmâc“%$g-Þå"æöOƒüÈq¯çÍE3áËá®É2„åzåeä’ߘ¿ŠL­:’=Œ{̾‚U{ÍK%™\®êIKÞ†Æ/jbìwJeëG‚°›tsëBBÃøõˆŠß5‡P®ß…Æß"x£.ÎÊêûÖu u6P¸r”$š".Wõà$A ;æ+ +k ,ÜïÒf¯B³H¢-ÒàH©!äŠZµ£;ã˜üL}¯g]J^s¼KX-!rSáf +p–îs|¹*Á¡÷#}ñûtu(~ø?¤H‘02yÓÓ¡^àþzW'¹›jC°&fv»#œ ¼äg9YºûfžNåg@Ò~°¾^Û†«Évå0øs k÷ôJiøµ®^öSõA±òÛ0Š ÁŒ(*{’¼“ÒÿYÑÝ¢IŒrðýüKB°!o?¿”ÞX, £Ê6P4 ÿ(óõ}rö§bŽED¿Çrx3§„/§¹Ì´OÙd¨/ŠÃÙÇÙF¡»ÊI¢¼†RGd”Ç f á}åJß)}Ìø)‹Áy˜zô…›ÔÅÉ6ÿO‹o~/ê–ìƒï*tÒŽ¡n¯x ’“õ9Çï°Kš6{Ó)ãe€_ƒ.ÞÀŒ‡^èeÞ)ÖËz„¢]Èñá/ϤŠMàYoÈô9,¢œ—Þìg£×TE눮}qtôºšGÍÛeÅX‚¥Ä_wxÃël ñNUá¾=wÆU¥w*ú5ëqï6¸Ù±]tR4‚6Œ[@[LJ*8Þ*ñèŸÊj‘Òës]p*4P+ã5[šâ +ÏùÖ 9¦}v·5¤$H•à”úúVbàöëìjíFaõý®?ôÖ}÷­ÀÀ„ì#bÉ5ö]ò*ýÿS"^ò6>Øù­ÊÓ}uFÀø¤Àa›d~QÚ¡d™z’‹ €–ü‡—Iø¡¤áo>Šún}ÄŠárËÀ©o 'ª÷‘¦Xdü`C- ¾Ó’duÇ‚TÄ(n¿v8Ímë³ß͘•ƒUÖ67ˆ¨áBYgøåR>ï‰`±l–Ÿþd³õ- +×ýc%ÄÆË£7£ÓÜ×Pä1xÀÞp]ãÒw/F¸N¡ºNæö’0àqûìÃeÂ7¯ê±åëYÁæÍÏË?Ù*KùPÏôp\þ|Âi HÓÕ¨ ãYNUÓ0ŒWsK9àæC†­Úº¾¹pS^Þ"戺•v#†iÙ"ÙʆIúCã¨Á"ŒIPA{QmtÑ_ØZѦnzE¨]½ U5Pd"ó[ùÃ`$k¬=ú̪G­ÖäË¡Zï‡ÿÉþÏfŽþ;ÇÿöíšþFèLx5†÷±†@>ëÊ æ£ÙkhºæVë»7Üèï +óÌ T¦@rrz‚)(å°]Ùß K¸ù+ؘfi©ÛÂDøŠÄÈ:HÕ±ëù÷ÞÞ»¥Â¬í‡Ž²B£WU!|5c}æ¶5‰R¥#Ó_훎ì¢Me¥z4ºQû:àîФÜàåký#ý8œA Pbâ—ÉwÆ™µhfOŽokï‘Z9ëè\©¶¨g„$o­Îà×.ÞϲWO~T5 ¨Gˆ$f’œÜÏ‹”“k½=r“ Ë/3Gq† Þ¡¤s­»ÅíÍŒ§)»vx*ýP*ÏÿÑ=ßÀcÑÀè~ÓáVô¸zíä7dNS×4B®ÅÐæS}©RNaVÁªPÛm„>ÍQ¿i6h%8ÀZÖ½±ïPGQqºÇÈÌIâph,‰1¾-Ãã…Â(†Þ°ñâ±÷ùÁ„çì¬ÉluU<ìh¸õZ๬rÉ ~%>—Ö}Imkõ|Ëë›㢠=ĺ©Y#Z¹dàÂ$Ú¾öÃÑN¹TDŒé”(gÉÃ)†u ßÄÿTÀvû#à&å“,vžq÷¿͹ |ú8@\Nˆ®.ÒUè4”ÄÅߢ?A°QÉ^éÆóp+¥™¡tŒün(ÏÎI`¸#Ǧ¬H˜hÒâØŽÔÁˆ¢0$%,K“µ®¢à3å*Z耘®1̃±²ÝYº9åû^¾5~\Á³*ºê™já¾7£ ó“o]ßÛ-”©N÷–É»5×Úƒ…f1Š!MÔPK§‘¶‚«°™ 7fÍ ƒiEÅ:Èka¸áD‚SœV +ôᦠŽÞÑ«ª¸™BÜZ*p ¨‚ã"Ô»[´V{•»V!3ú6•Ö>ùæ&lÝŸhæΟv¯Çè6ÊG¾ýC™„êÅ&ÕL¾mÖç¼Ψ…™bÈОÉ{³ÓÐíî »6½nS=µîo–;J¶Û§.'ï£R’ËÈõN œ–È¢ßÓâ[÷šƒ‚\Z^ß…Ù„ õ™ +­%–ëXZÜÿçuqË +«ágÕµ~ƒúíÖVÆÂ…–ý»Ve;˜[ÝýÅÁ ÉLMÝ ±ôÐ}ÈfŒŸÏÑ`t‡š—Ÿ7Ë ‰Q˜È4úŠ¶‹VZ#¼ª +’  +ýƒíë.¦Wànd¨E¾³ŠK×™»ÅɶªQÃ\åÜàØŽ¬Ý°y +Æ4à£øÕèß|)’Úñ_ˆZ}ûóÐ> R¸õY +ST‡¼ax5¬ µFõÅzÕ>þa~zë[KÊQ÷ÔHD~YÖ­kÕ‘²‘Ûsø¢ÛŽ•Ø›¿¤´fœþwŒüÒw'Q½üÔ¡þø,À¬0ts‰Ý¼h:ŠSqÏW°Ò=E2Í'jˆþŸAr+…Dé’!}AÑO:YEÀöT:Š€¸¹²c=öóìn O¨}Æí¤C2wp£rA˜CšDêo Ò )Àse4›uNôã™}ü¦ŒÓx 1šåÓ~Xð³§΋[/B™ “BöøR +î~çsÕ¤4(¥ Þ²î‰`Cœé9’¹•ŒôÂ:E"²º–€IY~&4£‰RYæ¶&{2Šý$±GÇrV›ÀÚâ?É›ãµDVàêkÁ—Za¨Æ4ë%›O*tÉÏ”¯Æâ,!«–N²U¹ÿl×*‰ÖÌ[=a§»#*‚³–RZ +Šãø¹ y"]M¡ÐcØÙU…%˜»Žƒ/TJ ͹Æ2„PËH~ëH=yišÍÖ1÷9× x {ááyÚ½ë·Ù"“êÿú²S¦YÐ^{Ķ3þœž÷Pk<ÈZ?\û/ß2Üw)€%!Z?ÑúT0F :ƒ!WˆuùÖ „óá¾7Ìî8 O7±ów#ÀÄ¿¤ßzÂ)¹>×…VŠh&ksLÆÊcjYÇFír­ô|#3 #Õá:^7 X÷p²š¿lTŠï6#¯ð}«1¿½\]Ó=U¼·¸Cîd»øøä_t}ô¥ +åˆ3©•ä!š4wîQ+¯æITÝÁ —ÒŽÝR^n¼ÿ¤bÏάU#mÁ´¯;]§—Ho7Ò½éáŒJI[ͨ!ªNIÕú˜Œ¨÷|Lb–'æÄèDÐð3R@ñ#Ü;ÁÕÁ2¾˜šŸÿ—hÓŒLÎèŸÍ8ýÕ?/ 0|—¸!ó +Æ€{qK—ÿýðêT°%ÏÄÁç4 ~T’è:sãì# þýu B«K²¢Þ@§wÕI5­³=ð¡•öu³Îm9¶.tÑž2•5ÈĬa”Åpˆ5EÕûÚ4D(á€ÁXmè‡ù{>Ìì•o‹:=T¡AŒB[’œin«­áQ¦Õƒ‚:©ûþÀmôÆö¨.¸åê8¢U…E%x~˹>í£ñ:¾Ö@CQBYÑ: +endstream +endobj +2238 0 obj << -/Type /ObjStm -/N 100 -/First 984 -/Length 16452 ->> -stream -1887 0 1893 120 1895 238 561 297 1892 355 1899 449 1896 597 1897 742 1901 889 565 947 -1898 1004 1904 1098 1906 1216 569 1275 1903 1333 1911 1427 1907 1575 1908 1724 1913 1869 573 1927 -1914 1984 1915 2042 1916 2100 1917 2158 1910 2216 1922 2363 1909 2529 1918 2676 1919 2820 1920 2964 -1924 3109 1921 3168 1927 3315 1925 3454 1929 3598 1926 3656 1931 3763 1933 3881 1930 3940 1935 4008 -1937 4126 1938 4184 783 4242 1939 4299 831 4357 830 4414 789 4469 790 4526 805 4583 786 4640 -787 4697 1940 4754 782 4812 1941 4869 1934 4927 1944 5021 1946 5139 946 5198 817 5256 788 5314 -785 5372 781 5430 784 5488 1947 5546 1943 5605 1948 5699 1949 5719 1950 6070 1951 6101 1952 6260 -1953 6283 1954 6638 1955 6767 1956 6890 1957 7536 1959 8007 1960 8638 1961 9109 1963 9684 1965 9909 -1967 10157 1969 10387 1971 10633 1973 10957 1975 11459 1977 11691 1979 12063 1981 12289 1983 12520 1985 12988 -1987 13540 1958 13931 1750 14341 1682 14484 1360 14627 814 14768 813 14907 812 15047 870 15188 915 15328 -% 1887 0 obj +/Length 1007 +>> +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmex10-builtin-0) +%%Title: (TeX-cmex10-builtin-0 TeX cmex10-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmex10-builtin) +/Supplement 0 +>> def +/CMapName /TeX-cmex10-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +1 beginbfrange + <03A5> +endbfrange +31 beginbfchar +<30> +<31> +<32> +<33> +<34> +<35> +<36> +<37> +<38> +<39> +<3A> +<3B> +<3C> +<3D> +<3E> +<3F> +<40> +<41> +<42> +<43> +<9F> <221A> + <0020> + <0393> + <2206> + <0398> + <039B> + <039E> + <03A0> + <03A3> + <03A8> + <2126> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2239 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R /F52 585 0 R /F83 813 0 R >> -/ProcSet [ /PDF /Text ] +/Length 1153 >> -% 1893 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmitt10-lm-rep-cmitt-0) +%%Title: (TeX-cmitt10-lm-rep-cmitt-0 TeX cmitt10-lm-rep-cmitt 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmitt10-lm-rep-cmitt) +/Supplement 0 +>> def +/CMapName /TeX-cmitt10-lm-rep-cmitt-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +5 beginbfrange +<07> <08> <03A5> +<21> <23> <0021> +<25> <26> <0025> +<28> <5F> <0028> +<61> <7E> <0061> +endbfrange +35 beginbfchar +<00> <0393> +<01> <2206> +<02> <0398> +<03> <039B> +<04> <039E> +<05> <03A0> +<06> <03A3> +<09> <03A8> +<0A> <2126> +<0B> <2191> +<0C> <2193> +<0D> <0027> +<0E> <00A1> +<0F> <00BF> +<10> <0131> +<11> <0237> +<12> <0060> +<13> <00B4> +<14> <02C7> +<15> <02D8> +<16> <00AF> +<17> <02DA> +<18> <00B8> +<19> <00DF> +<1A> <00E6> +<1B> <0153> +<1C> <00F8> +<1D> <00C6> +<1E> <0152> +<1F> <00D8> +<20> <2423> +<24> <00A3> +<27> <2019> +<60> <2018> +<7F> <00A8> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2240 0 obj << -/Type /Page -/Contents 1894 0 R -/Resources 1892 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1865 0 R +/Length 1535 >> -% 1895 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmmi10-builtin-0) +%%Title: (TeX-cmmi10-builtin-0 TeX cmmi10-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmmi10-builtin) +/Supplement 0 +>> def +/CMapName /TeX-cmmi10-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +20 beginbfrange +<07> <08> <03A5> +<0B> <0E> <03B1> +<10> <15> <03B6> +<17> <18> <03BD> +<19> <1A> <03C0> +<1B> <1D> <03C3> +<1F> <21> <03C7> +<28> <29> <21BC> +<2A> <2B> <21C0> +<30> <39> <0030> +<41> <5A> <0041> +<5B> <5D> <266D> +<61> <7A> <0061> + <03A5> + <03B1> + <03B6> + <03BD> + <03C0> + <03C3> + <03C7> +endbfrange +48 beginbfchar +<00> <0393> +<01> <2206> +<02> <0398> +<03> <039B> +<04> <039E> +<05> <03A0> +<06> <03A3> +<09> <03A8> +<0A> <2126> +<0F> <03F5> +<16> <00B5> +<1E> <03D5> +<22> <03B5> +<23> <03D1> +<24> <03D6> +<25> <03F1> +<26> <03C2> +<27> <03C6> +<2E> <25B7> +<2F> <25C1> +<3A> <002E> +<3B> <002C> +<3C> <003C> +<3D> <002F> +<3E> <003E> +<3F> <22C6> +<40> <2202> +<5E> <2323> +<5F> <2322> +<60> <2113> +<7B> <0131> +<7C> <0237> +<7D> <2118> +<7E> <20D7> +<80> <03C8> + <0020> + <0393> + <2206> + <0398> + <039B> + <039E> + <03A0> + <03A3> + <03A8> + <2126> + <03F5> + <00B5> + <03D5> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2241 0 obj << -/D [1893 0 R /XYZ 149.705 753.953 null] +/Length 1291 >> -% 561 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmr10-lm-rep-cmrm-0) +%%Title: (TeX-cmr10-lm-rep-cmrm-0 TeX cmr10-lm-rep-cmrm 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmr10-lm-rep-cmrm) +/Supplement 0 +>> def +/CMapName /TeX-cmr10-lm-rep-cmrm-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +6 beginbfrange +<07> <08> <03A5> +<23> <26> <0023> +<28> <3B> <0028> +<3F> <5B> <003F> +<61> <7A> <0061> +<7B> <7C> <2013> +endbfrange +44 beginbfchar +<00> <0393> +<01> <2206> +<02> <0398> +<03> <039B> +<04> <039E> +<05> <03A0> +<06> <03A3> +<09> <03A8> +<0A> <2126> +<0B> <00660066> +<0C> <00660069> +<0D> <0066006C> +<0E> <006600660069> +<0F> <00660066006C> +<10> <0131> +<11> <0237> +<12> <0060> +<13> <00B4> +<14> <02C7> +<15> <02D8> +<16> <00AF> +<17> <02DA> +<18> <00B8> +<19> <00DF> +<1A> <00E6> +<1B> <0153> +<1C> <00F8> +<1D> <00C6> +<1E> <0152> +<1F> <00D8> +<21> <0021> +<22> <201D> +<27> <2019> +<3C> <00A1> +<3D> <003D> +<3E> <00BF> +<5C> <201C> +<5D> <005D> +<5E> <02C6> +<5F> <02D9> +<60> <2018> +<7D> <02DD> +<7E> <02DC> +<7F> <00A8> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2242 0 obj << -/D [1893 0 R /XYZ 150.705 716.092 null] +/Length 2050 >> -% 1892 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmsy10-builtin-0) +%%Title: (TeX-cmsy10-builtin-0 TeX cmsy10-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmsy10-builtin) +/Supplement 0 +>> def +/CMapName /TeX-cmsy10-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +27 beginbfrange +<08> <0C> <2295> +<12> <13> <2286> +<14> <15> <2264> +<16> <17> <2AAF> +<1A> <1B> <2282> +<1C> <1D> <226A> +<1E> <1F> <227A> +<23> <24> <2193> +<25> <26> <2197> +<2B> <2C> <21D3> +<3E> <3F> <22A4> +<41> <5A> <0041> +<5E> <5F> <2227> +<60> <61> <22A2> +<62> <63> <230A> +<64> <65> <2308> +<68> <69> <27E8> +<76> <77> <2291> +<79> <7A> <2020> + <2295> + <2297> + <2286> + <2264> + <2AAF> + <2282> + <226A> + <227A> +endbfrange +81 beginbfchar +<00> <2212> +<01> <00B7> +<02> <00D7> +<03> <2217> +<04> <00F7> +<05> <22C4> +<06> <00B1> +<07> <2213> +<0D> <20DD> +<0E> <25E6> +<0F> <2022> +<10> <224D> +<11> <2261> +<18> <223C> +<19> <2248> +<20> <2190> +<21> <2192> +<22> <2191> +<27> <2243> +<28> <21D0> +<29> <21D2> +<2A> <21D1> +<2D> <2196> +<2E> <2199> +<2F> <221D> +<30> <2032> +<31> <221E> +<32> <2208> +<33> <220B> +<34> <25B3> +<35> <25BD> +<36> <0338> +<38> <2200> +<39> <2203> +<3A> <00AC> +<3B> <2205> +<3C> <211C> +<3D> <2111> +<40> <2135> +<5B> <222A> +<5C> <2229> +<5D> <228E> +<66> <007B> +<67> <007D> +<6A> <007C> +<6B> <2225> +<6C> <2195> +<6D> <21D5> +<6E> <005C> +<6F> <2240> +<70> <221A> +<71> <2A3F> +<72> <2207> +<73> <222B> +<74> <2294> +<75> <2293> +<78> <00A7> +<7B> <00B6> +<7C> <2663> +<7D> <2662> +<7E> <2661> +<7F> <2660> +<80> <2190> + <0020> + <2212> + <00B7> + <00D7> + <2217> + <00F7> + <22C4> + <00B1> + <2213> + <20DD> + <25E6> + <2022> + <224D> + <2261> + <223C> + <2248> + <2190> + <2660> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2243 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/Length 1114 >> -% 1899 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmtt10-lm-rep-cmtt-0) +%%Title: (TeX-cmtt10-lm-rep-cmtt-0 TeX cmtt10-lm-rep-cmtt 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmtt10-lm-rep-cmtt) +/Supplement 0 +>> def +/CMapName /TeX-cmtt10-lm-rep-cmtt-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +4 beginbfrange +<07> <08> <03A5> +<21> <26> <0021> +<28> <5F> <0028> +<61> <7E> <0061> +endbfrange +34 beginbfchar +<00> <0393> +<01> <2206> +<02> <0398> +<03> <039B> +<04> <039E> +<05> <03A0> +<06> <03A3> +<09> <03A8> +<0A> <2126> +<0B> <2191> +<0C> <2193> +<0D> <0027> +<0E> <00A1> +<0F> <00BF> +<10> <0131> +<11> <0237> +<12> <0060> +<13> <00B4> +<14> <02C7> +<15> <02D8> +<16> <00AF> +<17> <02DA> +<18> <00B8> +<19> <00DF> +<1A> <00E6> +<1B> <0153> +<1C> <00F8> +<1D> <00C6> +<1E> <0152> +<1F> <00D8> +<20> <2423> +<27> <2019> +<60> <2018> +<7F> <00A8> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2244 0 obj << -/Type /Page -/Contents 1900 0 R -/Resources 1898 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1902 0 R -/Annots [ 1896 0 R 1897 0 R ] +/Length 1109 >> -% 1896 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmtt8-lm-rep-cmtt-0) +%%Title: (TeX-cmtt8-lm-rep-cmtt-0 TeX cmtt8-lm-rep-cmtt 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmtt8-lm-rep-cmtt) +/Supplement 0 +>> def +/CMapName /TeX-cmtt8-lm-rep-cmtt-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +4 beginbfrange +<07> <08> <03A5> +<21> <26> <0021> +<28> <5F> <0028> +<61> <7E> <0061> +endbfrange +34 beginbfchar +<00> <0393> +<01> <2206> +<02> <0398> +<03> <039B> +<04> <039E> +<05> <03A0> +<06> <03A3> +<09> <03A8> +<0A> <2126> +<0B> <2191> +<0C> <2193> +<0D> <0027> +<0E> <00A1> +<0F> <00BF> +<10> <0131> +<11> <0237> +<12> <0060> +<13> <00B4> +<14> <02C7> +<15> <02D8> +<16> <00AF> +<17> <02DA> +<18> <00B8> +<19> <00DF> +<1A> <00E6> +<1B> <0153> +<1C> <00F8> +<1D> <00C6> +<1E> <0152> +<1F> <00D8> +<20> <2423> +<27> <2019> +<60> <2018> +<7F> <00A8> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2245 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [320.317 573.77 387.374 585.83] -/A << /S /GoTo /D (precdata) >> +/Length 1109 >> -% 1897 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-cmtt9-lm-rep-cmtt-0) +%%Title: (TeX-cmtt9-lm-rep-cmtt-0 TeX cmtt9-lm-rep-cmtt 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (cmtt9-lm-rep-cmtt) +/Supplement 0 +>> def +/CMapName /TeX-cmtt9-lm-rep-cmtt-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +4 beginbfrange +<07> <08> <03A5> +<21> <26> <0021> +<28> <5F> <0028> +<61> <7E> <0061> +endbfrange +34 beginbfchar +<00> <0393> +<01> <2206> +<02> <0398> +<03> <039B> +<04> <039E> +<05> <03A0> +<06> <03A3> +<09> <03A8> +<0A> <2126> +<0B> <2191> +<0C> <2193> +<0D> <0027> +<0E> <00A1> +<0F> <00BF> +<10> <0131> +<11> <0237> +<12> <0060> +<13> <00B4> +<14> <02C7> +<15> <02D8> +<16> <00AF> +<17> <02DA> +<18> <00B8> +<19> <00DF> +<1A> <00E6> +<1B> <0153> +<1C> <00F8> +<1D> <00C6> +<1E> <0152> +<1F> <00D8> +<20> <2423> +<27> <2019> +<60> <2018> +<7F> <00A8> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2246 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [320.317 498.054 387.374 510.114] -/A << /S /GoTo /D (precdata) >> +/Length 853 >> -% 1901 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-fplmr-builtin-0) +%%Title: (TeX-fplmr-builtin-0 TeX fplmr-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (fplmr-builtin) +/Supplement 0 +>> def +/CMapName /TeX-fplmr-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +0 beginbfrange +endbfrange +20 beginbfchar +<20> <0020> +<44> <2206> +<46> <03A6> +<47> <0393> +<4C> <039B> +<50> <03A0> +<51> <0398> +<53> <03A3> +<55> <03A5> +<57> <2126> +<58> <039E> +<59> <03A8> + <20AC> + <221E> + <221D> + <2205> + <220F> + <0237> + <2A3F> + <2211> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2247 0 obj << -/D [1899 0 R /XYZ 98.895 753.953 null] +/Length 1113 >> -% 565 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-fplmri-builtin-0) +%%Title: (TeX-fplmri-builtin-0 TeX fplmri-builtin 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (fplmri-builtin) +/Supplement 0 +>> def +/CMapName /TeX-fplmri-builtin-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +3 beginbfrange +<61> <62> <03B1> +<6B> <6C> <03BA> +<73> <75> <03C3> +endbfrange +37 beginbfchar +<20> <0020> +<23> <03B5> +<24> <03F1> +<44> <2206> +<46> <03A6> +<47> <0393> +<4A> <03D1> +<4C> <039B> +<50> <03A0> +<51> <0398> +<53> <03A3> +<55> <03A5> +<56> <03C2> +<57> <2126> +<58> <039E> +<59> <03A8> +<63> <03C7> +<64> <03B4> +<65> <03F5> +<66> <03D5> +<67> <03B3> +<68> <03B7> +<69> <03B9> +<6A> <03C6> +<6D> <00B5> +<6E> <03BD> +<70> <03C0> +<71> <03B8> +<72> <03C1> +<76> <03D6> +<77> <03C9> +<78> <03BE> +<79> <03C8> +<7A> <03B6> + <20AC> + <2202> + <0237> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2248 0 obj << -/D [1899 0 R /XYZ 99.895 716.092 null] +/Length 1477 >> -% 1898 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-pplb8r-8r-0) +%%Title: (TeX-pplb8r-8r-0 TeX pplb8r-8r 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (pplb8r-8r) +/Supplement 0 +>> def +/CMapName /TeX-pplb8r-8r-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +15 beginbfrange +<06> <07> <0141> +<0E> <0F> <017D> +<18> <19> <2264> +<20> <26> <0020> +<28> <5F> <0028> +<61> <7E> <0061> +<86> <87> <2020> +<93> <94> <201C> +<96> <97> <2013> + <00A1> + <00AE> + <00D8> + <00ED> + <00F7> + <00FC> +endbfrange +50 beginbfchar +<01> <02D9> +<02> <00660069> +<03> <0066006C> +<04> <2044> +<05> <02DD> +<08> <02DB> +<09> <02DA> +<0B> <02D8> +<0C> <2212> +<10> <02C7> +<11> <0131> +<12> <0237> +<13> <00660066> +<14> <006600660069> +<15> <00660066006C> +<16> <2260> +<17> <221E> +<1A> <2202> +<1B> <2211> +<1C> <220F> +<1D> <03C0> +<1E> <0060> +<1F> <0027> +<27> <2019> +<60> <2018> +<80> <20AC> +<81> <222B> +<82> <201A> +<83> <0192> +<84> <201E> +<85> <2026> +<88> <02C6> +<89> <2030> +<8A> <0160> +<8B> <2039> +<8C> <0152> +<8D> <2126> +<8E> <221A> +<8F> <2248> +<95> <2022> +<98> <02DC> +<99> <2122> +<9A> <0161> +<9B> <203A> +<9C> <0153> +<9D> <2206> +<9E> <25CA> +<9F> <0178> + <002D> + <00FF> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2249 0 obj << -/Font << /F51 584 0 R /F59 812 0 R /F54 586 0 R >> -/ProcSet [ /PDF /Text ] +/Length 1477 >> -% 1904 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-pplr8r-8r-0) +%%Title: (TeX-pplr8r-8r-0 TeX pplr8r-8r 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (pplr8r-8r) +/Supplement 0 +>> def +/CMapName /TeX-pplr8r-8r-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +15 beginbfrange +<06> <07> <0141> +<0E> <0F> <017D> +<18> <19> <2264> +<20> <26> <0020> +<28> <5F> <0028> +<61> <7E> <0061> +<86> <87> <2020> +<93> <94> <201C> +<96> <97> <2013> + <00A1> + <00AE> + <00D8> + <00ED> + <00F7> + <00FC> +endbfrange +50 beginbfchar +<01> <02D9> +<02> <00660069> +<03> <0066006C> +<04> <2044> +<05> <02DD> +<08> <02DB> +<09> <02DA> +<0B> <02D8> +<0C> <2212> +<10> <02C7> +<11> <0131> +<12> <0237> +<13> <00660066> +<14> <006600660069> +<15> <00660066006C> +<16> <2260> +<17> <221E> +<1A> <2202> +<1B> <2211> +<1C> <220F> +<1D> <03C0> +<1E> <0060> +<1F> <0027> +<27> <2019> +<60> <2018> +<80> <20AC> +<81> <222B> +<82> <201A> +<83> <0192> +<84> <201E> +<85> <2026> +<88> <02C6> +<89> <2030> +<8A> <0160> +<8B> <2039> +<8C> <0152> +<8D> <2126> +<8E> <221A> +<8F> <2248> +<95> <2022> +<98> <02DC> +<99> <2122> +<9A> <0161> +<9B> <203A> +<9C> <0153> +<9D> <2206> +<9E> <25CA> +<9F> <0178> + <002D> + <00FF> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2250 0 obj << -/Type /Page -/Contents 1905 0 R -/Resources 1903 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1902 0 R +/Length 1482 >> -% 1906 0 obj +stream +%!PS-Adobe-3.0 Resource-CMap +%%DocumentNeededResources: ProcSet (CIDInit) +%%IncludeResource: ProcSet (CIDInit) +%%BeginResource: CMap (TeX-pplri8r-8r-0) +%%Title: (TeX-pplri8r-8r-0 TeX pplri8r-8r 0) +%%Version: 1.000 +%%EndComments +/CIDInit /ProcSet findresource begin +12 dict begin +begincmap +/CIDSystemInfo +<< /Registry (TeX) +/Ordering (pplri8r-8r) +/Supplement 0 +>> def +/CMapName /TeX-pplri8r-8r-0 def +/CMapType 2 def +1 begincodespacerange +<00> +endcodespacerange +15 beginbfrange +<06> <07> <0141> +<0E> <0F> <017D> +<18> <19> <2264> +<20> <26> <0020> +<28> <5F> <0028> +<61> <7E> <0061> +<86> <87> <2020> +<93> <94> <201C> +<96> <97> <2013> + <00A1> + <00AE> + <00D8> + <00ED> + <00F7> + <00FC> +endbfrange +50 beginbfchar +<01> <02D9> +<02> <00660069> +<03> <0066006C> +<04> <2044> +<05> <02DD> +<08> <02DB> +<09> <02DA> +<0B> <02D8> +<0C> <2212> +<10> <02C7> +<11> <0131> +<12> <0237> +<13> <00660066> +<14> <006600660069> +<15> <00660066006C> +<16> <2260> +<17> <221E> +<1A> <2202> +<1B> <2211> +<1C> <220F> +<1D> <03C0> +<1E> <0060> +<1F> <0027> +<27> <2019> +<60> <2018> +<80> <20AC> +<81> <222B> +<82> <201A> +<83> <0192> +<84> <201E> +<85> <2026> +<88> <02C6> +<89> <2030> +<8A> <0160> +<8B> <2039> +<8C> <0152> +<8D> <2126> +<8E> <221A> +<8F> <2248> +<95> <2022> +<98> <02DC> +<99> <2122> +<9A> <0161> +<9B> <203A> +<9C> <0153> +<9D> <2206> +<9E> <25CA> +<9F> <0178> + <002D> + <00FF> +endbfchar +endcmap +CMapName currentdict /CMap defineresource pop +end +end +%%EndResource +%%EOF + +endstream +endobj +2168 0 obj << -/D [1904 0 R /XYZ 149.705 753.953 null] +/Type /ObjStm +/N 100 +/First 973 +/Length 17544 >> -% 569 0 obj +stream +606 0 2163 57 2170 151 2172 269 610 328 614 386 618 444 622 502 626 560 630 618 +634 676 638 734 2169 792 2174 886 2176 1004 642 1062 646 1119 650 1176 654 1233 2173 1290 +2178 1384 2180 1502 2177 1561 2182 1629 2184 1747 2185 1805 2186 1863 931 1921 930 1977 890 2034 +891 2091 906 2148 887 2205 888 2262 2187 2319 883 2377 2188 2434 1046 2492 2181 2550 2191 2644 +2193 2762 918 2821 889 2879 886 2937 882 2995 2058 3053 885 3112 2194 3170 884 3229 2042 3287 +2043 3345 2190 3404 2195 3498 2197 3518 2198 3889 2199 3992 2200 4151 2202 4174 2203 4629 2205 4758 +2206 5056 2207 5702 2209 6173 2210 6804 2211 7275 2213 7850 2215 8075 2217 8319 2219 8667 2221 8893 +2223 9124 2225 9409 2227 9934 2229 10179 2231 10638 2233 10981 2235 11460 2237 12036 2208 12465 2196 12906 +2204 13124 2201 13246 1851 13652 1782 13815 1460 14005 915 14166 914 14356 913 14516 970 14706 1016 14895 +1254 15085 1127 15250 665 15420 667 15610 666 15800 668 15990 881 16103 971 16216 1028 16331 1062 16451 +% 606 0 obj << -/D [1904 0 R /XYZ 150.705 716.092 null] +/D [2164 0 R /XYZ 99.895 284.171 null] >> -% 1903 0 obj +% 2163 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1911 0 obj +% 2170 0 obj << /Type /Page -/Contents 1912 0 R -/Resources 1910 0 R +/Contents 2171 0 R +/Resources 2169 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1902 0 R -/Annots [ 1907 0 R 1908 0 R ] ->> -% 1907 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [329.163 275.278 335.44 288.868] -/A << /S /GoTo /D (Hfootnote.5) >> ->> -% 1908 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [291.943 134.696 369.462 146.755] -/A << /S /GoTo /D (spdata) >> ->> -% 1913 0 obj -<< -/D [1911 0 R /XYZ 98.895 753.953 null] ->> -% 573 0 obj -<< -/D [1911 0 R /XYZ 99.895 716.092 null] ->> -% 1914 0 obj -<< -/D [1911 0 R /XYZ 99.895 444.811 null] +/Parent 2158 0 R >> -% 1915 0 obj -<< -/D [1911 0 R /XYZ 99.895 444.971 null] ->> -% 1916 0 obj +% 2172 0 obj << -/D [1911 0 R /XYZ 99.895 433.015 null] +/D [2170 0 R /XYZ 149.705 753.953 null] >> -% 1917 0 obj +% 610 0 obj << -/D [1911 0 R /XYZ 114.242 129.79 null] +/D [2170 0 R /XYZ 150.705 720.077 null] >> -% 1910 0 obj +% 614 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R /F61 1360 0 R /F59 812 0 R >> -/ProcSet [ /PDF /Text ] +/D [2170 0 R /XYZ 150.705 633.991 null] >> -% 1922 0 obj +% 618 0 obj << -/Type /Page -/Contents 1923 0 R -/Resources 1921 0 R -/MediaBox [0 0 595.276 841.89] -/Parent 1902 0 R -/Annots [ 1909 0 R 1918 0 R 1919 0 R 1920 0 R ] +/D [2170 0 R /XYZ 150.705 559.861 null] >> -% 1909 0 obj +% 622 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 654.503 409.811 666.562] -/A << /S /GoTo /D (precdata) >> +/D [2170 0 R /XYZ 150.705 485.732 null] >> -% 1918 0 obj +% 626 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [393.303 584.479 469.357 596.539] -/A << /S /GoTo /D (vdata) >> +/D [2170 0 R /XYZ 150.705 411.602 null] >> -% 1919 0 obj -<< -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [393.303 514.456 469.357 526.516] -/A << /S /GoTo /D (vdata) >> +% 630 0 obj +<< +/D [2170 0 R /XYZ 150.705 325.516 null] >> -% 1920 0 obj +% 634 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.753 374.41 409.811 386.47] -/A << /S /GoTo /D (descdata) >> +/D [2170 0 R /XYZ 150.705 251.386 null] >> -% 1924 0 obj +% 638 0 obj << -/D [1922 0 R /XYZ 149.705 753.953 null] +/D [2170 0 R /XYZ 150.705 177.256 null] >> -% 1921 0 obj +% 2169 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R /F85 814 0 R /F83 813 0 R /F61 1360 0 R >> +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1927 0 obj +% 2174 0 obj << /Type /Page -/Contents 1928 0 R -/Resources 1926 0 R +/Contents 2175 0 R +/Resources 2173 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1902 0 R -/Annots [ 1925 0 R ] +/Parent 2158 0 R >> -% 1925 0 obj +% 2176 0 obj << -/Type /Annot -/Subtype /Link -/Border[0 0 0]/H/I/C[1 0 0] -/Rect [342.493 554.876 418.548 566.936] -/A << /S /GoTo /D (vdata) >> +/D [2174 0 R /XYZ 98.895 753.953 null] >> -% 1929 0 obj +% 642 0 obj << -/D [1927 0 R /XYZ 98.895 753.953 null] +/D [2174 0 R /XYZ 99.895 720.077 null] >> -% 1926 0 obj +% 646 0 obj +<< +/D [2174 0 R /XYZ 99.895 650.062 null] +>> +% 650 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F59 812 0 R /F52 585 0 R >> +/D [2174 0 R /XYZ 99.895 580.047 null] +>> +% 654 0 obj +<< +/D [2174 0 R /XYZ 99.895 510.033 null] +>> +% 2173 0 obj +<< +/Font << /F59 665 0 R /F67 913 0 R /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1931 0 obj +% 2178 0 obj << /Type /Page -/Contents 1932 0 R -/Resources 1930 0 R +/Contents 2179 0 R +/Resources 2177 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1902 0 R +/Parent 2158 0 R >> -% 1933 0 obj +% 2180 0 obj << -/D [1931 0 R /XYZ 149.705 753.953 null] +/D [2178 0 R /XYZ 149.705 753.953 null] >> -% 1930 0 obj +% 2177 0 obj << -/Font << /F54 586 0 R >> +/Font << /F62 667 0 R >> /ProcSet [ /PDF /Text ] >> -% 1935 0 obj +% 2182 0 obj << /Type /Page -/Contents 1936 0 R -/Resources 1934 0 R +/Contents 2183 0 R +/Resources 2181 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1942 0 R +/Parent 2189 0 R >> -% 1937 0 obj +% 2184 0 obj << -/D [1935 0 R /XYZ 98.895 753.953 null] +/D [2182 0 R /XYZ 98.895 753.953 null] >> -% 1938 0 obj +% 2185 0 obj << -/D [1935 0 R /XYZ 99.895 723.717 null] +/D [2182 0 R /XYZ 99.895 723.717 null] >> -% 783 0 obj +% 2186 0 obj << -/D [1935 0 R /XYZ 99.895 698.622 null] +/D [2182 0 R /XYZ 99.895 698.792 null] >> -% 1939 0 obj +% 931 0 obj << -/D [1935 0 R /XYZ 99.895 640.564 null] +/D [2182 0 R /XYZ 99.895 638.64 null] >> -% 831 0 obj +% 930 0 obj << -/D [1935 0 R /XYZ 99.895 585.057 null] +/D [2182 0 R /XYZ 99.895 583.689 null] >> -% 830 0 obj +% 890 0 obj << -/D [1935 0 R /XYZ 99.895 532.2 null] +/D [2182 0 R /XYZ 99.895 523.438 null] >> -% 789 0 obj +% 891 0 obj << -/D [1935 0 R /XYZ 99.895 474.043 null] +/D [2182 0 R /XYZ 99.895 479.068 null] >> -% 790 0 obj +% 906 0 obj << -/D [1935 0 R /XYZ 99.895 431.766 null] +/D [2182 0 R /XYZ 99.895 433.422 null] >> -% 805 0 obj +% 887 0 obj << -/D [1935 0 R /XYZ 99.895 388.215 null] +/D [2182 0 R /XYZ 99.895 386.501 null] >> -% 786 0 obj +% 888 0 obj << -/D [1935 0 R /XYZ 99.895 343.387 null] +/D [2182 0 R /XYZ 99.895 340.855 null] >> -% 787 0 obj +% 2187 0 obj << -/D [1935 0 R /XYZ 99.895 299.836 null] +/D [2182 0 R /XYZ 99.895 295.209 null] >> -% 1940 0 obj +% 883 0 obj << -/D [1935 0 R /XYZ 99.895 256.284 null] +/D [2182 0 R /XYZ 99.895 249.563 null] >> -% 782 0 obj +% 2188 0 obj << -/D [1935 0 R /XYZ 99.895 212.732 null] +/D [2182 0 R /XYZ 99.895 204.585 null] >> -% 1941 0 obj +% 1046 0 obj << -/D [1935 0 R /XYZ 99.895 169.848 null] +/D [2182 0 R /XYZ 99.895 146.317 null] >> -% 1934 0 obj +% 2181 0 obj << -/Font << /F51 584 0 R /F54 586 0 R /F52 585 0 R >> +/Font << /F59 665 0 R /F62 667 0 R /F60 666 0 R >> /ProcSet [ /PDF /Text ] >> -% 1944 0 obj +% 2191 0 obj << /Type /Page -/Contents 1945 0 R -/Resources 1943 0 R +/Contents 2192 0 R +/Resources 2190 0 R /MediaBox [0 0 595.276 841.89] -/Parent 1942 0 R +/Parent 2189 0 R >> -% 1946 0 obj +% 2193 0 obj << -/D [1944 0 R /XYZ 149.705 753.953 null] +/D [2191 0 R /XYZ 149.705 753.953 null] >> -% 946 0 obj +% 918 0 obj << -/D [1944 0 R /XYZ 150.705 716.092 null] +/D [2191 0 R /XYZ 150.705 716.092 null] >> -% 817 0 obj +% 889 0 obj << -/D [1944 0 R /XYZ 150.705 687.379 null] +/D [2191 0 R /XYZ 150.705 664.064 null] >> -% 788 0 obj +% 886 0 obj << -/D [1944 0 R /XYZ 150.705 632.184 null] +/D [2191 0 R /XYZ 150.705 622.283 null] >> -% 785 0 obj +% 882 0 obj << -/D [1944 0 R /XYZ 150.705 590.403 null] +/D [2191 0 R /XYZ 150.705 577.073 null] >> -% 781 0 obj +% 2058 0 obj << -/D [1944 0 R /XYZ 150.705 545.192 null] +/D [2191 0 R /XYZ 150.705 543.917 null] >> -% 784 0 obj +% 885 0 obj << -/D [1944 0 R /XYZ 150.705 512.037 null] +/D [2191 0 R /XYZ 150.705 512.037 null] >> -% 1947 0 obj +% 2194 0 obj << -/D [1944 0 R /XYZ 150.705 480.156 null] +/D [2191 0 R /XYZ 150.705 480.156 null] >> -% 1943 0 obj +% 884 0 obj +<< +/D [2191 0 R /XYZ 150.705 438.971 null] +>> +% 2042 0 obj << -/Font << /F54 586 0 R /F52 585 0 R /F59 812 0 R >> +/D [2191 0 R /XYZ 150.705 380.53 null] +>> +% 2043 0 obj +<< +/D [2191 0 R /XYZ 150.705 336.754 null] +>> +% 2190 0 obj +<< +/Font << /F62 667 0 R /F60 666 0 R /F67 913 0 R >> /ProcSet [ /PDF /Text ] >> -% 1948 0 obj +% 2195 0 obj [1000] -% 1949 0 obj -[525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525] -% 1950 0 obj -[777.8 500 777.8] -% 1951 0 obj +% 2197 0 obj +[525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525] +% 2198 0 obj +[277.8 277.8 500 500 500 500 500 500 500 500 500 500 500 500 277.8 277.8 777.8 500 777.8] +% 2199 0 obj [853 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 666 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 747 0 0 0 0 0 0 0 0 0 0 0 0 0 0 881 0 0 0 0 0 0 0 0 0 0 0 0 234 0 881 767] -% 1952 0 obj +% 2200 0 obj [528 542] -% 1953 0 obj -[525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525] -% 1954 0 obj +% 2202 0 obj +[525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525] +% 2203 0 obj [531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3 531.3] -% 1955 0 obj -[388.9 388.9 500 777.8 277.8 333.3 277.8 500 500 500 500 500 500 500 500 500 500 500 277.8 277.8 277.8 777.8] -% 1956 0 obj +% 2205 0 obj +[388.9 388.9 500 777.8 277.8 333.3 277.8 500 500 500 500 500 500 500 500 500 500 500 277.8 277.8 277.8 777.8 472.2 472.2 777.8 750 708.3 722.2 763.9 680.6 652.8 784.7 750 361.1 513.9 777.8 625 916.7 750 777.8 680.6 777.8 736.1 555.6 722.2 750 750 1027.8 750 750 611.1 277.8 500 277.8] +% 2206 0 obj [777.8 277.8 777.8 500 777.8 500 777.8 777.8 777.8 777.8 777.8 777.8 777.8 1000 500 500 777.8 777.8 777.8 777.8 777.8 777.8 777.8 777.8 777.8 777.8 777.8 777.8 1000 1000 777.8 777.8 1000 1000 500 500 1000 1000 1000 777.8 1000 1000 611.1 611.1 1000 1000 1000 777.8 275 1000 666.7 666.7 888.9 888.9 0 0 555.6 555.6 666.7 500 722.2 722.2 777.8 777.8 611.1 798.5 656.8 526.5 771.4 527.8 718.7 594.9 844.5 544.5 677.8 761.9 689.7 1200.9 820.5 796.1 695.6 816.7 847.5 605.6 544.6 625.8 612.8 987.8 713.3 668.3 724.7 666.7 666.7 666.7 666.7 666.7 611.1 611.1 444.4 444.4 444.4 444.4 500 500 388.9 388.9 277.8 500 500 611.1 500 277.8 833.3] -% 1957 0 obj +% 2207 0 obj [525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525 525] -% 1959 0 obj +% 2209 0 obj [605 608 167 380 611 291 313 333 0 333 606 0 667 500 333 287 0 0 0 0 0 0 0 0 0 0 0 0 333 208 250 278 371 500 500 840 778 278 333 333 389 606 250 333 250 606 500 500 500 500 500 500 500 500 500 500 250 250 606 606 606 444 747 778 611 709 774 611 556 763 832 337 333 726 611 946 831 786 604 786 668 525 613 778 722 1000 667 667 667 333 606 333 606 500 278 500 553 444 611 479 333 556 582 291 234 556 291 883 582 546 601 560 395 424 326 603 565 834 516 556 500 333 606 333 606 0 0 0 278 500 500 1000 500 500 333 1144 525 331 998 0 0 0 0 0 0 500 500 606 500 1000 333 979 424 331 827 0 0 667 0 278 500 500 500 500 606 500] -% 1960 0 obj +% 2210 0 obj [528 545 167 333 556 278 333 333 0 333 606 0 667 444 333 278 0 0 0 0 0 0 0 0 0 0 0 0 333 333 250 333 500 500 500 889 778 278 333 333 389 606 250 333 250 296 500 500 500 500 500 500 500 500 500 500 250 250 606 606 606 500 747 722 611 667 778 611 556 722 778 333 333 667 556 944 778 778 611 778 667 556 611 778 722 944 722 667 667 333 606 333 606 500 278 444 463 407 500 389 278 500 500 278 278 444 278 778 556 444 500 463 389 389 333 556 500 722 500 500 444] -% 1961 0 obj +% 2211 0 obj [611 611 167 333 611 333 333 333 0 333 606 0 667 500 333 333 0 0 0 0 0 0 0 0 0 0 0 0 333 227 250 278 402 500 500 889 833 278 333 333 444 606 250 333 250 296 500 500 500 500 500 500 500 500 500 500 250 250 606 606 606 444 747 778 667 722 833 611 556 833 833 389 389 778 611 1000 833 833 611 833 722 611 667 778 778 1000 667 667 667 333 606 333 606 500 278 500 611 444 611 500 389 556 611 333 333 611 333 889 611 556 611 611 389 444 333 611 556 833 500 556 500 310 606 310 606 0 0 0 333 500 500 1000 500 500 333 1000 611 389 1000 0 0 0 0 0 0 500 500 606 500 1000] -% 1963 0 obj +% 2213 0 obj << /Type /FontDescriptor /FontName /MNPEHI+CMEX10 @@ -28025,89 +34493,104 @@ stream /StemV 47 /XHeight 431 /CharSet (/radicalbigg) -/FontFile 1962 0 R +/FontFile 2212 0 R >> -% 1965 0 obj +% 2215 0 obj << /Type /FontDescriptor -/FontName /MPVPBL+CMITT10 +/FontName /TPELEW+CMMI10 /Flags 4 -/FontBBox [11 -233 669 696] -/Ascent 611 -/CapHeight 611 -/Descent -222 +/FontBBox [-32 -250 1048 750] +/Ascent 694 +/CapHeight 683 +/Descent -194 /ItalicAngle -14 -/StemV 69 +/StemV 72 /XHeight 431 -/CharSet (/D/a/c/d/e/exclam/n/o/period/s/t) -/FontFile 1964 0 R +/CharSet (/arrowhookleft/greater/less) +/FontFile 2214 0 R >> -% 1967 0 obj +% 2217 0 obj << /Type /FontDescriptor -/FontName /SYFPBV+CMMI10 +/FontName /VKSUEJ+CMSY10 /Flags 4 -/FontBBox [-32 -250 1048 750] -/Ascent 694 +/FontBBox [-29 -960 1116 775] +/Ascent 750 /CapHeight 683 /Descent -194 /ItalicAngle -14 -/StemV 72 +/StemV 40 /XHeight 431 -/CharSet (/greater/less) -/FontFile 1966 0 R +/CharSet (/B/H/I/arrowleft/arrowright/asteriskmath/bar/bardbl/braceleft/braceright/element/greaterequal/lessequal/minus/negationslash/radical) +/FontFile 2216 0 R >> -% 1969 0 obj +% 2219 0 obj << /Type /FontDescriptor -/FontName /GIGFZE+CMR10 +/FontName /IKXQUG+PazoMath /Flags 4 -/FontBBox [-40 -250 1009 750] -/Ascent 694 -/CapHeight 683 -/Descent -194 +/FontBBox [-40 -283 878 946] +/Ascent 0 +/CapHeight 0 +/Descent 0 /ItalicAngle 0 -/StemV 69 -/XHeight 431 -/CharSet (/equal/parenleft/parenright/plus) -/FontFile 1968 0 R +/StemV 95 +/XHeight 0 +/CharSet (/infinity/summation) +/FontFile 2218 0 R >> -% 1971 0 obj +% 2221 0 obj << /Type /FontDescriptor -/FontName /DMJGRR+CMSY10 +/FontName /DUJUUF+PazoMath-Italic /Flags 4 -/FontBBox [-29 -960 1116 775] -/Ascent 750 +/FontBBox [-70 -277 902 733] +/Ascent 482 +/CapHeight 0 +/Descent -276 +/ItalicAngle -9 +/StemV 65 +/XHeight 0 +/CharSet (/alpha/beta) +/FontFile 2220 0 R +>> +% 2223 0 obj +<< +/Type /FontDescriptor +/FontName /NCCVYE+LMRoman10-Regular +/Flags 4 +/FontBBox [-430 -290 1417 1127] +/Ascent 694 /CapHeight 683 /Descent -194 -/ItalicAngle -14 -/StemV 40 +/ItalicAngle 0 +/StemV 69 /XHeight 431 -/CharSet (/B/H/I/arrowleft/bar/bardbl/braceleft/braceright/element/greaterequal/lessequal/minus/negationslash/radical) -/FontFile 1970 0 R +/CharSet (/bracketleft/bracketright/equal/parenleft/parenright/plus) +/FontFile 2222 0 R >> -% 1973 0 obj +% 2225 0 obj << /Type /FontDescriptor -/FontName /UFPYIQ+CMTT10 +/FontName /XKYJEW+LMMono10-Regular /Flags 4 -/FontBBox [-4 -233 537 696] +/FontBBox [-451 -316 731 1016] /Ascent 611 /CapHeight 611 /Descent -222 /ItalicAngle 0 /StemV 69 /XHeight 431 -/CharSet (/A/B/C/D/E/F/I/K/L/M/N/O/P/R/S/T/U/W/Y/a/ampersand/asciitilde/asterisk/b/backslash/bracketleft/bracketright/c/colon/comma/d/e/eight/equal/f/four/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/parenleft/parenright/percent/period/plus/q/quotesingle/r/s/six/slash/t/three/two/u/underscore/v/w/x/y/z/zero) -/FontFile 1972 0 R +/CharSet (/A/B/C/D/E/F/H/I/J/K/L/M/N/O/P/R/S/T/U/V/W/X/Y/Z/a/ampersand/asciitilde/asterisk/b/backslash/bracketleft/bracketright/c/colon/comma/d/e/eight/equal/f/four/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/parenleft/parenright/percent/period/plus/q/quotesingle/r/s/six/slash/t/three/two/u/underscore/v/w/x/y/z/zero) +/FontFile 2224 0 R >> -% 1975 0 obj +% 2227 0 obj << /Type /FontDescriptor -/FontName /HZGQIC+CMTT8 +/FontName /XHVBMR+LMMono8-Regular /Flags 4 -/FontBBox [-5 -232 545 699] +/FontBBox [-456 -320 743 1014] /Ascent 611 /CapHeight 611 /Descent -222 @@ -28115,57 +34598,42 @@ stream /StemV 76 /XHeight 431 /CharSet (/b/c/e/i/l/n/p/r/s/t) -/FontFile 1974 0 R +/FontFile 2226 0 R >> -% 1977 0 obj +% 2229 0 obj << /Type /FontDescriptor -/FontName /BQXTWV+CMTT9 +/FontName /TWMFXI+LMMono9-Regular /Flags 4 -/FontBBox [-6 -233 542 698] +/FontBBox [-451 -318 734 1016] /Ascent 611 /CapHeight 611 /Descent -222 /ItalicAngle 0 /StemV 74 /XHeight 431 -/CharSet (/D/E/I/K/N/P/T/Y/a/ampersand/b/c/colon/comma/d/e/equal/f/g/greater/h/i/k/l/less/m/n/o/p/parenleft/parenright/period/q/r/s/semicolon/t/two/u/underscore/v/w/x/y/z) -/FontFile 1976 0 R ->> -% 1979 0 obj -<< -/Type /FontDescriptor -/FontName /IKXQUG+PazoMath -/Flags 4 -/FontBBox [-40 -283 878 946] -/Ascent 0 -/CapHeight 0 -/Descent 0 -/ItalicAngle 0 -/StemV 95 -/XHeight 0 -/CharSet (/infinity/summation) -/FontFile 1978 0 R +/CharSet (/D/E/I/K/N/P/S/T/Y/a/ampersand/asterisk/b/c/colon/comma/d/e/equal/f/four/g/greater/h/hyphen/i/j/k/l/less/m/n/nine/o/one/p/parenleft/parenright/percent/period/plus/q/quotesingle/r/s/semicolon/six/slash/t/two/u/underscore/v/w/x/y/z/zero) +/FontFile 2228 0 R >> -% 1981 0 obj +% 2231 0 obj << /Type /FontDescriptor -/FontName /DUJUUF+PazoMath-Italic +/FontName /XVBOSG+LMMono10-Italic /Flags 4 -/FontBBox [-70 -277 902 733] -/Ascent 482 -/CapHeight 0 -/Descent -276 -/ItalicAngle -9 -/StemV 65 -/XHeight 0 -/CharSet (/alpha/beta) -/FontFile 1980 0 R +/FontBBox [-491 -316 834 1016] +/Ascent 611 +/CapHeight 611 +/Descent -222 +/ItalicAngle -14 +/StemV 69 +/XHeight 431 +/CharSet (/A/C/D/E/H/I/K/L/M/P/T/V/a/c/comma/d/e/exclam/f/g/h/hyphen/i/k/m/n/o/p/parenleft/parenright/period/r/s/slash/t/w/x/y) +/FontFile 2230 0 R >> -% 1983 0 obj +% 2233 0 obj << /Type /FontDescriptor -/FontName /TVMKYN+URWPalladioL-Bold +/FontName /BDDEWM+URWPalladioL-Bold /Flags 4 /FontBBox [-152 -301 1000 935] /Ascent 708 @@ -28174,13 +34642,13 @@ stream /ItalicAngle 0 /StemV 123 /XHeight 471 -/CharSet (/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/Z/a/b/c/colon/comma/d/e/eight/emdash/endash/equal/f/fi/five/fl/four/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/parenleft/parenright/period/q/quoteright/r/s/seven/six/slash/t/three/two/u/v/w/x/y/z/zero) -/FontFile 1982 0 R +/CharSet (/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/Y/Z/a/b/c/colon/comma/d/e/eight/emdash/endash/equal/f/fi/five/fl/four/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/parenleft/parenright/period/q/question/quoteright/r/s/seven/six/slash/t/three/two/u/v/w/x/y/z/zero) +/FontFile 2232 0 R >> -% 1985 0 obj +% 2235 0 obj << /Type /FontDescriptor -/FontName /TCRNJT+URWPalladioL-Roma +/FontName /GLTUCO+URWPalladioL-Roma /Flags 4 /FontBBox [-166 -283 1021 943] /Ascent 715 @@ -28189,13 +34657,13 @@ stream /ItalicAngle 0 /StemV 84 /XHeight 469 -/CharSet (/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/R/S/T/U/V/W/X/a/ampersand/b/bracketleft/bracketright/bullet/c/colon/comma/d/e/eight/emdash/endash/equal/f/fi/five/fl/four/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/parenleft/parenright/period/q/quotedblleft/quotedblright/quoteright/r/s/section/semicolon/seven/six/slash/t/three/two/u/v/w/x/y/z/zero) -/FontFile 1984 0 R +/CharSet (/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/R/S/T/U/V/W/X/Y/Z/a/ampersand/asterisk/b/bracketleft/bracketright/bullet/c/colon/comma/d/e/eight/emdash/endash/equal/f/fi/five/fl/four/g/grave/h/hyphen/i/j/k/l/m/n/nine/o/one/p/parenleft/parenright/period/plus/q/quotedblleft/quotedblright/quoteright/r/s/section/semicolon/seven/six/slash/t/three/two/u/v/w/x/y/z/zero) +/FontFile 2234 0 R >> -% 1987 0 obj +% 2237 0 obj << /Type /FontDescriptor -/FontName /RUEFYH+URWPalladioL-Ital +/FontName /LHHPET+URWPalladioL-Ital /Flags 4 /FontBBox [-170 -305 1010 941] /Ascent 722 @@ -28204,419 +34672,634 @@ stream /ItalicAngle -9 /StemV 78 /XHeight 482 -/CharSet (/A/B/C/D/E/F/G/H/I/L/M/N/O/P/Q/R/S/T/U/V/X/a/b/c/colon/d/e/f/fi/five/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/period/q/quoteright/r/s/slash/t/three/two/u/v/w/x/y/z/zero) -/FontFile 1986 0 R +/CharSet (/A/B/C/D/E/F/G/H/I/K/L/M/N/O/P/Q/R/S/T/U/V/X/a/b/bracketleft/bracketright/c/colon/comma/d/e/f/fi/five/g/h/hyphen/i/j/k/l/m/n/nine/o/one/p/period/plus/q/quoteright/r/s/slash/t/three/two/u/v/w/x/y/z/zero) +/FontFile 2236 0 R >> -% 1958 0 obj +% 2208 0 obj << /Type /Encoding -/Differences [2/fi/fl 38/ampersand/quoteright/parenleft/parenright 44/comma/hyphen/period/slash/zero/one/two/three/four/five/six/seven/eight/nine/colon/semicolon 61/equal 65/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/X 90/Z/bracketleft 93/bracketright 97/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z 147/quotedblleft/quotedblright/bullet/endash/emdash 167/section] +/Differences [2/fi/fl 30/grave 38/ampersand/quoteright/parenleft/parenright/asterisk/plus/comma/hyphen/period/slash/zero/one/two/three/four/five/six/seven/eight/nine/colon/semicolon 61/equal 63/question 65/A/B/C/D/E/F/G/H/I/J/K/L/M/N/O/P/Q/R/S/T/U/V/W/X/Y/Z/bracketleft 93/bracketright 97/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z 147/quotedblleft/quotedblright/bullet/endash/emdash 167/section] >> -% 1750 0 obj +% 2196 0 obj +<< +/Type /Encoding +/Differences [33/exclam 40/parenleft/parenright 44/comma/hyphen/period/slash 65/A 67/C/D/E 72/H/I 75/K/L/M 80/P 84/T 86/V 97/a 99/c/d/e/f/g/h/i 107/k 109/m/n/o/p 114/r/s/t 119/w/x/y] +>> +% 2204 0 obj +<< +/Type /Encoding +/Differences [40/parenleft/parenright 43/plus 61/equal 91/bracketleft 93/bracketright] +>> +% 2201 0 obj +<< +/Type /Encoding +/Differences [13/quotesingle 37/percent/ampersand 40/parenleft/parenright/asterisk/plus/comma/hyphen/period/slash/zero/one/two/three/four 54/six 56/eight/nine/colon/semicolon/less/equal/greater 65/A/B/C/D/E/F 72/H/I/J/K/L/M/N/O/P 82/R/S/T/U/V/W/X/Y/Z/bracketleft/backslash/bracketright 95/underscore 97/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z 126/asciitilde] +>> +% 1851 0 obj << /Type /Font /Subtype /Type1 /BaseFont /MNPEHI+CMEX10 -/FontDescriptor 1963 0 R +/FontDescriptor 2213 0 R /FirstChar 114 /LastChar 114 -/Widths 1948 0 R +/Widths 2195 0 R +/ToUnicode 2238 0 R >> -% 1682 0 obj +% 1782 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /MPVPBL+CMITT10 -/FontDescriptor 1965 0 R +/BaseFont /XVBOSG+LMMono10-Italic +/FontDescriptor 2231 0 R /FirstChar 33 -/LastChar 116 -/Widths 1949 0 R +/LastChar 121 +/Widths 2197 0 R +/Encoding 2196 0 R +/ToUnicode 2239 0 R >> -% 1360 0 obj +% 1460 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /SYFPBV+CMMI10 -/FontDescriptor 1967 0 R -/FirstChar 60 +/BaseFont /TPELEW+CMMI10 +/FontDescriptor 2215 0 R +/FirstChar 44 /LastChar 62 -/Widths 1950 0 R +/Widths 2198 0 R +/ToUnicode 2240 0 R >> -% 814 0 obj +% 915 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /GIGFZE+CMR10 -/FontDescriptor 1969 0 R +/BaseFont /NCCVYE+LMRoman10-Regular +/FontDescriptor 2223 0 R /FirstChar 40 -/LastChar 61 -/Widths 1955 0 R +/LastChar 93 +/Widths 2205 0 R +/Encoding 2204 0 R +/ToUnicode 2241 0 R >> -% 813 0 obj +% 914 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /DMJGRR+CMSY10 -/FontDescriptor 1971 0 R +/BaseFont /VKSUEJ+CMSY10 +/FontDescriptor 2217 0 R /FirstChar 0 /LastChar 112 -/Widths 1956 0 R +/Widths 2206 0 R +/ToUnicode 2242 0 R >> -% 812 0 obj +% 913 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /UFPYIQ+CMTT10 -/FontDescriptor 1973 0 R +/BaseFont /XKYJEW+LMMono10-Regular +/FontDescriptor 2225 0 R /FirstChar 13 /LastChar 126 -/Widths 1957 0 R +/Widths 2207 0 R +/Encoding 2201 0 R +/ToUnicode 2243 0 R >> -% 870 0 obj +% 970 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /HZGQIC+CMTT8 -/FontDescriptor 1975 0 R +/BaseFont /XHVBMR+LMMono8-Regular +/FontDescriptor 2227 0 R /FirstChar 98 /LastChar 116 -/Widths 1954 0 R +/Widths 2203 0 R +/Encoding 2201 0 R +/ToUnicode 2244 0 R >> -% 915 0 obj +% 1016 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /BQXTWV+CMTT9 -/FontDescriptor 1977 0 R -/FirstChar 38 +/BaseFont /TWMFXI+LMMono9-Regular +/FontDescriptor 2229 0 R +/FirstChar 13 /LastChar 122 -/Widths 1953 0 R +/Widths 2202 0 R +/Encoding 2201 0 R +/ToUnicode 2245 0 R >> - -endstream -endobj -1988 0 obj -<< -/Type /ObjStm -/N 100 -/First 925 -/Length 11101 ->> -stream -1154 0 1027 145 584 295 586 465 585 635 587 805 780 918 871 1031 927 1144 962 1257 -991 1370 1034 1487 1079 1607 1121 1727 1180 1847 1232 1967 1281 2087 1330 2207 1371 2327 1409 2447 -1456 2567 1500 2687 1537 2807 1574 2927 1612 3047 1653 3167 1683 3287 1717 3407 1754 3527 1789 3647 -1828 3767 1865 3887 1902 4007 1942 4127 1989 4211 1990 4326 1991 4447 1992 4568 1993 4689 1994 4801 -1995 4897 574 4966 570 5026 566 5137 562 5211 558 5299 554 5387 550 5475 546 5563 542 5637 -538 5762 534 5836 530 5924 526 6012 522 6100 518 6188 514 6262 510 6387 506 6461 502 6549 -498 6637 494 6711 490 6836 486 6910 482 6998 478 7086 474 7174 470 7262 466 7350 462 7438 -458 7526 454 7614 450 7702 446 7790 442 7878 438 7966 434 8054 430 8142 426 8230 422 8304 -418 8430 414 8504 410 8592 406 8680 401 8768 397 8856 393 8944 389 9032 385 9120 381 9208 -377 9296 373 9384 369 9472 365 9560 361 9648 357 9736 353 9824 349 9912 345 10000 341 10088 -% 1154 0 obj +% 1254 0 obj << /Type /Font /Subtype /Type1 /BaseFont /IKXQUG+PazoMath -/FontDescriptor 1979 0 R +/FontDescriptor 2219 0 R /FirstChar 165 /LastChar 229 -/Widths 1951 0 R +/Widths 2199 0 R +/ToUnicode 2246 0 R >> -% 1027 0 obj +% 1127 0 obj << /Type /Font /Subtype /Type1 /BaseFont /DUJUUF+PazoMath-Italic -/FontDescriptor 1981 0 R +/FontDescriptor 2221 0 R /FirstChar 97 /LastChar 98 -/Widths 1952 0 R +/Widths 2200 0 R +/ToUnicode 2247 0 R >> -% 584 0 obj +% 665 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /TVMKYN+URWPalladioL-Bold -/FontDescriptor 1983 0 R +/BaseFont /BDDEWM+URWPalladioL-Bold +/FontDescriptor 2233 0 R /FirstChar 2 /LastChar 151 -/Widths 1961 0 R -/Encoding 1958 0 R +/Widths 2211 0 R +/Encoding 2208 0 R +/ToUnicode 2248 0 R >> -% 586 0 obj +% 667 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /TCRNJT+URWPalladioL-Roma -/FontDescriptor 1985 0 R +/BaseFont /GLTUCO+URWPalladioL-Roma +/FontDescriptor 2235 0 R /FirstChar 2 /LastChar 167 -/Widths 1959 0 R -/Encoding 1958 0 R +/Widths 2209 0 R +/Encoding 2208 0 R +/ToUnicode 2249 0 R >> -% 585 0 obj +% 666 0 obj << /Type /Font /Subtype /Type1 -/BaseFont /RUEFYH+URWPalladioL-Ital -/FontDescriptor 1987 0 R +/BaseFont /LHHPET+URWPalladioL-Ital +/FontDescriptor 2237 0 R /FirstChar 2 /LastChar 122 -/Widths 1960 0 R -/Encoding 1958 0 R +/Widths 2210 0 R +/Encoding 2208 0 R +/ToUnicode 2250 0 R >> -% 587 0 obj +% 668 0 obj << /Type /Pages /Count 6 -/Parent 1989 0 R -/Kids [577 0 R 590 0 R 636 0 R 694 0 R 740 0 R 761 0 R] +/Parent 2251 0 R +/Kids [658 0 R 671 0 R 717 0 R 774 0 R 821 0 R 861 0 R] >> -% 780 0 obj +% 881 0 obj << /Type /Pages /Count 6 -/Parent 1989 0 R -/Kids [778 0 R 797 0 R 809 0 R 822 0 R 835 0 R 840 0 R] +/Parent 2251 0 R +/Kids [879 0 R 898 0 R 910 0 R 923 0 R 935 0 R 940 0 R] >> -% 871 0 obj +% 971 0 obj << /Type /Pages /Count 6 -/Parent 1989 0 R -/Kids [853 0 R 874 0 R 885 0 R 893 0 R 904 0 R 920 0 R] +/Parent 2251 0 R +/Kids [953 0 R 975 0 R 986 0 R 994 0 R 1005 0 R 1021 0 R] >> -% 927 0 obj +% 1028 0 obj << /Type /Pages /Count 6 -/Parent 1989 0 R -/Kids [924 0 R 930 0 R 935 0 R 942 0 R 949 0 R 954 0 R] +/Parent 2251 0 R +/Kids [1025 0 R 1030 0 R 1035 0 R 1042 0 R 1049 0 R 1054 0 R] >> -% 962 0 obj +% 1062 0 obj << /Type /Pages /Count 6 -/Parent 1989 0 R -/Kids [959 0 R 964 0 R 968 0 R 972 0 R 976 0 R 982 0 R] +/Parent 2251 0 R +/Kids [1059 0 R 1065 0 R 1069 0 R 1073 0 R 1077 0 R 1083 0 R] >> -% 991 0 obj + +endstream +endobj +2252 0 obj +<< +/Type /ObjStm +/N 100 +/First 923 +/Length 10703 +>> +stream +1092 0 1134 120 1180 240 1222 360 1281 480 1333 600 1382 720 1431 840 1471 960 1510 1080 +1557 1200 1600 1320 1638 1440 1674 1560 1713 1680 1754 1800 1783 1920 1817 2040 1855 2160 1890 2280 +1928 2400 1966 2520 2002 2640 2057 2760 2158 2880 2189 3000 2251 3084 2253 3202 2254 3323 2255 3444 +2256 3565 2257 3686 2258 3761 2259 3866 655 3935 651 4009 647 4097 643 4185 639 4273 635 4361 +631 4449 627 4537 623 4625 619 4713 615 4801 611 4889 607 4977 602 5065 598 5139 594 5251 +590 5325 586 5413 582 5501 578 5575 574 5700 570 5760 566 5885 562 5959 558 6047 554 6135 +550 6223 546 6311 542 6385 538 6510 534 6584 530 6672 526 6760 522 6848 518 6936 514 7010 +510 7135 506 7209 502 7297 498 7385 494 7459 490 7584 486 7658 482 7746 478 7834 474 7922 +470 8010 466 8098 462 8186 458 8274 454 8362 450 8450 446 8538 442 8626 438 8714 434 8802 +430 8890 426 8978 422 9052 418 9178 414 9252 410 9340 406 9428 401 9516 397 9604 393 9692 +% 1092 0 obj << /Type /Pages /Count 6 -/Parent 1989 0 R -/Kids [988 0 R 995 0 R 1002 0 R 1008 0 R 1013 0 R 1024 0 R] +/Parent 2251 0 R +/Kids [1089 0 R 1096 0 R 1103 0 R 1109 0 R 1113 0 R 1124 0 R] >> -% 1034 0 obj +% 1134 0 obj << /Type /Pages /Count 6 -/Parent 1990 0 R -/Kids [1031 0 R 1041 0 R 1047 0 R 1058 0 R 1063 0 R 1070 0 R] +/Parent 2253 0 R +/Kids [1131 0 R 1141 0 R 1147 0 R 1158 0 R 1164 0 R 1171 0 R] >> -% 1079 0 obj +% 1180 0 obj +<< +/Type /Pages +/Count 6 +/Parent 2253 0 R +/Kids [1176 0 R 1185 0 R 1193 0 R 1198 0 R 1206 0 R 1211 0 R] +>> +% 1222 0 obj +<< +/Type /Pages +/Count 6 +/Parent 2253 0 R +/Kids [1218 0 R 1224 0 R 1237 0 R 1244 0 R 1251 0 R 1262 0 R] +>> +% 1281 0 obj +<< +/Type /Pages +/Count 6 +/Parent 2253 0 R +/Kids [1278 0 R 1285 0 R 1296 0 R 1302 0 R 1313 0 R 1318 0 R] +>> +% 1333 0 obj << /Type /Pages /Count 6 -/Parent 1990 0 R -/Kids [1075 0 R 1084 0 R 1092 0 R 1097 0 R 1105 0 R 1110 0 R] +/Parent 2253 0 R +/Kids [1329 0 R 1335 0 R 1344 0 R 1350 0 R 1358 0 R 1365 0 R] >> -% 1121 0 obj +% 1382 0 obj << /Type /Pages /Count 6 -/Parent 1990 0 R -/Kids [1117 0 R 1124 0 R 1137 0 R 1144 0 R 1151 0 R 1162 0 R] +/Parent 2253 0 R +/Kids [1379 0 R 1387 0 R 1396 0 R 1404 0 R 1408 0 R 1423 0 R] >> -% 1180 0 obj +% 1431 0 obj << /Type /Pages /Count 6 -/Parent 1990 0 R -/Kids [1177 0 R 1184 0 R 1195 0 R 1201 0 R 1212 0 R 1217 0 R] +/Parent 2254 0 R +/Kids [1428 0 R 1435 0 R 1442 0 R 1446 0 R 1451 0 R 1457 0 R] >> -% 1232 0 obj +% 1471 0 obj << /Type /Pages /Count 6 -/Parent 1990 0 R -/Kids [1228 0 R 1235 0 R 1244 0 R 1250 0 R 1258 0 R 1265 0 R] +/Parent 2254 0 R +/Kids [1463 0 R 1474 0 R 1479 0 R 1488 0 R 1496 0 R 1501 0 R] >> -% 1281 0 obj +% 1510 0 obj << /Type /Pages /Count 6 -/Parent 1990 0 R -/Kids [1278 0 R 1286 0 R 1295 0 R 1303 0 R 1307 0 R 1322 0 R] +/Parent 2254 0 R +/Kids [1507 0 R 1512 0 R 1520 0 R 1525 0 R 1533 0 R 1539 0 R] >> -% 1330 0 obj +% 1557 0 obj << /Type /Pages /Count 6 -/Parent 1991 0 R -/Kids [1327 0 R 1334 0 R 1341 0 R 1345 0 R 1351 0 R 1357 0 R] +/Parent 2254 0 R +/Kids [1548 0 R 1562 0 R 1566 0 R 1579 0 R 1585 0 R 1592 0 R] >> -% 1371 0 obj +% 1600 0 obj << /Type /Pages /Count 6 -/Parent 1991 0 R -/Kids [1363 0 R 1374 0 R 1379 0 R 1388 0 R 1395 0 R 1400 0 R] +/Parent 2254 0 R +/Kids [1596 0 R 1604 0 R 1609 0 R 1618 0 R 1626 0 R 1630 0 R] >> -% 1409 0 obj +% 1638 0 obj << /Type /Pages /Count 6 -/Parent 1991 0 R -/Kids [1406 0 R 1411 0 R 1419 0 R 1424 0 R 1432 0 R 1438 0 R] +/Parent 2254 0 R +/Kids [1635 0 R 1640 0 R 1647 0 R 1652 0 R 1658 0 R 1664 0 R] >> -% 1456 0 obj +% 1674 0 obj << /Type /Pages /Count 6 -/Parent 1991 0 R -/Kids [1447 0 R 1461 0 R 1466 0 R 1479 0 R 1485 0 R 1492 0 R] +/Parent 2255 0 R +/Kids [1670 0 R 1677 0 R 1684 0 R 1691 0 R 1695 0 R 1705 0 R] >> -% 1500 0 obj +% 1713 0 obj << /Type /Pages /Count 6 -/Parent 1991 0 R -/Kids [1496 0 R 1504 0 R 1508 0 R 1517 0 R 1525 0 R 1529 0 R] +/Parent 2255 0 R +/Kids [1710 0 R 1715 0 R 1728 0 R 1732 0 R 1738 0 R 1744 0 R] >> -% 1537 0 obj +% 1754 0 obj << /Type /Pages /Count 6 -/Parent 1991 0 R -/Kids [1534 0 R 1539 0 R 1546 0 R 1551 0 R 1557 0 R 1563 0 R] +/Parent 2255 0 R +/Kids [1751 0 R 1756 0 R 1760 0 R 1764 0 R 1768 0 R 1772 0 R] >> -% 1574 0 obj +% 1783 0 obj << /Type /Pages /Count 6 -/Parent 1992 0 R -/Kids [1570 0 R 1577 0 R 1584 0 R 1591 0 R 1595 0 R 1605 0 R] +/Parent 2255 0 R +/Kids [1776 0 R 1785 0 R 1789 0 R 1796 0 R 1800 0 R 1807 0 R] >> -% 1612 0 obj +% 1817 0 obj << /Type /Pages /Count 6 -/Parent 1992 0 R -/Kids [1609 0 R 1614 0 R 1627 0 R 1631 0 R 1637 0 R 1643 0 R] +/Parent 2255 0 R +/Kids [1811 0 R 1819 0 R 1824 0 R 1831 0 R 1835 0 R 1842 0 R] >> -% 1653 0 obj +% 1855 0 obj << /Type /Pages /Count 6 -/Parent 1992 0 R -/Kids [1650 0 R 1655 0 R 1659 0 R 1663 0 R 1667 0 R 1671 0 R] +/Parent 2255 0 R +/Kids [1846 0 R 1857 0 R 1862 0 R 1869 0 R 1875 0 R 1879 0 R] >> -% 1683 0 obj +% 1890 0 obj << /Type /Pages /Count 6 -/Parent 1992 0 R -/Kids [1676 0 R 1685 0 R 1689 0 R 1696 0 R 1700 0 R 1707 0 R] +/Parent 2256 0 R +/Kids [1885 0 R 1892 0 R 1898 0 R 1904 0 R 1909 0 R 1916 0 R] >> -% 1717 0 obj +% 1928 0 obj << /Type /Pages /Count 6 -/Parent 1992 0 R -/Kids [1711 0 R 1719 0 R 1723 0 R 1730 0 R 1734 0 R 1741 0 R] +/Parent 2256 0 R +/Kids [1923 0 R 1932 0 R 1939 0 R 1946 0 R 1952 0 R 1956 0 R] >> -% 1754 0 obj +% 1966 0 obj << /Type /Pages /Count 6 -/Parent 1992 0 R -/Kids [1745 0 R 1756 0 R 1761 0 R 1768 0 R 1774 0 R 1778 0 R] +/Parent 2256 0 R +/Kids [1962 0 R 1972 0 R 1976 0 R 1984 0 R 1989 0 R 1993 0 R] >> -% 1789 0 obj +% 2002 0 obj << /Type /Pages /Count 6 -/Parent 1993 0 R -/Kids [1784 0 R 1792 0 R 1798 0 R 1804 0 R 1809 0 R 1816 0 R] +/Parent 2256 0 R +/Kids [1999 0 R 2004 0 R 2011 0 R 2022 0 R 2027 0 R 2037 0 R] >> -% 1828 0 obj +% 2057 0 obj << /Type /Pages /Count 6 -/Parent 1993 0 R -/Kids [1823 0 R 1831 0 R 1838 0 R 1845 0 R 1851 0 R 1855 0 R] +/Parent 2256 0 R +/Kids [2052 0 R 2062 0 R 2073 0 R 2101 0 R 2121 0 R 2139 0 R] >> -% 1865 0 obj +% 2158 0 obj << /Type /Pages /Count 6 -/Parent 1993 0 R -/Kids [1861 0 R 1871 0 R 1875 0 R 1883 0 R 1888 0 R 1893 0 R] +/Parent 2256 0 R +/Kids [2155 0 R 2160 0 R 2164 0 R 2170 0 R 2174 0 R 2178 0 R] >> -% 1902 0 obj +% 2189 0 obj << /Type /Pages -/Count 6 -/Parent 1993 0 R -/Kids [1899 0 R 1904 0 R 1911 0 R 1922 0 R 1927 0 R 1931 0 R] +/Count 2 +/Parent 2257 0 R +/Kids [2182 0 R 2191 0 R] >> -% 1942 0 obj +% 2251 0 obj << /Type /Pages -/Count 2 -/Parent 1993 0 R -/Kids [1935 0 R 1944 0 R] +/Count 36 +/Parent 2258 0 R +/Kids [668 0 R 881 0 R 971 0 R 1028 0 R 1062 0 R 1092 0 R] >> -% 1989 0 obj +% 2253 0 obj << /Type /Pages /Count 36 -/Parent 1994 0 R -/Kids [587 0 R 780 0 R 871 0 R 927 0 R 962 0 R 991 0 R] +/Parent 2258 0 R +/Kids [1134 0 R 1180 0 R 1222 0 R 1281 0 R 1333 0 R 1382 0 R] >> -% 1990 0 obj +% 2254 0 obj << /Type /Pages /Count 36 -/Parent 1994 0 R -/Kids [1034 0 R 1079 0 R 1121 0 R 1180 0 R 1232 0 R 1281 0 R] +/Parent 2258 0 R +/Kids [1431 0 R 1471 0 R 1510 0 R 1557 0 R 1600 0 R 1638 0 R] >> -% 1991 0 obj +% 2255 0 obj << /Type /Pages /Count 36 -/Parent 1994 0 R -/Kids [1330 0 R 1371 0 R 1409 0 R 1456 0 R 1500 0 R 1537 0 R] +/Parent 2258 0 R +/Kids [1674 0 R 1713 0 R 1754 0 R 1783 0 R 1817 0 R 1855 0 R] >> -% 1992 0 obj +% 2256 0 obj << /Type /Pages /Count 36 -/Parent 1994 0 R -/Kids [1574 0 R 1612 0 R 1653 0 R 1683 0 R 1717 0 R 1754 0 R] +/Parent 2258 0 R +/Kids [1890 0 R 1928 0 R 1966 0 R 2002 0 R 2057 0 R 2158 0 R] >> -% 1993 0 obj +% 2257 0 obj << /Type /Pages -/Count 26 -/Parent 1994 0 R -/Kids [1789 0 R 1828 0 R 1865 0 R 1902 0 R 1942 0 R] +/Count 2 +/Parent 2258 0 R +/Kids [2189 0 R] >> -% 1994 0 obj +% 2258 0 obj << /Type /Pages -/Count 170 -/Kids [1989 0 R 1990 0 R 1991 0 R 1992 0 R 1993 0 R] +/Count 182 +/Kids [2251 0 R 2253 0 R 2254 0 R 2255 0 R 2256 0 R 2257 0 R] >> -% 1995 0 obj +% 2259 0 obj << /Type /Outlines /First 4 0 R /Last 4 0 R /Count 1 >> +% 655 0 obj +<< +/Title 656 0 R +/A 653 0 R +/Parent 598 0 R +/Prev 651 0 R +>> +% 651 0 obj +<< +/Title 652 0 R +/A 649 0 R +/Parent 598 0 R +/Prev 647 0 R +/Next 655 0 R +>> +% 647 0 obj +<< +/Title 648 0 R +/A 645 0 R +/Parent 598 0 R +/Prev 643 0 R +/Next 651 0 R +>> +% 643 0 obj +<< +/Title 644 0 R +/A 641 0 R +/Parent 598 0 R +/Prev 639 0 R +/Next 647 0 R +>> +% 639 0 obj +<< +/Title 640 0 R +/A 637 0 R +/Parent 598 0 R +/Prev 635 0 R +/Next 643 0 R +>> +% 635 0 obj +<< +/Title 636 0 R +/A 633 0 R +/Parent 598 0 R +/Prev 631 0 R +/Next 639 0 R +>> +% 631 0 obj +<< +/Title 632 0 R +/A 629 0 R +/Parent 598 0 R +/Prev 627 0 R +/Next 635 0 R +>> +% 627 0 obj +<< +/Title 628 0 R +/A 625 0 R +/Parent 598 0 R +/Prev 623 0 R +/Next 631 0 R +>> +% 623 0 obj +<< +/Title 624 0 R +/A 621 0 R +/Parent 598 0 R +/Prev 619 0 R +/Next 627 0 R +>> +% 619 0 obj +<< +/Title 620 0 R +/A 617 0 R +/Parent 598 0 R +/Prev 615 0 R +/Next 623 0 R +>> +% 615 0 obj +<< +/Title 616 0 R +/A 613 0 R +/Parent 598 0 R +/Prev 611 0 R +/Next 619 0 R +>> +% 611 0 obj +<< +/Title 612 0 R +/A 609 0 R +/Parent 598 0 R +/Prev 607 0 R +/Next 615 0 R +>> +% 607 0 obj +<< +/Title 608 0 R +/A 604 0 R +/Parent 598 0 R +/Prev 602 0 R +/Next 611 0 R +>> +% 602 0 obj +<< +/Title 603 0 R +/A 600 0 R +/Parent 598 0 R +/Next 607 0 R +>> +% 598 0 obj +<< +/Title 599 0 R +/A 596 0 R +/Parent 4 0 R +/Prev 578 0 R +/First 602 0 R +/Last 655 0 R +/Count -14 +>> +% 594 0 obj +<< +/Title 595 0 R +/A 592 0 R +/Parent 578 0 R +/Prev 590 0 R +>> +% 590 0 obj +<< +/Title 591 0 R +/A 588 0 R +/Parent 578 0 R +/Prev 586 0 R +/Next 594 0 R +>> +% 586 0 obj +<< +/Title 587 0 R +/A 584 0 R +/Parent 578 0 R +/Prev 582 0 R +/Next 590 0 R +>> +% 582 0 obj +<< +/Title 583 0 R +/A 580 0 R +/Parent 578 0 R +/Next 586 0 R +>> +% 578 0 obj +<< +/Title 579 0 R +/A 576 0 R +/Parent 4 0 R +/Prev 570 0 R +/Next 598 0 R +/First 582 0 R +/Last 594 0 R +/Count -4 +>> % 574 0 obj << /Title 575 0 R @@ -28629,6 +35312,7 @@ stream /A 568 0 R /Parent 4 0 R /Prev 542 0 R +/Next 578 0 R /First 574 0 R /Last 574 0 R /Count -1 @@ -28988,6 +35672,27 @@ stream /Prev 389 0 R /Next 397 0 R >> + +endstream +endobj +2260 0 obj +<< +/Type /ObjStm +/N 100 +/First 862 +/Length 9899 +>> +stream +389 0 385 88 381 176 377 264 373 352 369 440 365 528 361 616 357 704 353 792 +349 880 345 968 341 1056 337 1144 333 1232 329 1320 325 1408 321 1496 317 1584 313 1672 +309 1746 305 1872 301 1946 297 2034 293 2122 289 2196 285 2321 281 2395 277 2483 273 2571 +269 2659 265 2747 261 2835 257 2923 253 3011 249 3099 245 3187 241 3275 237 3363 233 3451 +229 3539 225 3627 221 3701 217 3826 213 3899 209 3986 205 4060 200 4148 196 4236 192 4324 +188 4412 184 4486 180 4612 176 4686 172 4774 168 4862 164 4950 160 5038 156 5126 152 5214 +148 5302 144 5390 140 5478 136 5566 132 5654 128 5742 124 5830 120 5918 116 6006 112 6094 +108 6182 104 6256 100 6382 96 6453 92 6536 88 6618 84 6700 80 6782 76 6864 72 6946 +68 7028 64 7110 60 7192 56 7274 52 7356 48 7438 44 7520 40 7589 36 7698 32 7818 +28 7887 24 7943 20 8062 16 8144 12 8213 8 8330 4 8395 2261 8488 2262 8684 2263 8857 % 389 0 obj << /Title 390 0 R @@ -29092,27 +35797,6 @@ stream /Prev 337 0 R /Next 345 0 R >> - -endstream -endobj -1996 0 obj -<< -/Type /ObjStm -/N 100 -/First 875 -/Length 11008 ->> -stream -337 0 333 88 329 176 325 264 321 352 317 440 313 528 309 602 305 728 301 802 -297 890 293 978 289 1052 285 1177 281 1251 277 1339 273 1427 269 1515 265 1603 261 1691 -257 1779 253 1867 249 1955 245 2043 241 2131 237 2219 233 2307 229 2395 225 2483 221 2557 -217 2682 213 2755 209 2842 205 2916 200 3004 196 3092 192 3180 188 3268 184 3342 180 3468 -176 3542 172 3630 168 3718 164 3806 160 3894 156 3982 152 4070 148 4158 144 4246 140 4334 -136 4422 132 4510 128 4598 124 4686 120 4774 116 4862 112 4950 108 5038 104 5112 100 5238 -96 5309 92 5392 88 5474 84 5556 80 5638 76 5720 72 5802 68 5884 64 5966 60 6048 -56 6130 52 6212 48 6294 44 6376 40 6445 36 6554 32 6674 28 6743 24 6799 20 6918 -16 7000 12 7069 8 7186 4 7251 1997 7344 1998 7540 1999 7713 2000 7893 2001 8070 2002 8247 -2003 8427 2004 8605 2005 8785 2006 8963 2007 9134 2008 9299 2009 9465 2010 9629 2011 9793 2012 9963 % 337 0 obj << /Title 338 0 R @@ -29790,652 +36474,725 @@ stream << /Title 5 0 R /A 1 0 R -/Parent 1995 0 R +/Parent 2259 0 R /First 8 0 R -/Last 570 0 R -/Count -11 +/Last 598 0 R +/Count -13 >> -% 1997 0 obj +% 2261 0 obj << -/Names [(Doc-Start) 583 0 R (Hfootnote.1) 815 0 R (Hfootnote.2) 816 0 R (Hfootnote.3) 869 0 R (Hfootnote.4) 1864 0 R (Hfootnote.5) 1917 0 R] +/Names [(Doc-Start) 664 0 R (Hfootnote.1) 916 0 R (Hfootnote.2) 917 0 R (Hfootnote.3) 969 0 R (Hfootnote.4) 1965 0 R (Hfootnote.5) 2017 0 R] /Limits [(Doc-Start) (Hfootnote.5)] >> -% 1998 0 obj +% 2262 0 obj << -/Names [(Item.1) 843 0 R (Item.10) 857 0 R (Item.100) 1598 0 R (Item.101) 1599 0 R (Item.102) 1600 0 R (Item.103) 1617 0 R] +/Names [(Item.1) 943 0 R (Item.10) 957 0 R (Item.100) 1698 0 R (Item.101) 1699 0 R (Item.102) 1700 0 R (Item.103) 1718 0 R] /Limits [(Item.1) (Item.103)] >> -% 1999 0 obj +% 2263 0 obj << -/Names [(Item.104) 1618 0 R (Item.105) 1619 0 R (Item.106) 1620 0 R (Item.107) 1621 0 R (Item.108) 1622 0 R (Item.109) 1623 0 R] +/Names [(Item.104) 1719 0 R (Item.105) 1720 0 R (Item.106) 1721 0 R (Item.107) 1722 0 R (Item.108) 1723 0 R (Item.109) 1724 0 R] /Limits [(Item.104) (Item.109)] >> -% 2000 0 obj + +endstream +endobj +2265 0 obj << -/Names [(Item.11) 858 0 R (Item.110) 1624 0 R (Item.111) 1625 0 R (Item.112) 1634 0 R (Item.113) 1635 0 R (Item.114) 1640 0 R] +/Type /ObjStm +/N 100 +/First 1037 +/Length 20320 +>> +stream +2264 0 2266 177 2267 354 2268 534 2269 712 2270 892 2271 1070 2272 1247 2273 1412 2274 1578 +2275 1744 2276 1916 2277 2086 2278 2258 2279 2428 2280 2600 2281 2769 2282 2938 2283 3110 2284 3280 +2285 3452 2286 3622 2287 3794 2288 3983 2289 4173 2290 4396 2291 4617 2292 4822 2293 5012 2294 5194 +2295 5390 2296 5615 2297 5835 2298 6066 2299 6302 2300 6534 2301 6745 2302 6922 2303 7100 2304 7280 +2305 7459 2306 7639 2307 7818 2308 7998 2309 8177 2310 8357 2311 8536 2312 8716 2313 8894 2314 9072 +2315 9252 2316 9424 2317 9596 2318 9766 2319 9938 2320 10108 2321 10280 2322 10449 2323 10618 2324 10790 +2325 10960 2326 11132 2327 11302 2328 11474 2329 11644 2330 11816 2331 11986 2332 12182 2333 12379 2334 12575 +2335 12762 2336 12944 2337 13138 2338 13368 2339 13593 2340 13812 2341 14039 2342 14265 2343 14487 2344 14712 +2345 14942 2346 15171 2347 15399 2348 15621 2349 15849 2350 16075 2351 16297 2352 16519 2353 16760 2354 17022 +2355 17278 2356 17544 2357 17814 2358 18076 2359 18338 2360 18548 2361 18728 2362 18904 2363 19073 2364 19169 +% 2264 0 obj +<< +/Names [(Item.11) 958 0 R (Item.110) 1725 0 R (Item.111) 1726 0 R (Item.112) 1735 0 R (Item.113) 1736 0 R (Item.114) 1741 0 R] /Limits [(Item.11) (Item.114)] >> -% 2001 0 obj +% 2266 0 obj << -/Names [(Item.115) 1641 0 R (Item.116) 1646 0 R (Item.117) 1647 0 R (Item.118) 1648 0 R (Item.119) 1679 0 R (Item.12) 859 0 R] +/Names [(Item.115) 1742 0 R (Item.116) 1747 0 R (Item.117) 1748 0 R (Item.118) 1749 0 R (Item.119) 1779 0 R (Item.12) 959 0 R] /Limits [(Item.115) (Item.12)] >> -% 2002 0 obj +% 2267 0 obj << -/Names [(Item.120) 1680 0 R (Item.121) 1681 0 R (Item.122) 1692 0 R (Item.123) 1693 0 R (Item.124) 1694 0 R (Item.125) 1703 0 R] +/Names [(Item.120) 1780 0 R (Item.121) 1781 0 R (Item.122) 1792 0 R (Item.123) 1793 0 R (Item.124) 1794 0 R (Item.125) 1803 0 R] /Limits [(Item.120) (Item.125)] >> -% 2003 0 obj +% 2268 0 obj << -/Names [(Item.126) 1704 0 R (Item.127) 1705 0 R (Item.128) 1714 0 R (Item.129) 1715 0 R (Item.13) 860 0 R (Item.130) 1716 0 R] +/Names [(Item.126) 1804 0 R (Item.127) 1805 0 R (Item.128) 1814 0 R (Item.129) 1815 0 R (Item.13) 960 0 R (Item.130) 1816 0 R] /Limits [(Item.126) (Item.130)] >> -% 2004 0 obj +% 2269 0 obj << -/Names [(Item.131) 1726 0 R (Item.132) 1727 0 R (Item.133) 1728 0 R (Item.134) 1737 0 R (Item.135) 1738 0 R (Item.136) 1739 0 R] +/Names [(Item.131) 1827 0 R (Item.132) 1828 0 R (Item.133) 1829 0 R (Item.134) 1838 0 R (Item.135) 1839 0 R (Item.136) 1840 0 R] /Limits [(Item.131) (Item.136)] >> -% 2005 0 obj +% 2270 0 obj << -/Names [(Item.137) 1748 0 R (Item.138) 1749 0 R (Item.139) 1751 0 R (Item.14) 861 0 R (Item.140) 1752 0 R (Item.141) 1753 0 R] +/Names [(Item.137) 1849 0 R (Item.138) 1850 0 R (Item.139) 1852 0 R (Item.14) 961 0 R (Item.140) 1853 0 R (Item.141) 1854 0 R] /Limits [(Item.137) (Item.141)] >> -% 2006 0 obj +% 2271 0 obj << -/Names [(Item.142) 1759 0 R (Item.143) 1764 0 R (Item.15) 862 0 R (Item.16) 863 0 R (Item.17) 864 0 R (Item.18) 865 0 R] -/Limits [(Item.142) (Item.18)] +/Names [(Item.142) 1860 0 R (Item.143) 1865 0 R (Item.144) 2066 0 R (Item.145) 2067 0 R (Item.146) 2167 0 R (Item.15) 962 0 R] +/Limits [(Item.142) (Item.15)] >> -% 2007 0 obj +% 2272 0 obj << -/Names [(Item.19) 866 0 R (Item.2) 844 0 R (Item.20) 867 0 R (Item.21) 868 0 R (Item.22) 877 0 R (Item.23) 878 0 R] -/Limits [(Item.19) (Item.23)] +/Names [(Item.16) 963 0 R (Item.17) 964 0 R (Item.18) 965 0 R (Item.19) 966 0 R (Item.2) 944 0 R (Item.20) 967 0 R] +/Limits [(Item.16) (Item.20)] >> -% 2008 0 obj +% 2273 0 obj << -/Names [(Item.24) 879 0 R (Item.25) 880 0 R (Item.26) 881 0 R (Item.27) 882 0 R (Item.28) 896 0 R (Item.29) 897 0 R] -/Limits [(Item.24) (Item.29)] +/Names [(Item.21) 968 0 R (Item.22) 978 0 R (Item.23) 979 0 R (Item.24) 980 0 R (Item.25) 981 0 R (Item.26) 982 0 R] +/Limits [(Item.21) (Item.26)] >> -% 2009 0 obj +% 2274 0 obj << -/Names [(Item.3) 845 0 R (Item.30) 898 0 R (Item.31) 899 0 R (Item.32) 900 0 R (Item.33) 907 0 R (Item.34) 908 0 R] -/Limits [(Item.3) (Item.34)] +/Names [(Item.27) 983 0 R (Item.28) 997 0 R (Item.29) 998 0 R (Item.3) 945 0 R (Item.30) 999 0 R (Item.31) 1000 0 R] +/Limits [(Item.27) (Item.31)] >> -% 2010 0 obj +% 2275 0 obj << -/Names [(Item.35) 909 0 R (Item.36) 910 0 R (Item.37) 911 0 R (Item.38) 912 0 R (Item.39) 913 0 R (Item.4) 846 0 R] -/Limits [(Item.35) (Item.4)] +/Names [(Item.32) 1001 0 R (Item.33) 1008 0 R (Item.34) 1009 0 R (Item.35) 1010 0 R (Item.36) 1011 0 R (Item.37) 1012 0 R] +/Limits [(Item.32) (Item.37)] >> -% 2011 0 obj +% 2276 0 obj << -/Names [(Item.40) 914 0 R (Item.41) 957 0 R (Item.42) 1050 0 R (Item.43) 1078 0 R (Item.44) 1100 0 R (Item.45) 1127 0 R] -/Limits [(Item.40) (Item.45)] +/Names [(Item.38) 1013 0 R (Item.39) 1014 0 R (Item.4) 946 0 R (Item.40) 1015 0 R (Item.41) 1057 0 R (Item.42) 1150 0 R] +/Limits [(Item.38) (Item.42)] >> -% 2012 0 obj +% 2277 0 obj << -/Names [(Item.46) 1298 0 R (Item.47) 1299 0 R (Item.48) 1300 0 R (Item.49) 1354 0 R (Item.5) 847 0 R (Item.50) 1361 0 R] -/Limits [(Item.46) (Item.50)] +/Names [(Item.43) 1179 0 R (Item.44) 1201 0 R (Item.45) 1227 0 R (Item.46) 1399 0 R (Item.47) 1400 0 R (Item.48) 1401 0 R] +/Limits [(Item.43) (Item.48)] >> - -endstream -endobj -2112 0 obj +% 2278 0 obj << - /Title (Parallel Sparse BLAS V. 3.8.0) /Subject (Parallel Sparse Basic Linear Algebra Subroutines) /Keywords (Computer Science Linear Algebra Fluid Dynamics Parallel Linux MPI PSBLAS Iterative Solvers Preconditioners) /Creator (pdfLaTeX) /Producer ($Id$) /Author()/Title()/Subject()/Creator(LaTeX with hyperref)/Keywords() -/CreationDate (D:20220616091307+02'00') -/ModDate (D:20220616091307+02'00') -/Trapped /False -/PTEX.Fullbanner (This is pdfTeX, Version 3.14159265-2.6-1.40.21 (TeX Live 2020) kpathsea version 6.3.2) +/Names [(Item.49) 1454 0 R (Item.5) 947 0 R (Item.50) 1461 0 R (Item.51) 1466 0 R (Item.52) 1467 0 R (Item.53) 1468 0 R] +/Limits [(Item.49) (Item.53)] >> -endobj -2014 0 obj +% 2279 0 obj << -/Type /ObjStm -/N 98 -/First 1015 -/Length 18404 ->> -stream -2013 0 2015 172 2016 342 2017 514 2018 684 2019 856 2020 1026 2021 1198 2022 1367 2023 1544 -2024 1742 2025 1968 2026 2184 2027 2373 2028 2555 2029 2767 2030 2987 2031 3212 2032 3445 2033 3673 -2034 3846 2035 4026 2036 4203 2037 4380 2038 4560 2039 4738 2040 4918 2041 5096 2042 5276 2043 5454 -2044 5634 2045 5812 2046 5980 2047 6146 2048 6316 2049 6488 2050 6658 2051 6830 2052 7000 2053 7172 -2054 7341 2055 7510 2056 7682 2057 7852 2058 8024 2059 8194 2060 8366 2061 8534 2062 8719 2063 8899 -2064 9097 2065 9324 2066 9542 2067 9768 2068 9995 2069 10217 2070 10441 2071 10671 2072 10900 2073 11130 -2074 11352 2075 11579 2076 11806 2077 12028 2078 12250 2079 12485 2080 12749 2081 13005 2082 13269 2083 13539 -2084 13803 2085 14065 2086 14284 2087 14464 2088 14640 2089 14809 2090 14903 2091 15017 2092 15129 2093 15239 -2094 15350 2095 15463 2096 15577 2097 15690 2098 15802 2099 15913 2100 16024 2101 16143 2102 16270 2103 16396 -2104 16527 2105 16651 2106 16724 2107 16838 2108 16958 2109 17057 2110 17141 2111 17176 -% 2013 0 obj +/Names [(Item.54) 1469 0 R (Item.55) 1470 0 R (Item.56) 1482 0 R (Item.57) 1483 0 R (Item.58) 1484 0 R (Item.59) 1491 0 R] +/Limits [(Item.54) (Item.59)] +>> +% 2280 0 obj << -/Names [(Item.51) 1366 0 R (Item.52) 1367 0 R (Item.53) 1368 0 R (Item.54) 1369 0 R (Item.55) 1370 0 R (Item.56) 1382 0 R] -/Limits [(Item.51) (Item.56)] +/Names [(Item.6) 948 0 R (Item.60) 1515 0 R (Item.61) 1516 0 R (Item.62) 1523 0 R (Item.63) 1528 0 R (Item.64) 1529 0 R] +/Limits [(Item.6) (Item.64)] >> -% 2015 0 obj +% 2281 0 obj << -/Names [(Item.57) 1383 0 R (Item.58) 1384 0 R (Item.59) 1391 0 R (Item.6) 848 0 R (Item.60) 1414 0 R (Item.61) 1415 0 R] -/Limits [(Item.57) (Item.61)] +/Names [(Item.65) 1530 0 R (Item.66) 1542 0 R (Item.67) 1543 0 R (Item.68) 1544 0 R (Item.69) 1545 0 R (Item.7) 949 0 R] +/Limits [(Item.65) (Item.7)] >> -% 2016 0 obj +% 2282 0 obj << -/Names [(Item.62) 1422 0 R (Item.63) 1427 0 R (Item.64) 1428 0 R (Item.65) 1429 0 R (Item.66) 1441 0 R (Item.67) 1442 0 R] -/Limits [(Item.62) (Item.67)] +/Names [(Item.70) 1546 0 R (Item.71) 1551 0 R (Item.72) 1552 0 R (Item.73) 1553 0 R (Item.74) 1554 0 R (Item.75) 1555 0 R] +/Limits [(Item.70) (Item.75)] >> -% 2017 0 obj +% 2283 0 obj << -/Names [(Item.68) 1443 0 R (Item.69) 1444 0 R (Item.7) 849 0 R (Item.70) 1445 0 R (Item.71) 1450 0 R (Item.72) 1451 0 R] -/Limits [(Item.68) (Item.72)] +/Names [(Item.76) 1556 0 R (Item.77) 1569 0 R (Item.78) 1570 0 R (Item.79) 1571 0 R (Item.8) 950 0 R (Item.80) 1572 0 R] +/Limits [(Item.76) (Item.80)] >> -% 2018 0 obj +% 2284 0 obj << -/Names [(Item.73) 1452 0 R (Item.74) 1453 0 R (Item.75) 1454 0 R (Item.76) 1455 0 R (Item.77) 1469 0 R (Item.78) 1470 0 R] -/Limits [(Item.73) (Item.78)] +/Names [(Item.81) 1573 0 R (Item.82) 1574 0 R (Item.83) 1575 0 R (Item.84) 1588 0 R (Item.85) 1599 0 R (Item.86) 1612 0 R] +/Limits [(Item.81) (Item.86)] >> -% 2019 0 obj +% 2285 0 obj << -/Names [(Item.79) 1471 0 R (Item.8) 850 0 R (Item.80) 1472 0 R (Item.81) 1473 0 R (Item.82) 1474 0 R (Item.83) 1475 0 R] -/Limits [(Item.79) (Item.83)] +/Names [(Item.87) 1613 0 R (Item.88) 1621 0 R (Item.89) 1622 0 R (Item.9) 956 0 R (Item.90) 1643 0 R (Item.91) 1644 0 R] +/Limits [(Item.87) (Item.91)] >> -% 2020 0 obj +% 2286 0 obj << -/Names [(Item.84) 1488 0 R (Item.85) 1499 0 R (Item.86) 1511 0 R (Item.87) 1512 0 R (Item.88) 1520 0 R (Item.89) 1521 0 R] -/Limits [(Item.84) (Item.89)] +/Names [(Item.92) 1655 0 R (Item.93) 1661 0 R (Item.94) 1667 0 R (Item.95) 1673 0 R (Item.96) 1680 0 R (Item.97) 1681 0 R] +/Limits [(Item.92) (Item.97)] >> -% 2021 0 obj +% 2287 0 obj << -/Names [(Item.9) 856 0 R (Item.90) 1542 0 R (Item.91) 1543 0 R (Item.92) 1554 0 R (Item.93) 1560 0 R (Item.94) 1566 0 R] -/Limits [(Item.9) (Item.94)] +/Names [(Item.98) 1687 0 R (Item.99) 1688 0 R (algocf.1) 2071 0 R (algocf.2) 2117 0 R (algocfline.1) 2056 0 R (algocfline.2) 2124 0 R] +/Limits [(Item.98) (algocfline.2)] >> -% 2022 0 obj +% 2288 0 obj << -/Names [(Item.95) 1573 0 R (Item.96) 1580 0 R (Item.97) 1581 0 R (Item.98) 1587 0 R (Item.99) 1588 0 R (cite.2007c) 830 0 R] -/Limits [(Item.95) (cite.2007c)] +/Names [(cite.2007c) 930 0 R (cite.2007d) 931 0 R (cite.BLACS) 906 0 R (cite.BLAS1) 889 0 R (cite.BLAS2) 890 0 R (cite.BLAS3) 891 0 R] +/Limits [(cite.2007c) (cite.BLAS3)] >> -% 2023 0 obj +% 2289 0 obj << -/Names [(cite.2007d) 831 0 R (cite.BLACS) 805 0 R (cite.BLAS1) 788 0 R (cite.BLAS2) 789 0 R (cite.BLAS3) 790 0 R (cite.DesPat:11) 783 0 R] -/Limits [(cite.2007d) (cite.DesPat:11)] +/Names [(cite.CaFiRo:2014) 2042 0 R (cite.DesPat:11) 884 0 R (cite.DesignPatterns) 1046 0 R (cite.KIVA3PSBLAS) 2188 0 R (cite.METIS) 918 0 R (cite.MPI1) 2194 0 R] +/Limits [(cite.CaFiRo:2014) (cite.MPI1)] >> -% 2024 0 obj +% 2290 0 obj << -/Names [(cite.DesignPatterns) 946 0 R (cite.KIVA3PSBLAS) 1941 0 R (cite.METIS) 817 0 R (cite.MPI1) 1947 0 R (cite.PARA04FOREST) 1939 0 R (cite.PSBLAS) 1940 0 R] -/Limits [(cite.DesignPatterns) (cite.PSBLAS)] +/Names [(cite.MRC:11) 2058 0 R (cite.OurTechRep) 2043 0 R (cite.PARA04FOREST) 2186 0 R (cite.PSBLAS) 2187 0 R (cite.RouXiaXu:11) 885 0 R (cite.Sparse03) 883 0 R] +/Limits [(cite.MRC:11) (cite.Sparse03)] >> -% 2025 0 obj +% 2291 0 obj << -/Names [(cite.RouXiaXu:11) 784 0 R (cite.Sparse03) 782 0 R (cite.machiels) 785 0 R (cite.metcalf) 781 0 R (cite.sblas02) 787 0 R (cite.sblas97) 786 0 R] -/Limits [(cite.RouXiaXu:11) (cite.sblas97)] +/Names [(cite.machiels) 886 0 R (cite.metcalf) 882 0 R (cite.sblas02) 888 0 R (cite.sblas97) 887 0 R (descdata) 989 0 R (equation.4.1) 1265 0 R] +/Limits [(cite.machiels) (equation.4.1)] >> -% 2026 0 obj +% 2292 0 obj << -/Names [(descdata) 888 0 R (equation.4.1) 1165 0 R (equation.4.2) 1166 0 R (equation.4.3) 1167 0 R (figure.1) 799 0 R (figure.2) 825 0 R] -/Limits [(descdata) (figure.2)] +/Names [(equation.4.2) 1266 0 R (equation.4.3) 1267 0 R (figure.1) 900 0 R (figure.2) 926 0 R (figure.3) 1368 0 R (figure.4) 1402 0 R] +/Limits [(equation.4.2) (figure.4)] >> -% 2027 0 obj +% 2293 0 obj << -/Names [(figure.3) 1268 0 R (figure.4) 1301 0 R (listing.1) 916 0 R (listing.2) 947 0 R (listing.3) 986 0 R (listing.4) 1006 0 R] -/Limits [(figure.3) (listing.4)] +/Names [(figure.5) 2065 0 R (figure.6) 2060 0 R (figure.7) 2104 0 R (figure.8) 2116 0 R (figure.9) 2142 0 R (listing.1) 1017 0 R] +/Limits [(figure.5) (listing.1)] >> -% 2028 0 obj +% 2294 0 obj << -/Names [(listing.5) 1771 0 R (listing.6) 1772 0 R (lstlisting.-1) 1128 0 R (lstlisting.-10) 1841 0 R (lstlisting.-11) 1848 0 R (lstlisting.-12) 1914 0 R] -/Limits [(listing.5) (lstlisting.-12)] +/Names [(listing.2) 1047 0 R (listing.3) 1087 0 R (listing.4) 1107 0 R (listing.5) 1872 0 R (listing.6) 1873 0 R (lstlisting.-1) 1228 0 R] +/Limits [(listing.2) (lstlisting.-1)] >> -% 2029 0 obj +% 2295 0 obj << -/Names [(lstlisting.-2) 1781 0 R (lstlisting.-3) 1787 0 R (lstlisting.-4) 1795 0 R (lstlisting.-5) 1801 0 R (lstlisting.-6) 1812 0 R (lstlisting.-7) 1819 0 R] -/Limits [(lstlisting.-2) (lstlisting.-7)] +/Names [(lstlisting.-10) 1942 0 R (lstlisting.-11) 1949 0 R (lstlisting.-12) 2014 0 R (lstlisting.-13) 2076 0 R (lstlisting.-2) 1882 0 R (lstlisting.-3) 1888 0 R] +/Limits [(lstlisting.-10) (lstlisting.-3)] >> -% 2030 0 obj +% 2296 0 obj << -/Names [(lstlisting.-8) 1826 0 R (lstlisting.-9) 1834 0 R (lstnumber.-1.1) 1129 0 R (lstnumber.-1.2) 1130 0 R (lstnumber.-1.3) 1131 0 R (lstnumber.-1.4) 1132 0 R] -/Limits [(lstlisting.-8) (lstnumber.-1.4)] +/Names [(lstlisting.-4) 1895 0 R (lstlisting.-5) 1901 0 R (lstlisting.-6) 1912 0 R (lstlisting.-7) 1919 0 R (lstlisting.-8) 1926 0 R (lstlisting.-9) 1935 0 R] +/Limits [(lstlisting.-4) (lstlisting.-9)] >> -% 2031 0 obj +% 2297 0 obj << -/Names [(lstnumber.-10.1) 1842 0 R (lstnumber.-11.1) 1849 0 R (lstnumber.-12.1) 1915 0 R (lstnumber.-12.2) 1916 0 R (lstnumber.-2.1) 1782 0 R (lstnumber.-3.1) 1788 0 R] -/Limits [(lstnumber.-10.1) (lstnumber.-3.1)] +/Names [(lstnumber.-1.1) 1229 0 R (lstnumber.-1.2) 1230 0 R (lstnumber.-1.3) 1231 0 R (lstnumber.-1.4) 1232 0 R (lstnumber.-10.1) 1943 0 R (lstnumber.-11.1) 1950 0 R] +/Limits [(lstnumber.-1.1) (lstnumber.-11.1)] >> -% 2032 0 obj +% 2298 0 obj << -/Names [(lstnumber.-4.1) 1796 0 R (lstnumber.-5.1) 1802 0 R (lstnumber.-6.1) 1813 0 R (lstnumber.-7.1) 1820 0 R (lstnumber.-8.1) 1827 0 R (lstnumber.-9.1) 1835 0 R] -/Limits [(lstnumber.-4.1) (lstnumber.-9.1)] +/Names [(lstnumber.-12.1) 2015 0 R (lstnumber.-12.2) 2016 0 R (lstnumber.-13.1) 2077 0 R (lstnumber.-13.2) 2078 0 R (lstnumber.-13.3) 2079 0 R (lstnumber.-13.4) 2080 0 R] +/Limits [(lstnumber.-12.1) (lstnumber.-13.4)] >> -% 2033 0 obj +% 2299 0 obj << -/Names [(page.1) 582 0 R (page.10) 895 0 R (page.100) 1553 0 R (page.101) 1559 0 R (page.102) 1565 0 R (page.103) 1572 0 R] -/Limits [(page.1) (page.103)] +/Names [(lstnumber.-13.5) 2081 0 R (lstnumber.-13.6) 2082 0 R (lstnumber.-13.7) 2083 0 R (lstnumber.-2.1) 1883 0 R (lstnumber.-3.1) 1889 0 R (lstnumber.-4.1) 1896 0 R] +/Limits [(lstnumber.-13.5) (lstnumber.-4.1)] >> -% 2034 0 obj +% 2300 0 obj << -/Names [(page.104) 1579 0 R (page.105) 1586 0 R (page.106) 1593 0 R (page.107) 1597 0 R (page.108) 1607 0 R (page.109) 1611 0 R] -/Limits [(page.104) (page.109)] +/Names [(lstnumber.-5.1) 1902 0 R (lstnumber.-6.1) 1913 0 R (lstnumber.-7.1) 1920 0 R (lstnumber.-8.1) 1927 0 R (lstnumber.-9.1) 1936 0 R (page.1) 663 0 R] +/Limits [(lstnumber.-5.1) (page.1)] >> -% 2035 0 obj +% 2301 0 obj << -/Names [(page.11) 906 0 R (page.110) 1616 0 R (page.111) 1629 0 R (page.112) 1633 0 R (page.113) 1639 0 R (page.114) 1645 0 R] -/Limits [(page.11) (page.114)] +/Names [(page.10) 996 0 R (page.100) 1654 0 R (page.101) 1660 0 R (page.102) 1666 0 R (page.103) 1672 0 R (page.104) 1679 0 R] +/Limits [(page.10) (page.104)] >> -% 2036 0 obj +% 2302 0 obj << -/Names [(page.115) 1652 0 R (page.116) 1657 0 R (page.117) 1661 0 R (page.118) 1665 0 R (page.119) 1669 0 R (page.12) 922 0 R] -/Limits [(page.115) (page.12)] +/Names [(page.105) 1686 0 R (page.106) 1693 0 R (page.107) 1697 0 R (page.108) 1707 0 R (page.109) 1712 0 R (page.11) 1007 0 R] +/Limits [(page.105) (page.11)] >> -% 2037 0 obj +% 2303 0 obj << -/Names [(page.120) 1673 0 R (page.121) 1678 0 R (page.122) 1687 0 R (page.123) 1691 0 R (page.124) 1698 0 R (page.125) 1702 0 R] -/Limits [(page.120) (page.125)] +/Names [(page.110) 1717 0 R (page.111) 1730 0 R (page.112) 1734 0 R (page.113) 1740 0 R (page.114) 1746 0 R (page.115) 1753 0 R] +/Limits [(page.110) (page.115)] >> -% 2038 0 obj +% 2304 0 obj << -/Names [(page.126) 1709 0 R (page.127) 1713 0 R (page.128) 1721 0 R (page.129) 1725 0 R (page.13) 926 0 R (page.130) 1732 0 R] -/Limits [(page.126) (page.130)] +/Names [(page.116) 1758 0 R (page.117) 1762 0 R (page.118) 1766 0 R (page.119) 1770 0 R (page.12) 1023 0 R (page.120) 1774 0 R] +/Limits [(page.116) (page.120)] >> -% 2039 0 obj +% 2305 0 obj << -/Names [(page.131) 1736 0 R (page.132) 1743 0 R (page.133) 1747 0 R (page.134) 1758 0 R (page.135) 1763 0 R (page.136) 1770 0 R] -/Limits [(page.131) (page.136)] +/Names [(page.121) 1778 0 R (page.122) 1787 0 R (page.123) 1791 0 R (page.124) 1798 0 R (page.125) 1802 0 R (page.126) 1809 0 R] +/Limits [(page.121) (page.126)] >> -% 2040 0 obj +% 2306 0 obj << -/Names [(page.137) 1776 0 R (page.138) 1780 0 R (page.139) 1786 0 R (page.14) 932 0 R (page.140) 1794 0 R (page.141) 1800 0 R] -/Limits [(page.137) (page.141)] +/Names [(page.127) 1813 0 R (page.128) 1821 0 R (page.129) 1826 0 R (page.13) 1027 0 R (page.130) 1833 0 R (page.131) 1837 0 R] +/Limits [(page.127) (page.131)] >> -% 2041 0 obj +% 2307 0 obj << -/Names [(page.142) 1806 0 R (page.143) 1811 0 R (page.144) 1818 0 R (page.145) 1825 0 R (page.146) 1833 0 R (page.147) 1840 0 R] -/Limits [(page.142) (page.147)] +/Names [(page.132) 1844 0 R (page.133) 1848 0 R (page.134) 1859 0 R (page.135) 1864 0 R (page.136) 1871 0 R (page.137) 1877 0 R] +/Limits [(page.132) (page.137)] >> -% 2042 0 obj +% 2308 0 obj << -/Names [(page.148) 1847 0 R (page.149) 1853 0 R (page.15) 937 0 R (page.150) 1857 0 R (page.151) 1863 0 R (page.152) 1873 0 R] -/Limits [(page.148) (page.152)] +/Names [(page.138) 1881 0 R (page.139) 1887 0 R (page.14) 1032 0 R (page.140) 1894 0 R (page.141) 1900 0 R (page.142) 1906 0 R] +/Limits [(page.138) (page.142)] >> -% 2043 0 obj +% 2309 0 obj << -/Names [(page.153) 1877 0 R (page.154) 1885 0 R (page.155) 1890 0 R (page.156) 1895 0 R (page.157) 1901 0 R (page.158) 1906 0 R] -/Limits [(page.153) (page.158)] +/Names [(page.143) 1911 0 R (page.144) 1918 0 R (page.145) 1925 0 R (page.146) 1934 0 R (page.147) 1941 0 R (page.148) 1948 0 R] +/Limits [(page.143) (page.148)] >> -% 2044 0 obj +% 2310 0 obj << -/Names [(page.159) 1913 0 R (page.16) 944 0 R (page.160) 1924 0 R (page.161) 1929 0 R (page.162) 1933 0 R (page.163) 1937 0 R] -/Limits [(page.159) (page.163)] +/Names [(page.149) 1954 0 R (page.15) 1037 0 R (page.150) 1958 0 R (page.151) 1964 0 R (page.152) 1974 0 R (page.153) 1978 0 R] +/Limits [(page.149) (page.153)] >> -% 2045 0 obj +% 2311 0 obj << -/Names [(page.164) 1946 0 R (page.17) 951 0 R (page.18) 956 0 R (page.19) 961 0 R (page.2) 592 0 R (page.20) 966 0 R] -/Limits [(page.164) (page.20)] +/Names [(page.154) 1986 0 R (page.155) 1991 0 R (page.156) 1995 0 R (page.157) 2001 0 R (page.158) 2006 0 R (page.159) 2013 0 R] +/Limits [(page.154) (page.159)] >> -% 2046 0 obj +% 2312 0 obj << -/Names [(page.21) 970 0 R (page.22) 974 0 R (page.23) 978 0 R (page.24) 984 0 R (page.25) 990 0 R (page.26) 997 0 R] -/Limits [(page.21) (page.26)] +/Names [(page.16) 1044 0 R (page.160) 2024 0 R (page.161) 2029 0 R (page.162) 2039 0 R (page.163) 2054 0 R (page.164) 2064 0 R] +/Limits [(page.16) (page.164)] >> -% 2047 0 obj +% 2313 0 obj << -/Names [(page.27) 1004 0 R (page.28) 1010 0 R (page.29) 1015 0 R (page.3) 811 0 R (page.30) 1026 0 R (page.31) 1033 0 R] -/Limits [(page.27) (page.31)] +/Names [(page.165) 2075 0 R (page.166) 2103 0 R (page.167) 2123 0 R (page.168) 2141 0 R (page.169) 2157 0 R (page.17) 1051 0 R] +/Limits [(page.165) (page.17)] >> -% 2048 0 obj +% 2314 0 obj << -/Names [(page.32) 1043 0 R (page.33) 1049 0 R (page.34) 1060 0 R (page.35) 1065 0 R (page.36) 1072 0 R (page.37) 1077 0 R] -/Limits [(page.32) (page.37)] +/Names [(page.170) 2162 0 R (page.171) 2166 0 R (page.172) 2172 0 R (page.173) 2176 0 R (page.174) 2180 0 R (page.175) 2184 0 R] +/Limits [(page.170) (page.175)] >> -% 2049 0 obj +% 2315 0 obj << -/Names [(page.38) 1086 0 R (page.39) 1094 0 R (page.4) 824 0 R (page.40) 1099 0 R (page.41) 1107 0 R (page.42) 1112 0 R] -/Limits [(page.38) (page.42)] +/Names [(page.176) 2193 0 R (page.18) 1056 0 R (page.19) 1061 0 R (page.2) 673 0 R (page.20) 1067 0 R (page.21) 1071 0 R] +/Limits [(page.176) (page.21)] >> -% 2050 0 obj +% 2316 0 obj << -/Names [(page.43) 1119 0 R (page.44) 1126 0 R (page.45) 1139 0 R (page.46) 1146 0 R (page.47) 1153 0 R (page.48) 1164 0 R] -/Limits [(page.43) (page.48)] +/Names [(page.22) 1075 0 R (page.23) 1079 0 R (page.24) 1085 0 R (page.25) 1091 0 R (page.26) 1098 0 R (page.27) 1105 0 R] +/Limits [(page.22) (page.27)] >> -% 2051 0 obj +% 2317 0 obj << -/Names [(page.49) 1179 0 R (page.5) 837 0 R (page.50) 1186 0 R (page.51) 1197 0 R (page.52) 1203 0 R (page.53) 1214 0 R] -/Limits [(page.49) (page.53)] +/Names [(page.28) 1111 0 R (page.29) 1115 0 R (page.3) 912 0 R (page.30) 1126 0 R (page.31) 1133 0 R (page.32) 1143 0 R] +/Limits [(page.28) (page.32)] >> -% 2052 0 obj +% 2318 0 obj << -/Names [(page.54) 1219 0 R (page.55) 1230 0 R (page.56) 1237 0 R (page.57) 1246 0 R (page.58) 1252 0 R (page.59) 1260 0 R] -/Limits [(page.54) (page.59)] +/Names [(page.33) 1149 0 R (page.34) 1160 0 R (page.35) 1166 0 R (page.36) 1173 0 R (page.37) 1178 0 R (page.38) 1187 0 R] +/Limits [(page.33) (page.38)] >> -% 2053 0 obj +% 2319 0 obj << -/Names [(page.6) 842 0 R (page.60) 1267 0 R (page.61) 1280 0 R (page.62) 1288 0 R (page.63) 1297 0 R (page.64) 1305 0 R] -/Limits [(page.6) (page.64)] +/Names [(page.39) 1195 0 R (page.4) 925 0 R (page.40) 1200 0 R (page.41) 1208 0 R (page.42) 1213 0 R (page.43) 1220 0 R] +/Limits [(page.39) (page.43)] >> -% 2054 0 obj +% 2320 0 obj << -/Names [(page.65) 1309 0 R (page.66) 1324 0 R (page.67) 1329 0 R (page.68) 1336 0 R (page.69) 1343 0 R (page.7) 855 0 R] -/Limits [(page.65) (page.7)] +/Names [(page.44) 1226 0 R (page.45) 1239 0 R (page.46) 1246 0 R (page.47) 1253 0 R (page.48) 1264 0 R (page.49) 1280 0 R] +/Limits [(page.44) (page.49)] >> -% 2055 0 obj +% 2321 0 obj << -/Names [(page.70) 1347 0 R (page.71) 1353 0 R (page.72) 1359 0 R (page.73) 1365 0 R (page.74) 1376 0 R (page.75) 1381 0 R] -/Limits [(page.70) (page.75)] +/Names [(page.5) 937 0 R (page.50) 1287 0 R (page.51) 1298 0 R (page.52) 1304 0 R (page.53) 1315 0 R (page.54) 1320 0 R] +/Limits [(page.5) (page.54)] >> -% 2056 0 obj +% 2322 0 obj << -/Names [(page.76) 1390 0 R (page.77) 1397 0 R (page.78) 1402 0 R (page.79) 1408 0 R (page.8) 876 0 R (page.80) 1413 0 R] -/Limits [(page.76) (page.80)] +/Names [(page.55) 1331 0 R (page.56) 1337 0 R (page.57) 1346 0 R (page.58) 1352 0 R (page.59) 1360 0 R (page.6) 942 0 R] +/Limits [(page.55) (page.6)] >> -% 2057 0 obj +% 2323 0 obj << -/Names [(page.81) 1421 0 R (page.82) 1426 0 R (page.83) 1434 0 R (page.84) 1440 0 R (page.85) 1449 0 R (page.86) 1463 0 R] -/Limits [(page.81) (page.86)] +/Names [(page.60) 1367 0 R (page.61) 1381 0 R (page.62) 1389 0 R (page.63) 1398 0 R (page.64) 1406 0 R (page.65) 1410 0 R] +/Limits [(page.60) (page.65)] >> -% 2058 0 obj +% 2324 0 obj << -/Names [(page.87) 1468 0 R (page.88) 1481 0 R (page.89) 1487 0 R (page.9) 887 0 R (page.90) 1494 0 R (page.91) 1498 0 R] -/Limits [(page.87) (page.91)] +/Names [(page.66) 1425 0 R (page.67) 1430 0 R (page.68) 1437 0 R (page.69) 1444 0 R (page.7) 955 0 R (page.70) 1448 0 R] +/Limits [(page.66) (page.70)] >> -% 2059 0 obj +% 2325 0 obj << -/Names [(page.92) 1506 0 R (page.93) 1510 0 R (page.94) 1519 0 R (page.95) 1527 0 R (page.96) 1531 0 R (page.97) 1536 0 R] -/Limits [(page.92) (page.97)] +/Names [(page.71) 1453 0 R (page.72) 1459 0 R (page.73) 1465 0 R (page.74) 1476 0 R (page.75) 1481 0 R (page.76) 1490 0 R] +/Limits [(page.71) (page.76)] >> -% 2060 0 obj +% 2326 0 obj << -/Names [(page.98) 1541 0 R (page.99) 1548 0 R (page.i) 638 0 R (page.ii) 696 0 R (page.iii) 742 0 R (page.iv) 763 0 R] -/Limits [(page.98) (page.iv)] +/Names [(page.77) 1498 0 R (page.78) 1503 0 R (page.79) 1509 0 R (page.8) 977 0 R (page.80) 1514 0 R (page.81) 1522 0 R] +/Limits [(page.77) (page.81)] >> -% 2061 0 obj +% 2327 0 obj << -/Names [(precdata) 1005 0 R (section*.1) 639 0 R (section*.2) 1938 0 R (section.1) 7 0 R (section.10) 541 0 R (section.11) 569 0 R] -/Limits [(precdata) (section.11)] +/Names [(page.82) 1527 0 R (page.83) 1535 0 R (page.84) 1541 0 R (page.85) 1550 0 R (page.86) 1564 0 R (page.87) 1568 0 R] +/Limits [(page.82) (page.87)] >> -% 2062 0 obj +% 2328 0 obj << -/Names [(section.2) 11 0 R (section.3) 35 0 R (section.4) 220 0 R (section.5) 288 0 R (section.6) 308 0 R (section.7) 421 0 R] -/Limits [(section.2) (section.7)] +/Names [(page.88) 1581 0 R (page.89) 1587 0 R (page.9) 988 0 R (page.90) 1594 0 R (page.91) 1598 0 R (page.92) 1606 0 R] +/Limits [(page.88) (page.92)] >> -% 2063 0 obj +% 2329 0 obj << -/Names [(section.8) 493 0 R (section.9) 513 0 R (spbasedata) 952 0 R (spdata) 945 0 R (subsection.10.1) 545 0 R (subsection.10.2) 549 0 R] -/Limits [(section.8) (subsection.10.2)] +/Names [(page.93) 1611 0 R (page.94) 1620 0 R (page.95) 1628 0 R (page.96) 1632 0 R (page.97) 1637 0 R (page.98) 1642 0 R] +/Limits [(page.93) (page.98)] >> -% 2064 0 obj +% 2330 0 obj << -/Names [(subsection.10.3) 553 0 R (subsection.10.4) 557 0 R (subsection.10.5) 561 0 R (subsection.10.6) 565 0 R (subsection.11.1) 573 0 R (subsection.2.1) 15 0 R] -/Limits [(subsection.10.3) (subsection.2.1)] +/Names [(page.99) 1649 0 R (page.i) 719 0 R (page.ii) 776 0 R (page.iii) 823 0 R (page.iv) 863 0 R (precdata) 1106 0 R] +/Limits [(page.99) (precdata)] >> -% 2065 0 obj +% 2331 0 obj << -/Names [(subsection.2.2) 19 0 R (subsection.2.3) 23 0 R (subsection.2.4) 31 0 R (subsection.3.1) 39 0 R (subsection.3.2) 103 0 R (subsection.3.3) 183 0 R] -/Limits [(subsection.2.2) (subsection.3.3)] +/Names [(section*.1) 720 0 R (section*.10) 618 0 R (section*.11) 622 0 R (section*.12) 626 0 R (section*.13) 630 0 R (section*.14) 634 0 R] +/Limits [(section*.1) (section*.14)] >> -% 2066 0 obj +% 2332 0 obj << -/Names [(subsection.3.4) 212 0 R (subsection.3.5) 216 0 R (subsection.4.1) 224 0 R (subsection.4.10) 260 0 R (subsection.4.11) 264 0 R (subsection.4.12) 268 0 R] -/Limits [(subsection.3.4) (subsection.4.12)] +/Names [(section*.15) 638 0 R (section*.16) 642 0 R (section*.17) 646 0 R (section*.18) 650 0 R (section*.19) 654 0 R (section*.2) 2055 0 R] +/Limits [(section*.15) (section*.2)] >> -% 2067 0 obj +% 2333 0 obj << -/Names [(subsection.4.13) 272 0 R (subsection.4.14) 276 0 R (subsection.4.15) 280 0 R (subsection.4.16) 284 0 R (subsection.4.2) 228 0 R (subsection.4.3) 232 0 R] -/Limits [(subsection.4.13) (subsection.4.3)] +/Names [(section*.20) 2185 0 R (section*.3) 2084 0 R (section*.4) 2105 0 R (section*.5) 2125 0 R (section*.6) 601 0 R (section*.7) 606 0 R] +/Limits [(section*.20) (section*.7)] >> -% 2068 0 obj +% 2334 0 obj << -/Names [(subsection.4.4) 236 0 R (subsection.4.5) 240 0 R (subsection.4.6) 244 0 R (subsection.4.7) 248 0 R (subsection.4.8) 252 0 R (subsection.4.9) 256 0 R] -/Limits [(subsection.4.4) (subsection.4.9)] +/Names [(section*.8) 610 0 R (section*.9) 614 0 R (section.1) 7 0 R (section.10) 541 0 R (section.11) 569 0 R (section.12) 577 0 R] +/Limits [(section*.8) (section.12)] >> -% 2069 0 obj +% 2335 0 obj << -/Names [(subsection.5.1) 292 0 R (subsection.5.2) 296 0 R (subsection.5.3) 300 0 R (subsection.5.4) 304 0 R (subsection.6.1) 312 0 R (subsection.6.10) 348 0 R] -/Limits [(subsection.5.1) (subsection.6.10)] +/Names [(section.13) 597 0 R (section.2) 11 0 R (section.3) 35 0 R (section.4) 220 0 R (section.5) 288 0 R (section.6) 308 0 R] +/Limits [(section.13) (section.6)] >> -% 2070 0 obj +% 2336 0 obj << -/Names [(subsection.6.11) 352 0 R (subsection.6.12) 356 0 R (subsection.6.13) 360 0 R (subsection.6.14) 364 0 R (subsection.6.15) 368 0 R (subsection.6.16) 372 0 R] -/Limits [(subsection.6.11) (subsection.6.16)] +/Names [(section.7) 421 0 R (section.8) 493 0 R (section.9) 513 0 R (spbasedata) 1052 0 R (spdata) 1045 0 R (subsection.10.1) 545 0 R] +/Limits [(section.7) (subsection.10.1)] >> -% 2071 0 obj +% 2337 0 obj << -/Names [(subsection.6.17) 376 0 R (subsection.6.18) 380 0 R (subsection.6.19) 384 0 R (subsection.6.2) 316 0 R (subsection.6.20) 388 0 R (subsection.6.21) 392 0 R] -/Limits [(subsection.6.17) (subsection.6.21)] +/Names [(subsection.10.2) 549 0 R (subsection.10.3) 553 0 R (subsection.10.4) 557 0 R (subsection.10.5) 561 0 R (subsection.10.6) 565 0 R (subsection.11.1) 573 0 R] +/Limits [(subsection.10.2) (subsection.11.1)] >> -% 2072 0 obj +% 2338 0 obj << -/Names [(subsection.6.22) 396 0 R (subsection.6.23) 400 0 R (subsection.6.24) 405 0 R (subsection.6.25) 409 0 R (subsection.6.26) 413 0 R (subsection.6.27) 417 0 R] -/Limits [(subsection.6.22) (subsection.6.27)] +/Names [(subsection.12.1) 581 0 R (subsection.12.2) 585 0 R (subsection.12.3) 589 0 R (subsection.12.4) 593 0 R (subsection.2.1) 15 0 R (subsection.2.2) 19 0 R] +/Limits [(subsection.12.1) (subsection.2.2)] >> -% 2073 0 obj +% 2339 0 obj << -/Names [(subsection.6.3) 320 0 R (subsection.6.4) 324 0 R (subsection.6.5) 328 0 R (subsection.6.6) 332 0 R (subsection.6.7) 336 0 R (subsection.6.8) 340 0 R] -/Limits [(subsection.6.3) (subsection.6.8)] +/Names [(subsection.2.3) 23 0 R (subsection.2.4) 31 0 R (subsection.3.1) 39 0 R (subsection.3.2) 103 0 R (subsection.3.3) 183 0 R (subsection.3.4) 212 0 R] +/Limits [(subsection.2.3) (subsection.3.4)] >> -% 2074 0 obj +% 2340 0 obj << -/Names [(subsection.6.9) 344 0 R (subsection.7.1) 425 0 R (subsection.7.10) 461 0 R (subsection.7.11) 465 0 R (subsection.7.12) 469 0 R (subsection.7.13) 473 0 R] -/Limits [(subsection.6.9) (subsection.7.13)] +/Names [(subsection.3.5) 216 0 R (subsection.4.1) 224 0 R (subsection.4.10) 260 0 R (subsection.4.11) 264 0 R (subsection.4.12) 268 0 R (subsection.4.13) 272 0 R] +/Limits [(subsection.3.5) (subsection.4.13)] >> -% 2075 0 obj +% 2341 0 obj << -/Names [(subsection.7.14) 477 0 R (subsection.7.15) 481 0 R (subsection.7.16) 485 0 R (subsection.7.17) 489 0 R (subsection.7.2) 429 0 R (subsection.7.3) 433 0 R] -/Limits [(subsection.7.14) (subsection.7.3)] +/Names [(subsection.4.14) 276 0 R (subsection.4.15) 280 0 R (subsection.4.16) 284 0 R (subsection.4.2) 228 0 R (subsection.4.3) 232 0 R (subsection.4.4) 236 0 R] +/Limits [(subsection.4.14) (subsection.4.4)] >> -% 2076 0 obj +% 2342 0 obj << -/Names [(subsection.7.4) 437 0 R (subsection.7.5) 441 0 R (subsection.7.6) 445 0 R (subsection.7.7) 449 0 R (subsection.7.8) 453 0 R (subsection.7.9) 457 0 R] -/Limits [(subsection.7.4) (subsection.7.9)] +/Names [(subsection.4.5) 240 0 R (subsection.4.6) 244 0 R (subsection.4.7) 248 0 R (subsection.4.8) 252 0 R (subsection.4.9) 256 0 R (subsection.5.1) 292 0 R] +/Limits [(subsection.4.5) (subsection.5.1)] >> -% 2077 0 obj +% 2343 0 obj << -/Names [(subsection.8.1) 497 0 R (subsection.8.2) 501 0 R (subsection.8.3) 505 0 R (subsection.8.4) 509 0 R (subsection.9.1) 517 0 R (subsection.9.2) 521 0 R] -/Limits [(subsection.8.1) (subsection.9.2)] +/Names [(subsection.5.2) 296 0 R (subsection.5.3) 300 0 R (subsection.5.4) 304 0 R (subsection.6.1) 312 0 R (subsection.6.10) 348 0 R (subsection.6.11) 352 0 R] +/Limits [(subsection.5.2) (subsection.6.11)] >> -% 2078 0 obj +% 2344 0 obj << -/Names [(subsection.9.3) 525 0 R (subsection.9.4) 529 0 R (subsection.9.5) 533 0 R (subsection.9.6) 537 0 R (subsubsection.2.3.1) 27 0 R (subsubsection.3.1.1) 43 0 R] -/Limits [(subsection.9.3) (subsubsection.3.1.1)] +/Names [(subsection.6.12) 356 0 R (subsection.6.13) 360 0 R (subsection.6.14) 364 0 R (subsection.6.15) 368 0 R (subsection.6.16) 372 0 R (subsection.6.17) 376 0 R] +/Limits [(subsection.6.12) (subsection.6.17)] >> -% 2079 0 obj +% 2345 0 obj << -/Names [(subsubsection.3.1.10) 79 0 R (subsubsection.3.1.11) 83 0 R (subsubsection.3.1.12) 87 0 R (subsubsection.3.1.13) 91 0 R (subsubsection.3.1.14) 95 0 R (subsubsection.3.1.15) 99 0 R] -/Limits [(subsubsection.3.1.10) (subsubsection.3.1.15)] +/Names [(subsection.6.18) 380 0 R (subsection.6.19) 384 0 R (subsection.6.2) 316 0 R (subsection.6.20) 388 0 R (subsection.6.21) 392 0 R (subsection.6.22) 396 0 R] +/Limits [(subsection.6.18) (subsection.6.22)] >> -% 2080 0 obj +% 2346 0 obj << -/Names [(subsubsection.3.1.2) 47 0 R (subsubsection.3.1.3) 51 0 R (subsubsection.3.1.4) 55 0 R (subsubsection.3.1.5) 59 0 R (subsubsection.3.1.6) 63 0 R (subsubsection.3.1.7) 67 0 R] -/Limits [(subsubsection.3.1.2) (subsubsection.3.1.7)] +/Names [(subsection.6.23) 400 0 R (subsection.6.24) 405 0 R (subsection.6.25) 409 0 R (subsection.6.26) 413 0 R (subsection.6.27) 417 0 R (subsection.6.3) 320 0 R] +/Limits [(subsection.6.23) (subsection.6.3)] >> -% 2081 0 obj +% 2347 0 obj << -/Names [(subsubsection.3.1.8) 71 0 R (subsubsection.3.1.9) 75 0 R (subsubsection.3.2.1) 107 0 R (subsubsection.3.2.10) 143 0 R (subsubsection.3.2.11) 147 0 R (subsubsection.3.2.12) 151 0 R] -/Limits [(subsubsection.3.1.8) (subsubsection.3.2.12)] +/Names [(subsection.6.4) 324 0 R (subsection.6.5) 328 0 R (subsection.6.6) 332 0 R (subsection.6.7) 336 0 R (subsection.6.8) 340 0 R (subsection.6.9) 344 0 R] +/Limits [(subsection.6.4) (subsection.6.9)] >> -% 2082 0 obj +% 2348 0 obj << -/Names [(subsubsection.3.2.13) 155 0 R (subsubsection.3.2.14) 159 0 R (subsubsection.3.2.15) 163 0 R (subsubsection.3.2.16) 167 0 R (subsubsection.3.2.17) 171 0 R (subsubsection.3.2.18) 175 0 R] -/Limits [(subsubsection.3.2.13) (subsubsection.3.2.18)] +/Names [(subsection.7.1) 425 0 R (subsection.7.10) 461 0 R (subsection.7.11) 465 0 R (subsection.7.12) 469 0 R (subsection.7.13) 473 0 R (subsection.7.14) 477 0 R] +/Limits [(subsection.7.1) (subsection.7.14)] >> -% 2083 0 obj +% 2349 0 obj << -/Names [(subsubsection.3.2.19) 179 0 R (subsubsection.3.2.2) 111 0 R (subsubsection.3.2.3) 115 0 R (subsubsection.3.2.4) 119 0 R (subsubsection.3.2.5) 123 0 R (subsubsection.3.2.6) 127 0 R] -/Limits [(subsubsection.3.2.19) (subsubsection.3.2.6)] +/Names [(subsection.7.15) 481 0 R (subsection.7.16) 485 0 R (subsection.7.17) 489 0 R (subsection.7.2) 429 0 R (subsection.7.3) 433 0 R (subsection.7.4) 437 0 R] +/Limits [(subsection.7.15) (subsection.7.4)] >> -% 2084 0 obj +% 2350 0 obj << -/Names [(subsubsection.3.2.7) 131 0 R (subsubsection.3.2.8) 135 0 R (subsubsection.3.2.9) 139 0 R (subsubsection.3.3.1) 187 0 R (subsubsection.3.3.2) 191 0 R (subsubsection.3.3.3) 195 0 R] -/Limits [(subsubsection.3.2.7) (subsubsection.3.3.3)] +/Names [(subsection.7.5) 441 0 R (subsection.7.6) 445 0 R (subsection.7.7) 449 0 R (subsection.7.8) 453 0 R (subsection.7.9) 457 0 R (subsection.8.1) 497 0 R] +/Limits [(subsection.7.5) (subsection.8.1)] >> -% 2085 0 obj +% 2351 0 obj << -/Names [(subsubsection.3.3.4) 199 0 R (subsubsection.3.3.5) 204 0 R (subsubsection.3.3.6) 208 0 R (table.1) 998 0 R (table.10) 1147 0 R (table.11) 1155 0 R] -/Limits [(subsubsection.3.3.4) (table.11)] +/Names [(subsection.8.2) 501 0 R (subsection.8.3) 505 0 R (subsection.8.4) 509 0 R (subsection.9.1) 517 0 R (subsection.9.2) 521 0 R (subsection.9.3) 525 0 R] +/Limits [(subsection.8.2) (subsection.9.3)] >> -% 2086 0 obj +% 2352 0 obj << -/Names [(table.12) 1168 0 R (table.13) 1187 0 R (table.14) 1215 0 R (table.15) 1231 0 R (table.16) 1247 0 R (table.17) 1261 0 R] -/Limits [(table.12) (table.17)] +/Names [(subsection.9.4) 529 0 R (subsection.9.5) 533 0 R (subsection.9.6) 537 0 R (subsubsection.2.3.1) 27 0 R (subsubsection.3.1.1) 43 0 R (subsubsection.3.1.10) 79 0 R] +/Limits [(subsection.9.4) (subsubsection.3.1.10)] >> -% 2087 0 obj +% 2353 0 obj << -/Names [(table.18) 1289 0 R (table.19) 1325 0 R (table.2) 1044 0 R (table.20) 1337 0 R (table.3) 1061 0 R (table.4) 1073 0 R] -/Limits [(table.18) (table.4)] +/Names [(subsubsection.3.1.11) 83 0 R (subsubsection.3.1.12) 87 0 R (subsubsection.3.1.13) 91 0 R (subsubsection.3.1.14) 95 0 R (subsubsection.3.1.15) 99 0 R (subsubsection.3.1.2) 47 0 R] +/Limits [(subsubsection.3.1.11) (subsubsection.3.1.2)] >> -% 2088 0 obj +% 2354 0 obj +<< +/Names [(subsubsection.3.1.3) 51 0 R (subsubsection.3.1.4) 55 0 R (subsubsection.3.1.5) 59 0 R (subsubsection.3.1.6) 63 0 R (subsubsection.3.1.7) 67 0 R (subsubsection.3.1.8) 71 0 R] +/Limits [(subsubsection.3.1.3) (subsubsection.3.1.8)] +>> +% 2355 0 obj +<< +/Names [(subsubsection.3.1.9) 75 0 R (subsubsection.3.2.1) 107 0 R (subsubsection.3.2.10) 143 0 R (subsubsection.3.2.11) 147 0 R (subsubsection.3.2.12) 151 0 R (subsubsection.3.2.13) 155 0 R] +/Limits [(subsubsection.3.1.9) (subsubsection.3.2.13)] +>> +% 2356 0 obj +<< +/Names [(subsubsection.3.2.14) 159 0 R (subsubsection.3.2.15) 163 0 R (subsubsection.3.2.16) 167 0 R (subsubsection.3.2.17) 171 0 R (subsubsection.3.2.18) 175 0 R (subsubsection.3.2.19) 179 0 R] +/Limits [(subsubsection.3.2.14) (subsubsection.3.2.19)] +>> +% 2357 0 obj +<< +/Names [(subsubsection.3.2.2) 111 0 R (subsubsection.3.2.3) 115 0 R (subsubsection.3.2.4) 119 0 R (subsubsection.3.2.5) 123 0 R (subsubsection.3.2.6) 127 0 R (subsubsection.3.2.7) 131 0 R] +/Limits [(subsubsection.3.2.2) (subsubsection.3.2.7)] +>> +% 2358 0 obj +<< +/Names [(subsubsection.3.2.8) 135 0 R (subsubsection.3.2.9) 139 0 R (subsubsection.3.3.1) 187 0 R (subsubsection.3.3.2) 191 0 R (subsubsection.3.3.3) 195 0 R (subsubsection.3.3.4) 199 0 R] +/Limits [(subsubsection.3.2.8) (subsubsection.3.3.4)] +>> +% 2359 0 obj +<< +/Names [(subsubsection.3.3.5) 204 0 R (subsubsection.3.3.6) 208 0 R (table.1) 1099 0 R (table.10) 1247 0 R (table.11) 1255 0 R (table.12) 1268 0 R] +/Limits [(subsubsection.3.3.5) (table.12)] +>> +% 2360 0 obj +<< +/Names [(table.13) 1288 0 R (table.14) 1316 0 R (table.15) 1332 0 R (table.16) 1347 0 R (table.17) 1361 0 R (table.18) 1390 0 R] +/Limits [(table.13) (table.18)] +>> +% 2361 0 obj +<< +/Names [(table.19) 1426 0 R (table.2) 1144 0 R (table.20) 1438 0 R (table.21) 2059 0 R (table.3) 1161 0 R (table.4) 1174 0 R] +/Limits [(table.19) (table.4)] +>> +% 2362 0 obj << -/Names [(table.5) 1087 0 R (table.6) 1095 0 R (table.7) 1108 0 R (table.8) 1120 0 R (table.9) 1140 0 R (title.0) 3 0 R] +/Names [(table.5) 1188 0 R (table.6) 1196 0 R (table.7) 1209 0 R (table.8) 1221 0 R (table.9) 1240 0 R (title.0) 3 0 R] /Limits [(table.5) (title.0)] >> -% 2089 0 obj +% 2363 0 obj << -/Names [(vbasedata) 933 0 R (vdata) 985 0 R] +/Names [(vbasedata) 1033 0 R (vdata) 1086 0 R] /Limits [(vbasedata) (vdata)] >> -% 2090 0 obj +% 2364 0 obj << -/Kids [1997 0 R 1998 0 R 1999 0 R 2000 0 R 2001 0 R 2002 0 R] +/Kids [2261 0 R 2262 0 R 2263 0 R 2264 0 R 2266 0 R 2267 0 R] /Limits [(Doc-Start) (Item.125)] >> -% 2091 0 obj + +endstream +endobj +2388 0 obj << -/Kids [2003 0 R 2004 0 R 2005 0 R 2006 0 R 2007 0 R 2008 0 R] -/Limits [(Item.126) (Item.29)] + /Title (Parallel Sparse BLAS V. 3.9.0) /Subject (Parallel Sparse Basic Linear Algebra Subroutines) /Keywords (Computer Science Linear Algebra Fluid Dynamics Parallel Linux MPI PSBLAS Iterative Solvers Preconditioners) /Creator (pdfLaTeX) /Producer ($Id$) /Author()/Title()/Subject()/Creator(LaTeX with hyperref)/Keywords() +/CreationDate (D:20240716132003+02'00') +/ModDate (D:20240716132003+02'00') +/Trapped /False +/PTEX.Fullbanner (This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023/Fedora 40) kpathsea version 6.3.5) >> -% 2092 0 obj +endobj +2366 0 obj << -/Kids [2009 0 R 2010 0 R 2011 0 R 2012 0 R 2013 0 R 2015 0 R] -/Limits [(Item.3) (Item.61)] +/Type /ObjStm +/N 22 +/First 209 +/Length 2765 >> -% 2093 0 obj +stream +2365 0 2367 112 2368 223 2369 333 2370 449 2371 573 2372 693 2373 806 2374 918 2375 1029 +2376 1140 2377 1254 2378 1375 2379 1501 2380 1627 2381 1757 2382 1878 2383 1999 2384 2121 2385 2224 +2386 2308 2387 2343 +% 2365 0 obj << -/Kids [2016 0 R 2017 0 R 2018 0 R 2019 0 R 2020 0 R 2021 0 R] -/Limits [(Item.62) (Item.94)] +/Kids [2268 0 R 2269 0 R 2270 0 R 2271 0 R 2272 0 R 2273 0 R] +/Limits [(Item.126) (Item.26)] >> -% 2094 0 obj +% 2367 0 obj << -/Kids [2022 0 R 2023 0 R 2024 0 R 2025 0 R 2026 0 R 2027 0 R] -/Limits [(Item.95) (listing.4)] +/Kids [2274 0 R 2275 0 R 2276 0 R 2277 0 R 2278 0 R 2279 0 R] +/Limits [(Item.27) (Item.59)] >> -% 2095 0 obj +% 2368 0 obj << -/Kids [2028 0 R 2029 0 R 2030 0 R 2031 0 R 2032 0 R 2033 0 R] -/Limits [(listing.5) (page.103)] +/Kids [2280 0 R 2281 0 R 2282 0 R 2283 0 R 2284 0 R 2285 0 R] +/Limits [(Item.6) (Item.91)] >> -% 2096 0 obj +% 2369 0 obj << -/Kids [2034 0 R 2035 0 R 2036 0 R 2037 0 R 2038 0 R 2039 0 R] -/Limits [(page.104) (page.136)] +/Kids [2286 0 R 2287 0 R 2288 0 R 2289 0 R 2290 0 R 2291 0 R] +/Limits [(Item.92) (equation.4.1)] >> -% 2097 0 obj +% 2370 0 obj << -/Kids [2040 0 R 2041 0 R 2042 0 R 2043 0 R 2044 0 R 2045 0 R] -/Limits [(page.137) (page.20)] +/Kids [2292 0 R 2293 0 R 2294 0 R 2295 0 R 2296 0 R 2297 0 R] +/Limits [(equation.4.2) (lstnumber.-11.1)] >> -% 2098 0 obj +% 2371 0 obj << -/Kids [2046 0 R 2047 0 R 2048 0 R 2049 0 R 2050 0 R 2051 0 R] -/Limits [(page.21) (page.53)] +/Kids [2298 0 R 2299 0 R 2300 0 R 2301 0 R 2302 0 R 2303 0 R] +/Limits [(lstnumber.-12.1) (page.115)] >> -% 2099 0 obj +% 2372 0 obj << -/Kids [2052 0 R 2053 0 R 2054 0 R 2055 0 R 2056 0 R 2057 0 R] -/Limits [(page.54) (page.86)] +/Kids [2304 0 R 2305 0 R 2306 0 R 2307 0 R 2308 0 R 2309 0 R] +/Limits [(page.116) (page.148)] >> -% 2100 0 obj +% 2373 0 obj << -/Kids [2058 0 R 2059 0 R 2060 0 R 2061 0 R 2062 0 R 2063 0 R] -/Limits [(page.87) (subsection.10.2)] +/Kids [2310 0 R 2311 0 R 2312 0 R 2313 0 R 2314 0 R 2315 0 R] +/Limits [(page.149) (page.21)] >> -% 2101 0 obj +% 2374 0 obj << -/Kids [2064 0 R 2065 0 R 2066 0 R 2067 0 R 2068 0 R 2069 0 R] -/Limits [(subsection.10.3) (subsection.6.10)] +/Kids [2316 0 R 2317 0 R 2318 0 R 2319 0 R 2320 0 R 2321 0 R] +/Limits [(page.22) (page.54)] >> -% 2102 0 obj +% 2375 0 obj << -/Kids [2070 0 R 2071 0 R 2072 0 R 2073 0 R 2074 0 R 2075 0 R] -/Limits [(subsection.6.11) (subsection.7.3)] +/Kids [2322 0 R 2323 0 R 2324 0 R 2325 0 R 2326 0 R 2327 0 R] +/Limits [(page.55) (page.87)] >> -% 2103 0 obj +% 2376 0 obj << -/Kids [2076 0 R 2077 0 R 2078 0 R 2079 0 R 2080 0 R 2081 0 R] -/Limits [(subsection.7.4) (subsubsection.3.2.12)] +/Kids [2328 0 R 2329 0 R 2330 0 R 2331 0 R 2332 0 R 2333 0 R] +/Limits [(page.88) (section*.7)] >> -% 2104 0 obj +% 2377 0 obj << -/Kids [2082 0 R 2083 0 R 2084 0 R 2085 0 R 2086 0 R 2087 0 R] -/Limits [(subsubsection.3.2.13) (table.4)] +/Kids [2334 0 R 2335 0 R 2336 0 R 2337 0 R 2338 0 R 2339 0 R] +/Limits [(section*.8) (subsection.3.4)] >> -% 2105 0 obj +% 2378 0 obj << -/Kids [2088 0 R 2089 0 R] -/Limits [(table.5) (vdata)] +/Kids [2340 0 R 2341 0 R 2342 0 R 2343 0 R 2344 0 R 2345 0 R] +/Limits [(subsection.3.5) (subsection.6.22)] >> -% 2106 0 obj +% 2379 0 obj << -/Kids [2090 0 R 2091 0 R 2092 0 R 2093 0 R 2094 0 R 2095 0 R] -/Limits [(Doc-Start) (page.103)] +/Kids [2346 0 R 2347 0 R 2348 0 R 2349 0 R 2350 0 R 2351 0 R] +/Limits [(subsection.6.23) (subsection.9.3)] >> -% 2107 0 obj +% 2380 0 obj << -/Kids [2096 0 R 2097 0 R 2098 0 R 2099 0 R 2100 0 R 2101 0 R] -/Limits [(page.104) (subsection.6.10)] +/Kids [2352 0 R 2353 0 R 2354 0 R 2355 0 R 2356 0 R 2357 0 R] +/Limits [(subsection.9.4) (subsubsection.3.2.7)] >> -% 2108 0 obj +% 2381 0 obj << -/Kids [2102 0 R 2103 0 R 2104 0 R 2105 0 R] -/Limits [(subsection.6.11) (vdata)] +/Kids [2358 0 R 2359 0 R 2360 0 R 2361 0 R 2362 0 R 2363 0 R] +/Limits [(subsubsection.3.2.8) (vdata)] >> -% 2109 0 obj +% 2382 0 obj +<< +/Kids [2364 0 R 2365 0 R 2367 0 R 2368 0 R 2369 0 R 2370 0 R] +/Limits [(Doc-Start) (lstnumber.-11.1)] +>> +% 2383 0 obj << -/Kids [2106 0 R 2107 0 R 2108 0 R] +/Kids [2371 0 R 2372 0 R 2373 0 R 2374 0 R 2375 0 R 2376 0 R] +/Limits [(lstnumber.-12.1) (section*.7)] +>> +% 2384 0 obj +<< +/Kids [2377 0 R 2378 0 R 2379 0 R 2380 0 R 2381 0 R] +/Limits [(section*.8) (vdata)] +>> +% 2385 0 obj +<< +/Kids [2382 0 R 2383 0 R 2384 0 R] /Limits [(Doc-Start) (vdata)] >> -% 2110 0 obj +% 2386 0 obj << -/Dests 2109 0 R +/Dests 2385 0 R >> -% 2111 0 obj +% 2387 0 obj << /Type /Catalog -/Pages 1994 0 R -/Outlines 1995 0 R -/Names 2110 0 R +/Pages 2258 0 R +/Outlines 2259 0 R +/Names 2386 0 R /URI (http://ce.uniroma2.it/psblas) /PageMode/UseOutlines/PageLabels<>2<>6<>]>> -/OpenAction 576 0 R +/OpenAction 657 0 R >> endstream endobj -2113 0 obj +2389 0 obj << /Type /XRef -/Index [0 2114] -/Size 2114 +/Index [0 2390] +/Size 2390 /W [1 3 1] -/Root 2111 0 R -/Info 2112 0 R -/ID [<3E2BE59A045B6F63645E5D8EE5B06F10> <3E2BE59A045B6F63645E5D8EE5B06F10>] -/Length 10570 ->> -stream -ÿ”[ÌSìKÌRìUÌQì]ÌP=ÌO  -=ÌN  =.ÌM =5ÌL=:ÌK=;ÌJ=[ÌI=\ÌH=]ÌG=^ÌF=bÌE=cÌD  ÌC!" ÌB#$ ÌA%& Ì@'( Ì?)*  Ì>+,  Ì=-. Ì</0 Ì;12 Ì:34 Ì956 Ì878 "Ì79: #Ì6;< %Ì5=> )Ì4?@ *Ì3AB +Ì2CD /Ì1EF 0Ì0GH 4Ì/IJ 5Ì.KL 9Ì-MN :Ì,OP ;Ì+QR ?Ì*ST EÌ)UV FÌ(WX GÌ'YZ HÌ&[\ NÌ%]^ OÌ$_` PÌ#ab VÌ"cËR [Ì!ËË \Ì ËË ]ÌËËóÌËËóÌË Ë -óÌË Ë óÌË Ëó.ÌËËó9ÌËËóEÌËËóMÌËËóYÌËËbÌËËbÌËËbÌËËbÌËË b(ÌË!Ë"b<ÌË#Ë$bWÌË%Ë&ÑÌË'Ë(ÑÌ Ë)Ë*ÑÌ Ë+Ë,ÑÌ Ë-Ë.Ñ3Ì -Ë/Ë0ÑQÌ Ë1Ë2ÑZÌË3Ë4DÌË5Ë6DÌË7Ë8DÌË9Ë:D&ÌË;Ë<D-ÌË=Ë>D2ÌË?Ë@D8ÌËAËBDCÌËCËDDNÄcËEËF¸ÄbËGËH¸ÄaËIËJ¸Ä`ËKËL¸"Ä_ËMËN¸,Ä^ËOËP¸8Ä]ËQËR¸@Ä\ËSËT¸DÄ[ËUËV¸IÄZËWËX¸SÄYËYËZ¸XÄXË[Ë\¸^ÄWË]Ë^ÄVË_Ë`ÄUËaËb ÄTËc”3ÄS””ÄR””'ÄQ””+ÄP””;ÄO” ” -?ÄN” ” EÄM” ”KÄL””RÄK””VÄJ””ZÄI””^ÄH””bÄG””ŠÄF””Š ÄE””ŠÄD”” Š ÄC”!”"Š*ÄB”#”$Š4ÄA”%”&Š>Ä@”'”(ŠJÄ?”)”*ŠOÄ>”+”,ŠVÄ=”-”.Š_Ä<”/”0þÄ;”1”2þÄ:”3”4þ Ä9”5”6þÄ8”7”8þÄ7”9”:þÄ6”;”<þ&Ä5”=”>þ-Ä4”?”@þ4Ä3”A”Bþ;Ä2”C”DþDÄ1”E”FþJÄ0”G”HþSÄ/”I”Jþ^Ä.”K”LþcÄ-”M”NcÄ,”O”Pc Ä+”Q”Rc Ä*”S”TcÄ)”U”V”W5‘”X”\2}”Y”ZÄÄÄÄF=”_”]öÛ”^”a”b”c………………………… … -… … … ………………………………………!…"…#…$…%…&…'…(…)…*…/…-”`÷e…+…,…………… ×…0…1…2…3…4…5…6…7…8…9…:…;…<…=…>…?…@…A…B…C…D…E…F…G…H…I…J…K…L…M…N…O…P…Q…R…S…T…U…V…W…X…Y…Z…[…\…]…a…_….QX…^…b…cìììììììììì ì -ì ì ì ìììììììììììììììììì ì!ì"ì#ì%ì&ì(ì*ì/ì-…`¬ì,ìì$ì'ì)ì+¨àì1ì3ì5ì6ì7ì8ì9ì:ì;ì<ì=ì?ì.Aì>ì0ì2ì4ìAìBìCìDìEìFìGìHìIìJìLì@)ÒÄc=c4c*c>c<c1c2c;c.c/ìRìSìT^KìYìWìMIzìVìNìOìPìQcc0ìZì[ì`ìXn¤ì\cac`c_ì^ì_c:==¢6=ìa==ìbìc=ÇÆc-c,== = =äï= -=== øj= =========+=)=`======= =!="=#=$=%=&='=(cbÄ=,=6=*6Ø=-=/=0=1=2=3=4=8===7S=9=<=?=@=A=H=>sP=B=C=D=E=F=G=J=K=V=I‹B=L=M=N=O=P=Q=R=S=Tcc=U=X=Y=_=W¢â=Z =`¶{=aÄ   ë      -þn       ²  c9   2ê   &  Nž ! $ , ']î (Ä  1 -nO . 6 2‚¢ 3 < 7”[ 8 @ =¦ > B C J A¾„ D I M Q K܃ LÄ - S T W RíÆ Uó Y a _ Xü™ Z ^ có `F bVòóóAKóóóó ó -ó ó óóóB&ó Äóóóó^fóÄ óóóóóó óhóóó"ó%ó!…[ó#ó$ó'ó(ó)ó*ó+ó,ó0ó&”‹ó-ó/ó3ó1µó2ó5ó6ó7ó;ó4·žó8ó:ó?ó<Õó=ó>Ä óAóBóCóGó@ßuóDóFóIóJóKóOóHøóLóNóSóP óQóRóUóVóWó[óTuóXóZó^ó\;>ó]ó`óaóbbó_=ÂócbÄ –¸b b†ˆbbbbbb b -b bbbb —§bbbbbb¯†bbbbb!bÄÌbÄb b#b$b%b&b/b-b"Ú$b'b)b*b+b,b0b1b2b3b4b5b6b8b.ö£b7Äb:b@b>b9db;b=bAbBbCbDbEbFbHb?,“bGbJbKbMbIIObLbObPbQbRbSbTbUbYbN[IbVbXb\bZxnb[b^b_b`babbÑÑÑb]yåbcÑÄ©µÑÑÇ¢ÑÑ Ñ -Ñ Ñ Ñ ÑÑÌçÑÑÑÑÑéfÑÑÑÑÑ&ÑÑìÑÑ/Ñ'Ñ*ÑÑ(Ñ)Ñ Ñ!Ñ"Ñ#Ñ$Ñ%ŠRùÑ-Ñ+€‘Ñ,ÄÑ/Ñ0Ñ1Ñ5Ñ.Œ¶Ñ2Ñ4Ñ7ÏpÑ8Ñ9Ñ>Ñ6ªRÑ:Ñ;Ñ<Ñ=ÑJÑAÑ?¿ÖÑ@ÑKÑBÍòÑIÑCÑDÑEÑFÑGÑHÞ}ÑMÑNÑOÑSÑL?ÑPÑRÑVÑT^ïÑUÄÑXÑ^Ñ\ÑWdµÑYÑ[Ñ_Ñ`ÑbÑ]€üÑaDÑcÉDô}DDDÛ“DDD -D D ô‘D c^D DDÏDDDDDDÄDDD&DDD!DA¢DDDD D#D$D(D"MžD%D'D*D+D.D)e,D,D0D3D/qÉD1D5D6D9D4z}D7ÄD>D:‘D;D<D=D@DADED?—DBDDDKDF­‘DGDHDIDJDQDODL²CDMDRDSDZDPÇDTDUDVDWDXDYDcD[â‰D\D]D^D_D`DaDbĸ¸¸¸¸š¸%Š¸¸7\¸¸ ¸ -¸ ¸ ¸ ¸¸¸¸¸¸C㸸¸¸¸O˸¸¸¸ ¸#¸_¸!¸'¸$wm¸%¸&ĸ)¸*¸-¸({G¸+¸2¸.”D¸/¸0¸1¸4¸5¸6¸;¸3–œ¸7¸9¸:¸=¸>¸A¸<®Á¸?¸E¸B»–¸C¸G¸J¸FÈe¸HĸO¸Ká¸L¸M¸N¸Q¸T¸PãݸR¸V¸Z¸Uùl¸W¸Y¸\¸`¸[ -¸]¸_¸b¸a¸cDÑOsÄ - ba  pÝ~œ!”A #$%("¢G&,)²*Ä8-ÈV./01234567<9å:B=åÿ>@AHCûÂDFGOI hJLMNSPùQÄWT'âU[X5(Y_\9è]c`?‘aŠŠjŠ ýŠ ŠìŠŠŠŠc]ÄŠ Š -’ýŠ ŠŠ©ÿŠŠŠŠŠŠ»£ŠŠŠÑŒŠŠŠŠŠ!Šä7ŠŠ'Š"û)Š#Š$Š%Š&ÄŠ+Š( »Š)Š1Š, "èŠ-Š.Š/Š0Š5Š2 5½Š3Š;Š6 KíŠ7Š8Š9Š:Š?Š< ^»Š=ŠGŠ@ uˆŠAŠBŠCc\ŠDŠEŠFÄŠLŠH Œ\ŠIŠKŠQŠM ¡ŠNŠPŠSŠTŠWŠR ¶ªŠUŠZŠ[Š\ŠX ÏHŠYŠbŠ] éwŠ^Š`ŠaþŠc ÷¦þþþÄ"#þ -þ!#7þþþ þþ !)jþ þþþþ!1‡þþþþ!3¿þþþþþ"þ!EÅþþ þ!þ$þ)þ#!YVþ%þ'þ(Äþ+þ0þ*!giþ,þ.þ/þ2þ7þ1!wáþ3þ5þ6þ9þ>þ8!’½þ:þ<þ=þAþ?!­:þ@þEþB!¯ÓþCþGþHþLþF!´eþIþKÄþNþOþPþQþTþM!È:þRþWþU!æ þVþYþZþ[þ\þ_þX!êEþ]þacþ`"«þb%cc"5öcccc -c":cÄ cc "DÌc ccccc"G-cccccccccc"g cc!c#c "ƒkc"c&c$"•1c%c6c'"•¿c(c)c+c3c5Ä!c@c7"¯ºc8c?cAcBcCcDcEcFcGcHcIcJc[cKcLcM"»¶cN"Ø•cO"êcP#•cQ#& cR#L~cS#œcT#¬DcU#äcV#ôdcW$ cX$`ÁcY$ócZ%Q1Ä"Ä#Ä$Ä%Ä&Ä'Ä(%|ëÌTÌUÌVÌWÌXÌYÌZÌ[Ì\Ì]Ì^Ì_Ì`ÌaÌbÌcÞ%ªhÞÞÞÞÞÞÞÞÞ Þ -Þ Þ Þ ÞÞÞÞÞÞÞÞÞÞÞÞÞÞÞÞÞÞÞ Þ!Þ"Þ#Þ$Þ%Þ&Þ'Þ(Þ)Þ*Þ+Þ,Þ-Þ.Þ/Þ0Þ1Þ2Þ3Þ4Þ5Þ6Þ7Þ8Þ9Þ:Þ;Þ<Þ=Þ>Þ?Þ@ÞAÞBÞCÞDÞEÞFÞGÞHÞIÞJÞKÞLÞMÞNÞOÞPÞQÞRÞSÞTÞUÞVÞWÞXÞYÞZÞ[Þ\Þ]Þ^Þ_Þ`Þa%¨H%ò© +/Root 2387 0 R +/Info 2388 0 R +/ID [<47BB61B7C67E5662CD653809E7A13A98> <47BB61B7C67E5662CD653809E7A13A98>] +/Length 11950 +>> +stream +ÿ]Ô`c#Ô_c-Ô^c5Ô]cAÔ\  +cJÔ[  ÌÔZ Ì ÔYÌÔXÌÔWÌ3ÔVÌ4ÔUÌ5ÔTÌ6ÔSÌ:ÔRÌ;ÔQ Ì<ÔP!"Ì@ÔO#$ÌAÔN%&ÌCÔM'(ÌDÔL)*ÌHÔK+,ÌIÔJ-.ÌJÔI/0ÌQÔH12ÌRÔG34ÌYÔF56ÌZÔE78Ì^ÔD9:Ì_ÔC;<ÌaÔB=>'ÔA?@'Ô@AB'Ô?CD'Ô>EF'Ô=GH' Ô<IJ' Ô;KL'Ô:MN'Ô9OP'Ô8QR'Ô7ST'Ô6UV'Ô5WX'Ô4YZ' Ô3[\'&Ô2]^''Ô1_`'(Ô0ab'.Ô/cË?%'3Ô.ËË'4Ô-ËË'5Ô,ËË'<Ô+ËË'@Ô*Ë Ë +'JÔ)Ë Ë 'ZÔ(Ë ËŠÔ'ËËŠÔ&ËËŠÔ%ËËŠ%Ô$ËËŠ1Ô#ËËŠ<Ô"ËËŠMÔ!ËËŠTÔ ËËŠ[ÔËË õÔË!Ë"õÔË#Ë$õ/ÔË%Ë&õ<ÔË'Ë(õKÔË)Ë*õQÔË+Ë,õXÔË-Ë.a ÔË/Ë0a)ÔË1Ë2a2ÔË3Ë4a=ÔË5Ë6a>ÔË7Ë8aUÔË9Ë:abÔË;Ë<ÔÔË=Ë>Ô +ÔË?Ë@ÔÔËAËBÔÔ ËCËDÔ&Ô ËEËFÔAÔ ËGËHÔQÔ +ËIËJÔWÔ ËKËLÔ^ÔËMËNGÔËOËPGÔËQËRGÔËSËTGÔËUËVG!ÔËWËXG+ÔËYËZG0ÔË[Ë\G6ÔË]Ë^G<ÌcË_Ë`GBÌbËaËbGHÌaËc”}ƒGOÌ`””GVÌ_””GcÌ^””¬Ì]””¬Ì\” ” +¬Ì[” ” ¬ÌZ” ”¬#ÌY””¬*ÌX””¬.ÌW””¬2ÌV””¬6ÌU””¬:ÌT””¬>ÌS””¬HÌR””¬RÌQ”” ¬\ÌP”!”"ÌO”#”$ ÌN”%”&ÌM”'”("ÌL”)”*'ÌK”+”,.ÌJ”-”.7ÌI”/”0=ÌH”1”2CÌG”3”4IÌF”5”6OÌE”7”8TÌD”9”:[ÌC”;”<bÌB”=”>‰ÌA”?”@‰ Ì@”A”B‰Ì?”C”D‰Ì>”E”F‰"Ì=”G”H‰+Ì<”I”J‰6Ì;”K”L‰;Ì:”M”N‰?Ì9”O”P‰EÌ8”Q”R‰IÌ7”S”T‰OÌ6”U”VùÌ5”W”XùÌ4”Y”Zù Ì3”[”\ùÌ2”]”^ù]Ì1”_”`ùaÌ0”a”bùbÌ/”c]üöxÌ.]]xÌ-]]xÌ,]]xÌ+]]xÌ*] ] +xÌ)] ] x Ì(] ]x +Ì']]x Ì&]]xÌ%]]xÌ$]]xÌ#]]xÌ"]]]À£]] ½Ž]]x\x^x]x_ÑO]#]!í]"]%]&]'](])]*]+],]-].]/]0]1]2]3]4]5]6]7]9];]<]=]>]?]@]A]B]C]E]G]I]J]K]L]M]N]O]P]Q]R]W]U]$‚w]S]T]8]:]D]F]H]X]Y]Z][]\]]]^]_]`]a]b]c           +                       ! % #]V¾¢ "°Â & ' ( ) * + , - . / 0 1 2 3 4 5 6 8 9 : ; < = > ? @ A B C D E F G H I J K M N P R W U $5¡ T 7 L O Q S Y [ ] ^ _ ` a b ccccccccccc c +c c c ccccccccc V}c X Z \ cccccccc c!c"c$cñx`x,x#x0x.x+x x!x*xxc*c+c,%Žc1c/c%½c.c&c'c(c)*Dxc2c3c8c05çc4xWxVxUc6c7x)c=c>iycBc9V_c?c@c:c;c<xxcDcEcGcCp#cFcUcScHƒžcIcKcLcMcNcOcPcQcRÌÌcT¤”cVcWcXcYcZc[c\c]c^c_c`cacbccÌxXxa ÌÌÌò²ÌÌÌÌ Ì +Ì Ì ÌÌÌóÌÌÌÌÌÌ Ì/*ÌÌÌÌÌÌÌ"Ì#Ì.Ì!GÌ$Ì%Ì&Ì'Ì(Ì)Ì*Ì+Ì,xYÌ-Ì0Ì1Ì7Ì/bÞÌ2Ì=Ì8vxÌ9xbÌEÌ>‡ÄÌ?ÌBÌKÌFš“ÌGÌMÌNÌOÌTÌL°ØÌPÌSx%ÌWÌ[ÌUÏÌVÌXÌbÌ\ì'Ì]Ì`'Ìcûx'xcbž' '1¢''' +HÚ' ''['''mo''''"'†`''!'%')'#¤`'$Ì'+','/'*¸e'-'K'1'9'7'0ǽ'2'6';'='8Ùj':'A'>ê1'?'C'D'E'F'G'H'N'L'Bë 'Ix['O'P'R'Mr'QÌ'T'U'V'W'X'\'S'Y'['^'a']0/'_'`'cŠŠŠŠŠŠ'bAŠŠª{Š Š ŒfŠ +Š ŠŠŠŠ ŽéŠŠŠŠ­‡ŠŠÌŠŠŠŠŠ¹§ŠŠŠ!Š"Š#Š'Š ÓŠ$Š&Š+Š(ð-Š)Š*Š-Š.Š/Š3Š,üPŠ0Š2Š6Š4ÛŠ5Š8Š9Š:Š>Š7_Š;Š=ÌŠGŠ?6»Š@ŠAŠBŠCŠDŠEŠFŠIŠJŠKŠOŠHGÚŠLŠNŠQŠRŠVŠP`{ŠSŠUŠXŠYŠ]ŠWuÁŠZxZŠ\Š_Š`ŠaŠbõõŠ^‹Šcõõõõ7¿õõ õ +õ õ õ õõõ×ÅõÌõõõõñ†õõõõõõõõõ õ_õõ"õ#õ%õ!.õ$õ'õ(õ)õ*õ+õ,õ-õ1õ&@õ.õ0õ4õ2]Ðõ3õ6õ7õ8õ9õ:õ@õAõ>õ5_Gõ;õ=ÌõCõ?|âõBõEõFõGõHõIõMõD‚'õJõLõOõRõNŸnõPõTõUõVõbõZõS¢õWõYÈ]õcaõ[¼Aaaõ\õ]õ^õ_õ`õaÖ¸ +'"Óaal`aÌaaa a ax…a +a a¼kaaaa—Maaaaa"aa¬Ñaa#aºía!aaaaaa Ë þxa%a&a'a+a$,a(a*a.a,Mza-Ìa0a6a4a/S@a1a3a7a8a:a5pa9a?a;€Na<aAaDa@™:aBaCaFaIaE²8aGxTaHaQaJÙvaKaLaMaNaOaPÌaSaVaRã®aTaXa]aWÿIaYaZa[a\a_a`Ôa^ Eaaac=ŠÔÔÔÔOŒÔÔÔ Ô\)Ô Ô ÔÔÔ dÝÔÌ ÔÔ{hÔÔÔÔÔÔÔqÔÔÔ#Ô—ñÔÔ Ô!Ô"Ô)Ô'Ô$œ£Ô%Ô*Ô+Ô2Ô(±ßÔ,Ô-Ô.Ô/Ô0Ô1Ô;Ô3ÌéÔ4Ô5Ô6Ô7Ô8Ô9Ô:Ì +Ô=Ô>Ô?ÔBÔ<ÞÝÔ@ÔLÔCùŸÔDÔEÔFÔGÔHÔIÔJÔKÔNÔOÔRÔM&ÔPÔTÔUÔYÔSÔVÔXÔ[Ô\Ô_ÔZ!]Ô]ÔcÔ`9°ÔaÔbÌ GGGGf(GsG +G%GGG G G GGG }GGGGGGG™¢GGG¦wGGG"G³FG Ì G'G#ËáG$G%G&G)G,G(ξG*G.G2G-äMG/G1G4G8G3ðëG5G7G:G>G9ãG;G=G@GDG?kGAGCÌ GFGKGE#YGGGIGJGMGRGL1ÕGNGPGQGTGWGS?”GUG]GXU9GYGZG[G\G_G`Ga¬G^c?Gb ÙÚ¬¬œ’¬Ì¬¬²É¬¬¬¬ ¬ +¬ ¬ ¬ ¬¬¬¬ÏŠ¬¬¬Ðr¬¬¬¬ ¬æ5¬¬¬¬'¬!÷Û¬"¬$¬%¬&¬+¬( l¬)̬/¬, U¬-¬3¬0 ›¬1¬7¬4 $[¬5¬;¬8 *¬9¬?¬< /D¬=¬E¬@ E¬A¬B¬C¬DxS̬I¬F Z%¬G¬O¬J q'¬K¬L¬M¬N¬S¬P „ȬQ¬Y¬T š±¬U¬V¬W¬X¬]¬Z ¯Y¬[¬c¬^ ÆK¬_¬`¬a¬bÌ þ0"F­ !]  +!) !?3!Sþ!jËxRÌ$ !ƒt!#)%!˜™&(+,/*!­Â-2340!Æ`1:5!îÛ689@;!ý +<>?ÌFA"{BDELG"®HJKPM"ËNRWQ"SUVY^X"% Z\]`‰_"8šac‰Ì#’Ú‰‰‰"kƉ‰‰‰ +‰‰ "|>‰ ‰ ‰‰‰‰"˜º‰‰‰‰‰"´˜‰‰‰"·d‰‰‰ ‰$‰"»ö‰!‰#̉&‰'‰(‰)‰,‰%"Ïˉ*‰/‰-"휉.‰1‰2‰3‰4‰7‰0"ñÖ‰5‰9‰<‰8#<‰:‰@‰=#´‰>‰B‰C‰F‰A#¾‰D̉J‰G#$Š‰H‰L‰M‰V‰T‰K#&ë‰N‰P‰Q‰R‰S‰W‰X‰Y‰[‰U#G^‰Z‰]‰_‰\#c)‰^‰a‰cùùùùù‰`#tïù‰b%•lx1x2ù ù +#óûù $!ù#ùùù#¿Ìù ùùÌx-ùù%ùù#áàùùùùùù$ù.ù0ù$’ù$ù&ù'ù(ù)ù*ù+ù,ù-ù/ùù$+]ùù ù!$,eù"$-[$.e$M?$uù8ù9ù:ù>ù1$Vqù;ù<ù=ù2ù3ù4ù5ù6$‚ù7$ƒ&$„$‘ØùHùJ$Ð3ùFùLù?$°®ùGùIùKù@ùAùBùCùD$Û<ùE$Ü8$ÝH$öå%&|ùTùWùM%ùUùVùNùO%3·ùPùQùR%5(ùS%6$%7D%TOùZùX%aùYÌù^ù[%dGù\xù_%êù`ùc)É,x x%È[xxx %ݦxxx%è„xx&x%éxxxx"x$Ìx3x'& x(x/x4xOx5x6x7x8xQx9x:xPx;x<x=xNx>x?x@&éxA&0ÈxB&NÕxC&vÍxD&‡*xE&–æxF&êÅxG'kVxH'¼+xI((¼xJ(pxK(ÎÐxL)4çxM)„‘)ˆ½){)“·)˜ÿ)¡>)¥Õ)ªg)®ù)²‹)·!)½#)Ã%Ì*ÌÌÌÌÌÌ Ì!*8=ÔaÔbÔcÙ*_EÙÙÙÙÙÙÙÙÙ Ù +Ù Ù Ù ÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙÙ Ù!Ù"Ù#Ù$Ù%Ù&Ù'Ù(Ù)Ù*Ù+Ù,Ù-Ù.Ù/Ù0Ù1Ù2Ù3Ù4Ù5Ù6Ù7Ù8Ù9Ù:Ù;Ù<Ù=Ù>Ù?Ù@ÙAÙBÙCÙDÙEÙFÙGÙHÙIÙJÙKÙLÙMÙNÙOÙPÙQÙRÙSÙTÙUÙVÙWÙXÙYÙZÙ[Ù\Ù]Ù^Ù_Ù`ÙaÙbÙc >*±. > > > > > > > > >  > + >  >  >  > > > > > > > >*¯*¼W endstream endobj startxref -2486953 +2800727 %%EOF diff --git a/docs/src/Makefile b/docs/src/Makefile index e6ee0c5e..385d9051 100644 --- a/docs/src/Makefile +++ b/docs/src/Makefile @@ -86,7 +86,8 @@ TOPFILE = userguide.tex HTMLFILE = userhtml.tex SECFILE = intro.tex commrout.tex datastruct.tex psbrout.tex toolsrout.tex\ - methods.tex precs.tex penv.tex error.tex util.tex biblio.tex + methods.tex precs.tex penv.tex error.tex util.tex biblio.tex \ + ext-intro.tex cuda.tex FIGDIR = figures XPDFFLAGS = @@ -139,7 +140,7 @@ PDF = $(join $(BASEFILE),.pdf) PS = $(join $(BASEFILE),.ps) GXS = $(join $(BASEFILE),.gxs) GLX = $(join $(BASEFILE),.glx) -TARGETPDF= ../psblas-3.8.pdf +TARGETPDF= ../psblas-3.9.pdf BASEHTML = $(patsubst %.tex,%,$(HTMLFILE)) HTML = $(join $(BASEHTML),.html) HTMLDIR = ../html diff --git a/docs/src/biblio.tex b/docs/src/biblio.tex index 14c6dbdd..5ba3605c 100644 --- a/docs/src/biblio.tex +++ b/docs/src/biblio.tex @@ -1,9 +1,5 @@ \begin{thebibliography}{99} -\bibitem{DesPat:11} - D.~Barbieri, V.~Cardellini, S.~Filippone and D.~Rouson -{\em Design Patterns for Scientific Computations on Sparse Matrices}, - HPSS 2011, Algorithms and Programming Tools for Next-Generation High-Performance Scientific Software, Bordeaux, Sep. 2011 \bibitem{PARA04FOREST} G.~Bella, S.~Filippone, A.~De Maio and M.~Testa, @@ -154,6 +150,11 @@ Lawson, C., Hanson, R., Kincaid, D. and Krogh, F., {\em Fortran 95/2003 explained.} {Oxford University Press}, 2004. % +\bibitem{MRC:11} +{Metcalf, M., Reid, J. and Cohen, M.} +{\em Modern Fortran explained.} +{Oxford University Press}, 2011. +% %% \bibitem{DD2} %% B.~Smith, P.~Bjorstad and W.~Gropp, %% {\em Domain Decomposition: Parallel Multilevel Methods for Elliptic @@ -169,4 +170,20 @@ M.~Snir, S.~Otto, S.~Huss-Lederman, D.~Walker and J.~Dongarra, {\em MPI: The Complete Reference. Volume 1 - The MPI Core}, second edition, MIT Press, 1998. % + +\bibitem{DesPat:11} + D.~Barbieri, V.~Cardellini, S.~Filippone and D.~Rouson +{\em Design Patterns for Scientific Computations on Sparse Matrices}, + HPSS 2011, Algorithms and Programming Tools for Next-Generation High-Performance Scientific Software, Bordeaux, Sep. 2011 + +\bibitem{CaFiRo:2014} +{ Cardellini, V.}, { Filippone, S.}, { and} { Rouson, D.} 2014, + Design patterns for sparse-matrix computations on hybrid {CPU/GPU} + platforms, +{\em Scientific Programming\/}~{\em 22,\/}~1, 1--19. +\bibitem{OurTechRep} +D.~Barbieri, V.~Cardellini, A.~Fanfarillo, S.~Filippone, Three storage formats + for sparse matrices on {GPGPUs}, Tech. Rep. DICII RR-15.6, Universit\`a di + Roma Tor Vergata (February 2015). + \end{thebibliography} diff --git a/docs/src/cuda.tex b/docs/src/cuda.tex new file mode 100644 index 00000000..0ac435ef --- /dev/null +++ b/docs/src/cuda.tex @@ -0,0 +1,395 @@ + +\subsection{CUDA-class extensions} + +For computing with CUDA we define a dual memorization strategy in +which each variable on the CPU (``host'') side has a GPU (``device'') +side. When a GPU-type variable is initialized, the data contained is +(usually) the same on both sides. Each operator invoked on the +variable may change the data so that only the host side or the device +side are up-to-date. + +Keeping track of the updates to data in the variables is essential: we want +to perform most computations on the GPU, but we cannot afford the time +needed to move data between the host memory and the device memory +because the bandwidth of the interconnection bus would become the main +bottleneck of the computation. Thus, each and every computational +routine in the library is built according to the following principles: +\begin{itemize} +\item If the data type being handled is {GPU}-enabled, make sure that + its device copy is up to date, perform any arithmetic operation on + the {GPU}, and if the data has been altered as a result, mark + the main-memory copy as outdated. +\item The main-memory copy is never updated unless this is requested + by the user either +\begin{description} +\item[explicitly] by invoking a synchronization method; +\item[implicitly] by invoking a method that involves other data items + that are not {GPU}-enabled, e.g., by assignment ov a vector to a + normal array. +\end{description} +\end{itemize} +In this way, data items are put on the {GPU} memory ``on demand'' and +remain there as long as ``normal'' computations are carried out. +As an example, the following call to a matrix-vector product +\ifpdf +\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} + call psb_spmm(alpha,a,x,beta,y,desc_a,info) +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} + call psb_spmm(alpha,a,x,beta,y,desc_a,info) +\end{verbatim} + \end{minipage} + \end{center} +\fi +will transparently and automatically be performed on the {GPU} whenever +all three data inputs \fortinline|a|, \fortinline|x| and +\fortinline|y| are {GPU}-enabled. If a program makes many such calls +sequentially, then +\begin{itemize} +\item The first kernel invocation will find the data in main memory, + and will copy it to the {GPU} memory, thus incurring a significant + overhead; the result is however \emph{not} copied back, and + therefore: +\item Subsequent kernel invocations involving the same vector will + find the data on the {GPU} side so that they will run at full + speed. +\end{itemize} +For all invocations after the first the only data that will have to be +transferred to/from the main memory will be the scalars \fortinline|alpha| +and \fortinline|beta|, and the return code \fortinline|info|. + +\begin{description} +\item[Vectors:] The data type \fortinline|psb_T_vect_gpu| provides a + GPU-enabled extension of the inner type \fortinline|psb_T_base_vect_type|, + and must be used together with the other inner matrix type to make + full use of the GPU computational capabilities; +\item[CSR:] The data type \fortinline|psb_T_csrg_sparse_mat| provides an + interface to the GPU version of CSR available in the NVIDIA CuSPARSE + library; +\item[HYB:] The data type \fortinline|psb_T_hybg_sparse_mat| provides an + interface to the HYB GPU storage available in the NVIDIA CuSPARSE + library. The internal structure is opaque, hence the host side is + just CSR; the HYB data format is only available up to CUDA version + 10. +\item[ELL:] The data type \fortinline|psb_T_elg_sparse_mat| provides an + interface to the ELLPACK implementation from SPGPU; + +\item[HLL:] The data type \fortinline|psb_T_hlg_sparse_mat| provides an + interface to the Hacked ELLPACK implementation from SPGPU; +\item[HDIA:] The data type \fortinline|psb_T_hdiag_sparse_mat| provides an + interface to the Hacked DIAgonals implementation from SPGPU; +\end{description} + + +\section{CUDA Environment Routines} +\label{sec:cudaenv} + +\subsection*{psb\_cuda\_init --- Initializes PSBLAS-CUDA + environment} +\addcontentsline{toc}{subsection}{psb\_cuda\_init} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +call psb_cuda_init(ctxt [, device]) +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +call psb_cuda_init(ctxt [, device]) +\end{verbatim} + \end{minipage} + \end{center} +\fi + +This subroutine initializes the PSBLAS-CUDA environment. +\begin{description} +\item[Type:] Synchronous. +\item[\bf On Entry ] +\item[device] ID of CUDA device to attach to.\\ +Scope: {\bf local}.\\ +Type: {\bf optional}.\\ +Intent: {\bf in}.\\ +Specified as: an integer value. \ +Default: use \fortinline|mod(iam,ngpu)| where \fortinline|iam| is the calling +process index and \fortinline|ngpu| is the total number of CUDA devices +available on the current node. +\end{description} + + +{\par\noindent\large\bfseries Notes} +\begin{enumerate} +\item A call to this routine must precede any other PSBLAS-CUDA call. +\end{enumerate} + +\subsection*{psb\_cuda\_exit --- Exit from PSBLAS-CUDA + environment} +\addcontentsline{toc}{subsection}{psb\_cuda\_exit} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +call psb_cuda_exit(ctxt) +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +call psb_cuda_exit(ctxt) +\end{verbatim} + \end{minipage} + \end{center} +\fi + +This subroutine exits from the PSBLAS CUDA context. +\begin{description} +\item[Type:] Synchronous. +\item[\bf On Entry ] +\item[ctxt] the communication context identifying the virtual + parallel machine.\\ +Scope: {\bf global}.\\ +Type: {\bf required}.\\ +Intent: {\bf in}.\\ +Specified as: an integer variable. +\end{description} + + + + +\subsection*{psb\_cuda\_DeviceSync --- Synchronize CUDA device} +\addcontentsline{toc}{subsection}{psb\_cuda\_DeviceSync} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +call psb_cuda_DeviceSync() +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +call psb_cuda_DeviceSync() +\end{verbatim} + \end{minipage} + \end{center} +\fi + +This subroutine ensures that all previosly invoked kernels, i.e. all +invocation of CUDA-side code, have completed. + + +\subsection*{psb\_cuda\_getDeviceCount } +\addcontentsline{toc}{subsection}{psb\_cuda\_getDeviceCount} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +ngpus = psb_cuda_getDeviceCount() +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +ngpus = psb_cuda_getDeviceCount() +\end{verbatim} + \end{minipage} + \end{center} +\fi + +Get number of devices available on current computing node. + +\subsection*{psb\_cuda\_getDevice } +\addcontentsline{toc}{subsection}{psb\_cuda\_getDevice} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +ngpus = psb_cuda_getDevice() +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +ngpus = psb_cuda_getDevice() +\end{verbatim} + \end{minipage} + \end{center} +\fi + +Get device in use by current process. + +\subsection*{psb\_cuda\_setDevice } +\addcontentsline{toc}{subsection}{psb\_cuda\_setDevice} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +info = psb_cuda_setDevice(dev) +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +info = psb_cuda_setDevice(dev) +\end{verbatim} + \end{minipage} + \end{center} +\fi + +Set device to be used by current process. + +\subsection*{psb\_cuda\_DeviceHasUVA } +\addcontentsline{toc}{subsection}{psb\_cuda\_DeviceHasUVA} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +hasUva = psb_cuda_DeviceHasUVA() +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +hasUva = psb_cuda_DeviceHasUVA() +\end{verbatim} + \end{minipage} + \end{center} +\fi + +Returns true if device currently in use supports UVA +(Unified Virtual Addressing). + +\subsection*{psb\_cuda\_WarpSize } +\addcontentsline{toc}{subsection}{psb\_cuda\_WarpSize} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +nw = psb_cuda_WarpSize() +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +nw = psb_cuda_WarpSize() +\end{verbatim} + \end{minipage} + \end{center} +\fi + +Returns the warp size. + + +\subsection*{psb\_cuda\_MultiProcessors } +\addcontentsline{toc}{subsection}{psb\_cuda\_MultiProcessors} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +nmp = psb_cuda_MultiProcessors() +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +nmp = psb_cuda_MultiProcessors() +\end{verbatim} + \end{minipage} + \end{center} +\fi + +Returns the number of multiprocessors in the CUDA device. + +\subsection*{psb\_cuda\_MaxThreadsPerMP } +\addcontentsline{toc}{subsection}{psb\_cuda\_MaxThreadsPerMP} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +nt = psb_cuda_MaxThreadsPerMP() +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +nt = psb_cuda_MaxThreadsPerMP() +\end{verbatim} + \end{minipage} + \end{center} +\fi + +Returns the maximum number of threads per multiprocessor. + + +\subsection*{psb\_cuda\_MaxRegistersPerBlock } +\addcontentsline{toc}{subsection}{psb\_cuda\_MaxRegisterPerBlock} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +nr = psb_cuda_MaxRegistersPerBlock() +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +nr = psb_cuda_MaxRegistersPerBlock() +\end{verbatim} + \end{minipage} + \end{center} +\fi + +Returns the maximum number of register per thread block. + + +\subsection*{psb\_cuda\_MemoryClockRate } +\addcontentsline{toc}{subsection}{psb\_cuda\_MemoryClockRate} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +cl = psb_cuda_MemoryClockRate() +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +cl = psb_cuda_MemoryClockRate() +\end{verbatim} + \end{minipage} + \end{center} +\fi + +Returns the memory clock rate in KHz, as an integer. + +\subsection*{psb\_cuda\_MemoryBusWidth } +\addcontentsline{toc}{subsection}{psb\_cuda\_MemoryBusWidth} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +nb = psb_cuda_MemoryBusWidth() +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +nb = psb_cuda_MemoryBusWidth() +\end{verbatim} + \end{minipage} + \end{center} +\fi + +Returns the memory bus width in bits. + +\subsection*{psb\_cuda\_MemoryPeakBandwidth } +\addcontentsline{toc}{subsection}{psb\_cuda\_MemoryPeakBandwidth} + +\ifpdf +\begin{minted}[breaklines=true]{fortran} +bw = psb_cuda_MemoryPeakBandwidth() +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +bw = psb_cuda_MemoryPeakBandwidth() +\end{verbatim} + \end{minipage} + \end{center} +\fi +Returns the peak memory bandwidth in MB/s (real double precision). + + + diff --git a/docs/src/ext-intro.tex b/docs/src/ext-intro.tex new file mode 100644 index 00000000..44e9915c --- /dev/null +++ b/docs/src/ext-intro.tex @@ -0,0 +1,598 @@ +\section{Extensions}\label{sec:ext-intro} + +The EXT, CUDA and RSB subdirectories contains a set of extensions to the base +library. The extensions provide additional storage formats beyond the +ones already contained in the base library, as well as interfaces +to: +\begin{description} +\item[SPGPU] a CUDA library originally published as + \url{https://code.google.com/p/spgpu/} and now included in the + \verb|cuda| subdir, for computations on NVIDIA GPUs; +\item[LIBRSB] \url{http://sourceforge.net/projects/librsb/}, for + computations on multicore parallel machines. +\end{description} +The infrastructure laid out in the base library to allow for these +extensions is detailed in the references~\cite{DesPat:11,CaFiRo:2014,Sparse03}; +the CUDA-specific data formats are described in~\cite{OurTechRep}. + + +\subsection{Using the extensions} +\label{sec:ext-appstruct} +A sample application using the PSBLAS extensions will contain the +following steps: +\begin{itemize} +\item \verb|USE| the appropriat modules (\verb|psb_ext_mod|, + \verb|psb_cuda_mod|); +\item Declare a \emph{mold} variable of the necessary type + (e.g. \verb|psb_d_ell_sparse_mat|, \verb|psb_d_hlg_sparse_mat|, + \verb|psb_d_vect_cuda|); +\item Pass the mold variable to the base library interface where + needed to ensure the appropriate dynamic type. +\end{itemize} +Suppose you want to use the CUDA-enabled ELLPACK data structure; you +would use a piece of code like this (and don't forget, you need +CUDA-side vectors along with the matrices): +\ifpdf +\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} +program my_cuda_test + use psb_base_mod + use psb_util_mod + use psb_ext_mod + use psb_cuda_mod + type(psb_dspmat_type) :: a, agpu + type(psb_d_vect_type) :: x, xg, bg + + real(psb_dpk_), allocatable :: xtmp(:) + type(psb_d_vect_cuda) :: vmold + type(psb_d_elg_sparse_mat) :: aelg + type(psb_ctxt_type) :: ctxt + integer :: iam, np + + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) + call psb_cuda_init(ctxt, iam) + + + ! My own home-grown matrix generator + call gen_matrix(ctxt,idim,desc_a,a,x,info) + if (info /= 0) goto 9999 + + call a%cscnv(agpu,info,mold=aelg) + if (info /= 0) goto 9999 + xtmp = x%get_vect() + call xg%bld(xtmp,mold=vmold) + call bg%bld(size(xtmp),mold=vmold) + + ! Do sparse MV + call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info) + + +9999 continue + if (info == 0) then + write(*,*) '42' + else + write(*,*) 'Something went wrong ',info + end if + + + call psb_cuda_exit() + call psb_exit(ctxt) + stop +end program my_cuda_test +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} +program my_cuda_test + use psb_base_mod + use psb_util_mod + use psb_ext_mod + use psb_cuda_mod + type(psb_dspmat_type) :: a, agpu + type(psb_d_vect_type) :: x, xg, bg + + real(psb_dpk_), allocatable :: xtmp(:) + type(psb_d_vect_cuda) :: vmold + type(psb_d_elg_sparse_mat) :: aelg + type(psb_ctxt_type) :: ctxt + integer :: iam, np + + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) + call psb_cuda_init(ctxt, iam) + + + ! My own home-grown matrix generator + call gen_matrix(ctxt,idim,desc_a,a,x,info) + if (info /= 0) goto 9999 + + call a%cscnv(agpu,info,mold=aelg) + if (info /= 0) goto 9999 + xtmp = x%get_vect() + call xg%bld(xtmp,mold=vmold) + call bg%bld(size(xtmp),mold=vmold) + + ! Do sparse MV + call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info) + + +9999 continue + if (info == 0) then + write(*,*) '42' + else + write(*,*) 'Something went wrong ',info + end if + + + call psb_cuda_exit() + call psb_exit(ctxt) + stop +end program my_cuda_test +\end{verbatim} + \end{minipage} + \end{center} +\fi + +A full example of this strategy can be seen in the +\texttt{test/ext/kernel} and \texttt{test/\-cuda/\-kernel} subdirectories, +where we provide sample programs +to test the speed of the sparse matrix-vector product with the various +data structures included in the library. + + +\subsection{Extensions' Data Structures} +\label{sec:ext-datastruct} +%\ifthenelse{\boolean{mtc}}{\minitoc}{} + +Access to the facilities provided by the EXT library is mainly +achieved through the data types that are provided within. +The data classes are derived from the base classes in PSBLAS, through +the Fortran~2003 mechanism of \emph{type extension}~\cite{MRC:11}. + +The data classes are divided between the general purpose CPU +extensions, the GPU interfaces and the RSB interfaces. +In the description we will make use of the notation introduced in +Table~\ref{tab:notation}. + +\begin{table}[ht] +\caption{Notation for parameters describing a sparse matrix} +\begin{center} +{\footnotesize +\begin{tabular}{ll} +\hline +Name & Description \\ +\hline +M & Number of rows in matrix \\ +N & Number of columns in matrix \\ +NZ & Number of nonzeros in matrix \\ +AVGNZR & Average number of nonzeros per row \\ +MAXNZR & Maximum number of nonzeros per row \\ +NDIAG & Numero of nonzero diagonals\\ +AS & Coefficients array \\ +IA & Row indices array \\ +JA & Column indices array \\ +IRP & Row start pointers array \\ +JCP & Column start pointers array \\ +NZR & Number of nonzeros per row array \\ +OFFSET & Offset for diagonals \\ +\hline +\end{tabular} +} +\end{center} +\label{tab:notation} +\end{table} + +\begin{figure}[ht] + \centering +% \includegraphics[width=5.2cm]{figures/mat.eps} +\ifcase\pdfoutput + \includegraphics[width=5.2cm]{mat.png} +\or + \includegraphics[width=5.2cm]{figures/mat.pdf} +\fi + \caption{Example of sparse matrix} + \label{fig:dense} +\end{figure} + +\subsection{CPU-class extensions} + + +\subsubsection*{ELLPACK} + +The ELLPACK/ITPACK format (shown in Figure~\ref{fig:ell}) +comprises two 2-dimensional arrays \verb|AS| and +\verb|JA| with \verb|M| rows and \verb|MAXNZR| columns, where +\verb|MAXNZR| is the maximum +number of nonzeros in any row~\cite{ELLPACK}. +Each row of the arrays \verb|AS| and \verb|JA| contains the +coefficients and column indices; rows shorter than +\verb|MAXNZR| are padded with zero coefficients and appropriate column +indices, e.g. the last valid one found in the same row. + +\begin{figure}[ht] + \centering +% \includegraphics[width=8.2cm]{figures/ell.eps} +\ifcase\pdfoutput + \includegraphics[width=8.2cm]{ell.png} +\or + \includegraphics[width=8.2cm]{figures/ell.pdf} +\fi + \caption{ELLPACK compression of matrix in Figure~\ref{fig:dense}} + \label{fig:ell} +\end{figure} + + +\begin{algorithm} +\lstset{language=Fortran} +\small + \begin{lstlisting} + do i=1,n + t=0 + do j=1,maxnzr + t = t + as(i,j)*x(ja(i,j)) + end do + y(i) = t + end do + \end{lstlisting} + \caption{\label{alg:ell} Matrix-Vector product in ELL format} +\end{algorithm} +The matrix-vector product $y=Ax$ can be computed with the code shown in +Alg.~\ref{alg:ell}; it costs one memory write per outer iteration, +plus three memory reads and two floating-point operations per inner +iteration. + +Unless all rows have exactly the same number of nonzeros, some of the +coefficients in the \verb|AS| array will be zeros; therefore this +data structure will have an overhead both in terms of memory space +and redundant operations (multiplications by zero). The overhead can +be acceptable if: +\begin{enumerate} +\item The maximum number of nonzeros per row is not much larger than + the average; +\item The regularity of the data structure allows for faster code, + e.g. by allowing vectorization, thereby offsetting the additional + storage requirements. +\end{enumerate} +In the extreme case where the input matrix has one full row, the +ELLPACK structure would require more memory than the normal 2D array +storage. The ELLPACK storage format was very popular in the vector +computing days; in modern CPUs it is not quite as popular, but it +is the basis for many GPU formats. + +The relevant data type is \verb|psb_T_ell_sparse_mat|: +\ifpdf +\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} + type, extends(psb_d_base_sparse_mat) :: psb_d_ell_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! + + integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:) + real(psb_dpk_), allocatable :: val(:,:) + + contains + .... + end type psb_d_ell_sparse_mat +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} + type, extends(psb_d_base_sparse_mat) :: psb_d_ell_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! + + integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:) + real(psb_dpk_), allocatable :: val(:,:) + + contains + .... + end type psb_d_ell_sparse_mat +\end{verbatim} + \end{minipage} + \end{center} +\fi + + +\subsubsection*{Hacked ELLPACK} + +The \textit{hacked ELLPACK} (\textbf{HLL}) format +alleviates the main problem of the ELLPACK format, that is, +the amount of memory required by padding for sparse matrices in +which the maximum row length is larger than the average. + +The number of elements allocated to padding is $[(m*maxNR) - +(m*avgNR) = m*(maxNR-avgNR)]$ +for both \verb|AS| and \verb|JA| arrays, +where $m$ is equal to the number of rows of the matrix, $maxNR$ is the +maximum number of nonzero elements +in every row and $avgNR$ is the average number of nonzeros. +Therefore a single densely populated row can seriously affect the +total size of the allocation. + +To limit this effect, in the HLL format we break the original matrix +into equally sized groups of rows (called \textit{hacks}), and then store +these groups as independent matrices in ELLPACK format. +The groups can be arranged selecting rows in an arbitrarily manner; +indeed, if the rows are sorted by decreasing number of nonzeros we +obtain essentially the JAgged Diagonals format. +If the rows are not in the original order, then an additional vector +\textit{rIdx} is required, storing the actual row index for each row +in the data structure. + +The multiple ELLPACK-like buffers are stacked together inside a +single, one dimensional array; +an additional vector \textit{hackOffsets} is provided to keep track +of the individual submatrices. +All hacks have the same number of rows \textit{hackSize}; hence, +the \textit{hackOffsets} vector is an array of +$(m/hackSize)+1$ elements, each one pointing to the first index of a +submatrix inside the stacked \textit{cM}/\textit{rP} buffers, plus an +additional element pointing past the end of the last block, where the +next one would begin. +We thus have the property that +the elements of the $k$-th \textit{hack} are stored between +\verb|hackOffsets[k]| and +\verb|hackOffsets[k+1]|, similarly to what happens in the CSR format. + +\begin{figure}[ht] + \centering +% \includegraphics[width=8.2cm]{../figures/hll.eps} +\ifcase\pdfoutput + \includegraphics[width=.72\textwidth]{hll.png} +\or + \includegraphics[width=.72\textwidth]{../figures/hll.pdf} +\fi + \caption{Hacked ELLPACK compression of matrix in Figure~\ref{fig:dense}} + \label{fig:hll} +\end{figure} + +With this data structure a very long row only affects one hack, and +therefore the additional memory is limited to the hack in which the +row appears. + +The relevant data type is \verb|psb_T_hll_sparse_mat|: +\ifpdf +\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} + type, extends(psb_d_base_sparse_mat) :: psb_d_hll_sparse_mat + ! + ! HLL format. (Hacked ELL) + ! + integer(psb_ipk_) :: hksz + integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:) + real(psb_dpk_), allocatable :: val(:) + + contains + .... + end type +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} + type, extends(psb_d_base_sparse_mat) :: psb_d_hll_sparse_mat + ! + ! HLL format. (Hacked ELL) + ! + integer(psb_ipk_) :: hksz + integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:) + real(psb_dpk_), allocatable :: val(:) + + contains + .... + end type +\end{verbatim} + \end{minipage} + \end{center} +\fi + +\subsubsection*{Diagonal storage} + + +The DIAgonal (DIA) format (shown in Figure~\ref{fig:dia}) +has a 2-dimensional array \verb|AS| containing in each column the +coefficients along a diagonal of the matrix, and an integer array +\verb|OFFSET| that determines where each diagonal starts. The +diagonals in \verb|AS| are padded with zeros as necessary. + +The code to compute the matrix-vector product $y=Ax$ is shown in Alg.~\ref{alg:dia}; +it costs one memory read per outer iteration, +plus three memory reads, one memory write and two floating-point +operations per inner iteration. The accesses to \verb|AS| and +\verb|x| are in strict sequential order, therefore no indirect +addressing is required. + +\begin{figure}[ht] + \centering +% \includegraphics[width=8.2cm]{figures/dia.eps} +\ifcase\pdfoutput + \includegraphics[width=.72\textwidth]{dia.png} +\or + \includegraphics[width=.72\textwidth]{figures/dia.pdf} +\fi + \caption{DIA compression of matrix in Figure~\ref{fig:dense}} + \label{fig:dia} +\end{figure} + + +\begin{algorithm} +\ifpdf +\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} + do j=1,ndiag + if (offset(j) > 0) then + ir1 = 1; ir2 = m - offset(j); + else + ir1 = 1 - offset(j); ir2 = m; + end if + do i=ir1,ir2 + y(i) = y(i) + alpha*as(i,j)*x(i+offset(j)) + end do + end do +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} + do j=1,ndiag + if (offset(j) > 0) then + ir1 = 1; ir2 = m - offset(j); + else + ir1 = 1 - offset(j); ir2 = m; + end if + do i=ir1,ir2 + y(i) = y(i) + alpha*as(i,j)*x(i+offset(j)) + end do + end do +\end{verbatim} + \end{minipage} + \end{center} +\fi + \caption{\label{alg:dia} Matrix-Vector product in DIA format} +\end{algorithm} + + +The relevant data type is \verb|psb_T_dia_sparse_mat|: +\ifpdf +\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} + type, extends(psb_d_base_sparse_mat) :: psb_d_dia_sparse_mat + ! + ! DIA format, extended. + ! + + integer(psb_ipk_), allocatable :: offset(:) + integer(psb_ipk_) :: nzeros + real(psb_dpk_), allocatable :: data(:,:) + + end type +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} + type, extends(psb_d_base_sparse_mat) :: psb_d_dia_sparse_mat + ! + ! DIA format, extended. + ! + + integer(psb_ipk_), allocatable :: offset(:) + integer(psb_ipk_) :: nzeros + real(psb_dpk_), allocatable :: data(:,:) + + end type +\end{verbatim} + \end{minipage} + \end{center} +\fi + + + +\subsubsection*{Hacked DIA} + +Storage by DIAgonals is an attractive option for matrices whose +coefficients are located on a small set of diagonals, since they do +away with storing explicitly the indices and therefore reduce +significantly memory traffic. However, having a few coefficients +outside of the main set of diagonals may significantly increase the +amount of needed padding; moreover, while the DIA code is easily +vectorized, it does not necessarily make optimal use of the memory +hierarchy. While processing each diagonal we are updating entries in +the output vector \verb|y|, which is then accessed multiple times; if +the vector \verb|y| is too large to remain in the cache memory, the +associated cache miss penalty is paid multiple times. + +The \textit{hacked DIA} (\textbf{HDIA}) format was designed to contain +the amount of padding, by breaking the original matrix +into equally sized groups of rows (\textit{hacks}), and then storing +these groups as independent matrices in DIA format. This approach is +similar to that of HLL, and requires using an offset vector for each +submatrix. Again, similarly to HLL, the various submatrices are +stacked inside a linear array to improve memory management. The fact +that the matrix is accessed in slices helps in reducing cache misses, +especially regarding accesses to the %output +vector \verb|y|. + + +An additional vector \textit{hackOffsets} is provided to complete +the matrix format; given that \textit{hackSize} is the number of rows of each hack, +the \textit{hackOffsets} vector is made by an array of +$(m/hackSize)+1$ elements, pointing to the first diagonal offset of a +submatrix inside the stacked \textit{offsets} buffers, plus an +additional element equal to the number of nonzero diagonals in the whole matrix. +We thus have the property that +the number of diagonals of the $k$-th \textit{hack} is given by +\textit{hackOffsets[k+1] - hackOffsets[k]}. + +\begin{figure}[ht] + \centering +% \includegraphics[width=8.2cm]{../figures/hdia.eps} +\ifcase\pdfoutput + \includegraphics[width=.72\textwidth]{hdia.png} +\or + \includegraphics[width=.72\textwidth]{../figures/hdia.pdf} +\fi + \caption{Hacked DIA compression of matrix in Figure~\ref{fig:dense}} + \label{fig:hdia} +\end{figure} + +The relevant data type is \verb|psb_T_hdia_sparse_mat|: +\ifpdf +\begin{minted}[breaklines=true,bgcolor=bg,fontsize=\small]{fortran} + type pm + real(psb_dpk_), allocatable :: data(:,:) + end type pm + + type po + integer(psb_ipk_), allocatable :: off(:) + end type po + + type, extends(psb_d_base_sparse_mat) :: psb_d_hdia_sparse_mat + ! + ! HDIA format, extended. + ! + + type(pm), allocatable :: hdia(:) + type(po), allocatable :: offset(:) + integer(psb_ipk_) :: nblocks, nzeros + integer(psb_ipk_) :: hack = 64 + integer(psb_long_int_k_) :: dim=0 + + contains + .... + end type +\end{minted} +\else +\begin{center} + \begin{minipage}[tl]{0.9\textwidth} +\begin{verbatim} + type pm + real(psb_dpk_), allocatable :: data(:,:) + end type pm + + type po + integer(psb_ipk_), allocatable :: off(:) + end type po + + type, extends(psb_d_base_sparse_mat) :: psb_d_hdia_sparse_mat + ! + ! HDIA format, extended. + ! + + type(pm), allocatable :: hdia(:) + type(po), allocatable :: offset(:) + integer(psb_ipk_) :: nblocks, nzeros + integer(psb_ipk_) :: hack = 64 + integer(psb_long_int_k_) :: dim=0 + + contains + .... + end type +\end{verbatim} + \end{minipage} + \end{center} +\fi + + diff --git a/docs/src/figures/dia.pdf b/docs/src/figures/dia.pdf new file mode 100644 index 00000000..04b1777d Binary files /dev/null and b/docs/src/figures/dia.pdf differ diff --git a/docs/src/figures/dia.png b/docs/src/figures/dia.png new file mode 100644 index 00000000..de7db919 Binary files /dev/null and b/docs/src/figures/dia.png differ diff --git a/docs/src/figures/ell.pdf b/docs/src/figures/ell.pdf new file mode 100644 index 00000000..b3f2f177 Binary files /dev/null and b/docs/src/figures/ell.pdf differ diff --git a/docs/src/figures/ell.png b/docs/src/figures/ell.png new file mode 100644 index 00000000..31911882 Binary files /dev/null and b/docs/src/figures/ell.png differ diff --git a/docs/src/figures/hdia.pdf b/docs/src/figures/hdia.pdf new file mode 100644 index 00000000..62f570bf Binary files /dev/null and b/docs/src/figures/hdia.pdf differ diff --git a/docs/src/figures/hdia.png b/docs/src/figures/hdia.png new file mode 100644 index 00000000..08bfb5ff Binary files /dev/null and b/docs/src/figures/hdia.png differ diff --git a/docs/src/figures/hll.pdf b/docs/src/figures/hll.pdf new file mode 100644 index 00000000..47267c4f Binary files /dev/null and b/docs/src/figures/hll.pdf differ diff --git a/docs/src/figures/hll.png b/docs/src/figures/hll.png new file mode 100644 index 00000000..219b751a Binary files /dev/null and b/docs/src/figures/hll.png differ diff --git a/docs/src/figures/mat.pdf b/docs/src/figures/mat.pdf new file mode 100644 index 00000000..2c47520d Binary files /dev/null and b/docs/src/figures/mat.pdf differ diff --git a/docs/src/figures/mat.png b/docs/src/figures/mat.png new file mode 100644 index 00000000..d4f5c6f9 Binary files /dev/null and b/docs/src/figures/mat.png differ diff --git a/docs/src/figures/psblaslibraryext.png b/docs/src/figures/psblaslibraryext.png new file mode 100644 index 00000000..da3b3a9c Binary files /dev/null and b/docs/src/figures/psblaslibraryext.png differ diff --git a/docs/src/userguide.tex b/docs/src/userguide.tex index 9d4e1814..7113ca93 100644 --- a/docs/src/userguide.tex +++ b/docs/src/userguide.tex @@ -17,6 +17,7 @@ \newtheorem{theorem}{Theorem} \newtheorem{corollary}{Corollary} \usepackage{listings} +\usepackage{algorithm2e} \usepackage{minted} \usemintedstyle{friendly} \definecolor{bg}{rgb}{0.95,0.95,0.95} @@ -36,7 +37,7 @@ \relax \pdfcompresslevel=0 %-- 0 = none, 9 = best \pdfinfo{ %-- Info dictionary of PDF output /Author (Alfredo Buttari) - /Title (Parallel Sparse BLAS V. 3.8.0) + /Title (Parallel Sparse BLAS V. 3.9.0) /Subject (Parallel Sparse Basic Linear Algebra Subroutines) /Keywords (Computer Science Linear Algebra Fluid Dynamics Parallel Linux MPI PSBLAS Iterative Solvers Preconditioners) /Creator (pdfLaTeX) @@ -99,7 +100,7 @@ \begin{document} { -\pdfbookmark{PSBLAS-v3.8.0 User's Guide}{title} +\pdfbookmark{PSBLAS-v3.9.0 User's Guide}{title} \lstset{language=Fortran} \newlength{\centeroffset} \setlength{\centeroffset}{-0.5\oddsidemargin} @@ -109,7 +110,7 @@ \vspace*{\stretch{1}} \noindent\hspace*{\centeroffset}\makebox[0pt][l]{\begin{minipage}{\textwidth} \flushright -{\Huge\bfseries PSBLAS 3.8.0 User's guide +{\Huge\bfseries PSBLAS 3.9.0 User's guide } \noindent\rule[-1ex]{\textwidth}{5pt}\\[2.5ex] \hfill\emph{\Large A reference guide for the Parallel Sparse BLAS library} @@ -130,7 +131,7 @@ {\bfseries by Salvatore Filippone\\ and Alfredo Buttari}\\ -May 1st, 2022 +Aug 1st, 2024 \end{minipage}} } %\addtolength{\textwidth}{\centeroffset} @@ -159,7 +160,8 @@ May 1st, 2022 \include{util} \include{precs} \include{methods} - +\include{ext-intro} +\include{cuda} \cleardoublepage \input{biblio} diff --git a/docs/src/userhtml.tex b/docs/src/userhtml.tex index 5dc0e6cf..770f9194 100644 --- a/docs/src/userhtml.tex +++ b/docs/src/userhtml.tex @@ -17,6 +17,7 @@ \newtheorem{theorem}{Theorem} \newtheorem{corollary}{Corollary} \usepackage{listings} +\usepackage{algorithm2e} \usepackage{microtype} \ifpdf \newmintinline[fortinline]{fortran}{} @@ -94,9 +95,9 @@ Alfredo Buttari } \\ %\\[10ex] %\today -Software version: 3.8.0\\ +Software version: 3.9.0\\ %\today -May 1st, 2022 +Aug 1st, 2024 \cleardoublepage \begingroup \renewcommand*{\thepage}{toc} @@ -120,7 +121,8 @@ May 1st, 2022 \include{util} \include{precs} \include{methods} - +\include{ext-intro} +\include{cuda} \cleardoublepage \input{biblio} diff --git a/ext/Makefile b/ext/Makefile new file mode 100755 index 00000000..36b82433 --- /dev/null +++ b/ext/Makefile @@ -0,0 +1,84 @@ +include ../Make.inc +# +# Libraries used +# +LIBDIR=../lib +INCDIR=../include +MODDIR=../modules +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FIFLAG). +CINCLUDES= +LIBNAME=libpsb_ext.a + + +FOBJS= psb_d_ell_mat_mod.o psb_d_hll_mat_mod.o \ + psb_s_hll_mat_mod.o psb_s_ell_mat_mod.o \ + psb_c_hll_mat_mod.o psb_c_ell_mat_mod.o \ + psb_z_hll_mat_mod.o psb_z_ell_mat_mod.o \ + psb_d_dia_mat_mod.o psb_d_hdia_mat_mod.o \ + psb_s_dia_mat_mod.o psb_s_hdia_mat_mod.o \ + psb_c_dia_mat_mod.o psb_c_hdia_mat_mod.o \ + psb_z_dia_mat_mod.o psb_z_hdia_mat_mod.o \ + psb_s_dns_mat_mod.o psb_d_dns_mat_mod.o \ + psb_c_dns_mat_mod.o psb_z_dns_mat_mod.o \ + psi_ext_util_mod.o psi_i_ext_util_mod.o \ + psi_s_ext_util_mod.o psi_c_ext_util_mod.o \ + psi_d_ext_util_mod.o psi_z_ext_util_mod.o \ + psb_ext_mod.o + +COBJS= + +OBJS=$(COBJS) $(FOBJS) + +lib: objs ilib + ar cur $(LIBNAME) $(OBJS) + /bin/cp -p $(LIBNAME) $(LIBDIR) + +objs: $(OBJS) iobjs + /bin/cp -p *$(.mod) $(MODDIR) + + + +psb_ext_mod.o: psb_s_dia_mat_mod.o psb_d_dia_mat_mod.o \ + psb_c_dia_mat_mod.o psb_z_dia_mat_mod.o \ + psb_d_ell_mat_mod.o psb_d_hll_mat_mod.o \ + psb_s_hll_mat_mod.o psb_s_ell_mat_mod.o \ + psb_c_hll_mat_mod.o psb_c_ell_mat_mod.o \ + psb_z_hll_mat_mod.o psb_z_ell_mat_mod.o \ + psb_s_hdia_mat_mod.o psb_d_hdia_mat_mod.o \ + psb_c_hdia_mat_mod.o psb_z_hdia_mat_mod.o \ + psb_s_dns_mat_mod.o psb_d_dns_mat_mod.o \ + psb_c_dns_mat_mod.o psb_z_dns_mat_mod.o + +# psb_d_rsb_mat_mod.o psb_d_hdia_mat_mod.o +psi_ext_util_mod.o: psi_i_ext_util_mod.o \ + psi_s_ext_util_mod.o psi_c_ext_util_mod.o \ + psi_d_ext_util_mod.o psi_z_ext_util_mod.o + +psb_s_dia_mat_mod.o psb_c_dia_mat_mod.o psb_d_dia_mat_mod.o psb_z_dia_mat_mod.o: psi_ext_util_mod.o +psb_s_hdia_mat_mod.o psb_c_hdia_mat_mod.o psb_d_hdia_mat_mod.o psb_z_hdia_mat_mod.o: psi_ext_util_mod.o +psb_s_hll_mat_mod.o psb_c_hll_mat_mod.o psb_d_hll_mat_mod.o psb_z_hll_mat_mod.o: psi_ext_util_mod.o + +ilib: objs + $(MAKE) -C impl lib LIBNAME=$(LIBNAME) + +iobjs: $(OBJS) + $(MAKE) -C impl objs + +clean: cclean iclean + /bin/rm -f $(FOBJS) *$(.mod) *.a + +cclean: + /bin/rm -f $(COBJS) +iclean: + $(MAKE) -C impl clean + +veryclean: clean + /bin/rm -f $(HERE)/$(LIBNAME) $(LIBMOD) *$(.mod) + + + + diff --git a/ext/impl/Makefile b/ext/impl/Makefile new file mode 100755 index 00000000..57593a54 --- /dev/null +++ b/ext/impl/Makefile @@ -0,0 +1,412 @@ +include ../../Make.inc +LIBDIR=../../lib +INCDIR=../../include +MODDIR=../../modules +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG).. +LIBNAME=libpsb_ext.a + +OBJS= \ +psb_s_cp_dia_from_coo.o \ +psb_s_cp_dia_to_coo.o \ +psb_s_cp_ell_from_coo.o \ +psb_s_cp_ell_from_fmt.o \ +psb_s_cp_ell_to_coo.o \ +psb_s_cp_ell_to_fmt.o \ +psb_s_cp_hdia_from_coo.o \ +psb_s_cp_hdia_to_coo.o \ +psb_s_cp_hll_from_coo.o \ +psb_s_cp_hll_from_fmt.o \ +psb_s_cp_hll_to_coo.o \ +psb_s_cp_hll_to_fmt.o \ +psb_s_dia_aclsum.o \ +psb_s_dia_allocate_mnnz.o \ +psb_s_dia_arwsum.o \ +psb_s_dia_colsum.o \ +psb_s_dia_csgetptn.o \ +psb_s_dia_csgetrow.o \ +psb_s_dia_csmm.o \ +psb_s_dia_csmv.o \ +psb_s_dia_get_diag.o \ +psb_s_dia_maxval.o \ +psb_s_dia_mold.o \ +psb_s_dia_print.o \ +psb_s_dia_reallocate_nz.o \ +psb_s_dia_reinit.o \ +psb_s_dia_rowsum.o \ +psb_s_dia_scal.o \ +psb_s_dia_scals.o \ +psb_s_ell_aclsum.o \ +psb_s_ell_allocate_mnnz.o \ +psb_s_ell_arwsum.o \ +psb_s_ell_colsum.o \ +psb_s_ell_csgetblk.o \ +psb_s_ell_csgetptn.o \ +psb_s_ell_csgetrow.o \ +psb_s_ell_csmm.o \ +psb_s_ell_csmv.o \ +psb_s_ell_csnm1.o \ +psb_s_ell_csnmi.o \ +psb_s_ell_csput.o \ +psb_s_ell_cssm.o \ +psb_s_ell_cssv.o \ +psb_s_ell_get_diag.o \ +psb_s_ell_maxval.o \ +psb_s_ell_mold.o \ +psb_s_ell_print.o \ +psb_s_ell_reallocate_nz.o \ +psb_s_ell_reinit.o \ +psb_s_ell_rowsum.o \ +psb_s_ell_scal.o \ +psb_s_ell_scals.o \ +psb_s_ell_trim.o \ +psb_s_hdia_allocate_mnnz.o \ +psb_s_hdia_csmv.o \ +psb_s_hdia_mold.o \ +psb_s_hdia_print.o \ +psb_s_hll_aclsum.o \ +psb_s_hll_allocate_mnnz.o \ +psb_s_hll_arwsum.o \ +psb_s_hll_colsum.o \ +psb_s_hll_csgetblk.o \ +psb_s_hll_csgetptn.o \ +psb_s_hll_csgetrow.o \ +psb_s_hll_csmm.o \ +psb_s_hll_csmv.o \ +psb_s_hll_csnm1.o \ +psb_s_hll_csnmi.o \ +psb_s_hll_csput.o \ +psb_s_hll_cssm.o \ +psb_s_hll_cssv.o \ +psb_s_hll_get_diag.o \ +psb_s_hll_maxval.o \ +psb_s_hll_mold.o \ +psb_s_hll_print.o \ +psb_s_hll_reallocate_nz.o \ +psb_s_hll_reinit.o \ +psb_s_hll_rowsum.o \ +psb_s_hll_scal.o \ +psb_s_hll_scals.o \ +psb_s_mv_dia_from_coo.o \ +psb_s_mv_ell_from_coo.o \ +psb_s_mv_ell_from_fmt.o \ +psb_s_mv_ell_to_coo.o \ +psb_s_mv_ell_to_fmt.o \ +psb_s_mv_hdia_from_coo.o \ +psb_s_mv_hdia_to_coo.o \ +psb_s_mv_hll_from_coo.o \ +psb_s_mv_hll_from_fmt.o \ +psb_s_mv_hll_to_coo.o \ +psb_s_mv_hll_to_fmt.o \ +psb_c_cp_dia_from_coo.o \ +psb_c_cp_dia_to_coo.o \ +psb_c_cp_ell_from_coo.o \ +psb_c_cp_ell_from_fmt.o \ +psb_c_cp_ell_to_coo.o \ +psb_c_cp_ell_to_fmt.o \ +psb_c_cp_hdia_from_coo.o \ +psb_c_cp_hdia_to_coo.o \ +psb_c_cp_hll_from_coo.o \ +psb_c_cp_hll_from_fmt.o \ +psb_c_cp_hll_to_coo.o \ +psb_c_cp_hll_to_fmt.o \ +psb_c_dia_aclsum.o \ +psb_c_dia_allocate_mnnz.o \ +psb_c_dia_arwsum.o \ +psb_c_dia_colsum.o \ +psb_c_dia_csgetptn.o \ +psb_c_dia_csgetrow.o \ +psb_c_dia_csmm.o \ +psb_c_dia_csmv.o \ +psb_c_dia_get_diag.o \ +psb_c_dia_maxval.o \ +psb_c_dia_mold.o \ +psb_c_dia_print.o \ +psb_c_dia_reallocate_nz.o \ +psb_c_dia_reinit.o \ +psb_c_dia_rowsum.o \ +psb_c_dia_scal.o \ +psb_c_dia_scals.o \ +psb_c_ell_aclsum.o \ +psb_c_ell_allocate_mnnz.o \ +psb_c_ell_arwsum.o \ +psb_c_ell_colsum.o \ +psb_c_ell_csgetblk.o \ +psb_c_ell_csgetptn.o \ +psb_c_ell_csgetrow.o \ +psb_c_ell_csmm.o \ +psb_c_ell_csmv.o \ +psb_c_ell_csnm1.o \ +psb_c_ell_csnmi.o \ +psb_c_ell_csput.o \ +psb_c_ell_cssm.o \ +psb_c_ell_cssv.o \ +psb_c_ell_get_diag.o \ +psb_c_ell_maxval.o \ +psb_c_ell_mold.o \ +psb_c_ell_print.o \ +psb_c_ell_reallocate_nz.o \ +psb_c_ell_reinit.o \ +psb_c_ell_rowsum.o \ +psb_c_ell_scal.o \ +psb_c_ell_scals.o \ +psb_c_ell_trim.o \ +psb_c_hdia_allocate_mnnz.o \ +psb_c_hdia_csmv.o \ +psb_c_hdia_mold.o \ +psb_c_hdia_print.o \ +psb_c_hll_aclsum.o \ +psb_c_hll_allocate_mnnz.o \ +psb_c_hll_arwsum.o \ +psb_c_hll_colsum.o \ +psb_c_hll_csgetblk.o \ +psb_c_hll_csgetptn.o \ +psb_c_hll_csgetrow.o \ +psb_c_hll_csmm.o \ +psb_c_hll_csmv.o \ +psb_c_hll_csnm1.o \ +psb_c_hll_csnmi.o \ +psb_c_hll_csput.o \ +psb_c_hll_cssm.o \ +psb_c_hll_cssv.o \ +psb_c_hll_get_diag.o \ +psb_c_hll_maxval.o \ +psb_c_hll_mold.o \ +psb_c_hll_print.o \ +psb_c_hll_reallocate_nz.o \ +psb_c_hll_reinit.o \ +psb_c_hll_rowsum.o \ +psb_c_hll_scal.o \ +psb_c_hll_scals.o \ +psb_c_mv_dia_from_coo.o \ +psb_c_mv_ell_from_coo.o \ +psb_c_mv_ell_from_fmt.o \ +psb_c_mv_ell_to_coo.o \ +psb_c_mv_ell_to_fmt.o \ +psb_c_mv_hdia_from_coo.o \ +psb_c_mv_hdia_to_coo.o \ +psb_c_mv_hll_from_coo.o \ +psb_c_mv_hll_from_fmt.o \ +psb_c_mv_hll_to_coo.o \ +psb_c_mv_hll_to_fmt.o \ +psb_d_cp_dia_from_coo.o \ +psb_d_cp_dia_to_coo.o \ +psb_d_cp_ell_from_coo.o \ +psb_d_cp_ell_from_fmt.o \ +psb_d_cp_ell_to_coo.o \ +psb_d_cp_ell_to_fmt.o \ +psb_d_cp_hdia_from_coo.o \ +psb_d_cp_hdia_to_coo.o \ +psb_d_cp_hll_from_coo.o \ +psb_d_cp_hll_from_fmt.o \ +psb_d_cp_hll_to_coo.o \ +psb_d_cp_hll_to_fmt.o \ +psb_d_dia_aclsum.o \ +psb_d_dia_allocate_mnnz.o \ +psb_d_dia_arwsum.o \ +psb_d_dia_colsum.o \ +psb_d_dia_csgetptn.o \ +psb_d_dia_csgetrow.o \ +psb_d_dia_csmm.o \ +psb_d_dia_csmv.o \ +psb_d_dia_get_diag.o \ +psb_d_dia_maxval.o \ +psb_d_dia_mold.o \ +psb_d_dia_print.o \ +psb_d_dia_reallocate_nz.o \ +psb_d_dia_reinit.o \ +psb_d_dia_rowsum.o \ +psb_d_dia_scal.o \ +psb_d_dia_scals.o \ +psb_d_ell_aclsum.o \ +psb_d_ell_allocate_mnnz.o \ +psb_d_ell_arwsum.o \ +psb_d_ell_colsum.o \ +psb_d_ell_csgetblk.o \ +psb_d_ell_csgetptn.o \ +psb_d_ell_csgetrow.o \ +psb_d_ell_csmm.o \ +psb_d_ell_csmv.o \ +psb_d_ell_csnm1.o \ +psb_d_ell_csnmi.o \ +psb_d_ell_csput.o \ +psb_d_ell_cssm.o \ +psb_d_ell_cssv.o \ +psb_d_ell_get_diag.o \ +psb_d_ell_maxval.o \ +psb_d_ell_mold.o \ +psb_d_ell_print.o \ +psb_d_ell_reallocate_nz.o \ +psb_d_ell_reinit.o \ +psb_d_ell_rowsum.o \ +psb_d_ell_scal.o \ +psb_d_ell_scals.o \ +psb_d_ell_trim.o \ +psb_d_hdia_allocate_mnnz.o \ +psb_d_hdia_csmv.o \ +psb_d_hdia_mold.o \ +psb_d_hdia_print.o \ +psb_d_hll_aclsum.o \ +psb_d_hll_allocate_mnnz.o \ +psb_d_hll_arwsum.o \ +psb_d_hll_colsum.o \ +psb_d_hll_csgetblk.o \ +psb_d_hll_csgetptn.o \ +psb_d_hll_csgetrow.o \ +psb_d_hll_csmm.o \ +psb_d_hll_csmv.o \ +psb_d_hll_csnm1.o \ +psb_d_hll_csnmi.o \ +psb_d_hll_csput.o \ +psb_d_hll_cssm.o \ +psb_d_hll_cssv.o \ +psb_d_hll_get_diag.o \ +psb_d_hll_maxval.o \ +psb_d_hll_mold.o \ +psb_d_hll_print.o \ +psb_d_hll_reallocate_nz.o \ +psb_d_hll_reinit.o \ +psb_d_hll_rowsum.o \ +psb_d_hll_scal.o \ +psb_d_hll_scals.o \ +psb_d_mv_dia_from_coo.o \ +psb_d_mv_ell_from_coo.o \ +psb_d_mv_ell_from_fmt.o \ +psb_d_mv_ell_to_coo.o \ +psb_d_mv_ell_to_fmt.o \ +psb_d_mv_hdia_from_coo.o \ +psb_d_mv_hdia_to_coo.o \ +psb_d_mv_hll_from_coo.o \ +psb_d_mv_hll_from_fmt.o \ +psb_d_mv_hll_to_coo.o \ +psb_d_mv_hll_to_fmt.o \ +psb_z_cp_dia_from_coo.o \ +psb_z_cp_dia_to_coo.o \ +psb_z_cp_ell_from_coo.o \ +psb_z_cp_ell_from_fmt.o \ +psb_z_cp_ell_to_coo.o \ +psb_z_cp_ell_to_fmt.o \ +psb_z_cp_hdia_from_coo.o \ +psb_z_cp_hdia_to_coo.o \ +psb_z_cp_hll_from_coo.o \ +psb_z_cp_hll_from_fmt.o \ +psb_z_cp_hll_to_coo.o \ +psb_z_cp_hll_to_fmt.o \ +psb_z_dia_aclsum.o \ +psb_z_dia_allocate_mnnz.o \ +psb_z_dia_arwsum.o \ +psb_z_dia_colsum.o \ +psb_z_dia_csgetptn.o \ +psb_z_dia_csgetrow.o \ +psb_z_dia_csmm.o \ +psb_z_dia_csmv.o \ +psb_z_dia_get_diag.o \ +psb_z_dia_maxval.o \ +psb_z_dia_mold.o \ +psb_z_dia_print.o \ +psb_z_dia_reallocate_nz.o \ +psb_z_dia_reinit.o \ +psb_z_dia_rowsum.o \ +psb_z_dia_scal.o \ +psb_z_dia_scals.o \ +psb_z_ell_aclsum.o \ +psb_z_ell_allocate_mnnz.o \ +psb_z_ell_arwsum.o \ +psb_z_ell_colsum.o \ +psb_z_ell_csgetblk.o \ +psb_z_ell_csgetptn.o \ +psb_z_ell_csgetrow.o \ +psb_z_ell_csmm.o \ +psb_z_ell_csmv.o \ +psb_z_ell_csnm1.o \ +psb_z_ell_csnmi.o \ +psb_z_ell_csput.o \ +psb_z_ell_cssm.o \ +psb_z_ell_cssv.o \ +psb_z_ell_get_diag.o \ +psb_z_ell_maxval.o \ +psb_z_ell_mold.o \ +psb_z_ell_print.o \ +psb_z_ell_reallocate_nz.o \ +psb_z_ell_reinit.o \ +psb_z_ell_rowsum.o \ +psb_z_ell_scal.o \ +psb_z_ell_scals.o \ +psb_z_ell_trim.o \ +psb_z_hdia_allocate_mnnz.o \ +psb_z_hdia_csmv.o \ +psb_z_hdia_mold.o \ +psb_z_hdia_print.o \ +psb_z_hll_aclsum.o \ +psb_z_hll_allocate_mnnz.o \ +psb_z_hll_arwsum.o \ +psb_z_hll_colsum.o \ +psb_z_hll_csgetblk.o \ +psb_z_hll_csgetptn.o \ +psb_z_hll_csgetrow.o \ +psb_z_hll_csmm.o \ +psb_z_hll_csmv.o \ +psb_z_hll_csnm1.o \ +psb_z_hll_csnmi.o \ +psb_z_hll_csput.o \ +psb_z_hll_cssm.o \ +psb_z_hll_cssv.o \ +psb_z_hll_get_diag.o \ +psb_z_hll_maxval.o \ +psb_z_hll_mold.o \ +psb_z_hll_print.o \ +psb_z_hll_reallocate_nz.o \ +psb_z_hll_reinit.o \ +psb_z_hll_rowsum.o \ +psb_z_hll_scal.o \ +psb_z_hll_scals.o \ +psb_z_mv_dia_from_coo.o \ +psb_z_mv_ell_from_coo.o \ +psb_z_mv_ell_from_fmt.o \ +psb_z_mv_ell_to_coo.o \ +psb_z_mv_ell_to_fmt.o \ +psb_z_mv_hdia_from_coo.o \ +psb_z_mv_hdia_to_coo.o \ +psb_z_mv_hll_from_coo.o \ +psb_z_mv_hll_from_fmt.o \ +psb_z_mv_hll_to_coo.o \ +psb_z_mv_hll_to_fmt.o \ +psi_s_xtr_ell_from_coo.o \ +psi_c_xtr_ell_from_coo.o \ +psi_d_xtr_ell_from_coo.o \ +psi_z_xtr_ell_from_coo.o \ +psi_s_convert_ell_from_coo.o \ +psi_c_convert_ell_from_coo.o \ +psi_d_convert_ell_from_coo.o \ +psi_z_convert_ell_from_coo.o \ +psi_s_convert_hll_from_coo.o \ +psi_c_convert_hll_from_coo.o \ +psi_d_convert_hll_from_coo.o \ +psi_z_convert_hll_from_coo.o \ +psi_s_xtr_dia_from_coo.o \ +psi_c_xtr_dia_from_coo.o \ +psi_d_xtr_dia_from_coo.o \ +psi_z_xtr_dia_from_coo.o \ +psi_s_xtr_coo_from_dia.o \ +psi_d_xtr_coo_from_dia.o \ +psi_c_xtr_coo_from_dia.o \ +psi_z_xtr_coo_from_dia.o \ +psi_s_convert_dia_from_coo.o \ +psi_c_convert_dia_from_coo.o \ +psi_d_convert_dia_from_coo.o \ +psi_z_convert_dia_from_coo.o \ +psb_s_dns_mat_impl.o \ +psb_d_dns_mat_impl.o \ +psb_c_dns_mat_impl.o \ +psb_z_dns_mat_impl.o + +objs: $(OBJS) + +lib: objs + ar cur ../$(LIBNAME) $(OBJS) + +clean: + /bin/rm -f $(OBJS) diff --git a/ext/impl/psb_c_cp_dia_from_coo.f90 b/ext/impl/psb_c_cp_dia_from_coo.f90 new file mode 100644 index 00000000..2d2b1caa --- /dev/null +++ b/ext/impl/psb_c_cp_dia_from_coo.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_cp_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_cp_dia_from_coo + implicit none + + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_convert_dia_from_coo(a,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_dia_from_coo(a,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cp_dia_from_coo diff --git a/ext/impl/psb_c_cp_dia_to_coo.f90 b/ext/impl/psb_c_cp_dia_to_coo.f90 new file mode 100644 index 00000000..9975bec0 --- /dev/null +++ b/ext/impl/psb_c_cp_dia_to_coo.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_cp_dia_to_coo + implicit none + + class(psb_c_dia_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: i, j, k,nr,nza,nc, nzd + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + + call psi_c_xtr_coo_from_dia(nr,nc,& + & b%ia, b%ja, b%val, nzd, & + & size(a%data,1),size(a%data,2),& + & a%data,a%offset,info) + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +end subroutine psb_c_cp_dia_to_coo diff --git a/ext/impl/psb_c_cp_ell_from_coo.f90 b/ext/impl/psb_c_cp_ell_from_coo.f90 new file mode 100644 index 00000000..28d7d242 --- /dev/null +++ b/ext/impl/psb_c_cp_ell_from_coo.f90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_cp_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc + integer(psb_ipk_) :: nzm, ir, ic, k + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + ! This is to have fix_coo called behind the scenes + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_c_convert_ell_from_coo(a,b,info) + else + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call psi_c_convert_ell_from_coo(a,tmp,info) + if (info == psb_success_) call tmp%free() + end if + if (info /= psb_success_) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + +end subroutine psb_c_cp_ell_from_coo diff --git a/ext/impl/psb_c_cp_ell_from_fmt.f90 b/ext/impl/psb_c_cp_ell_from_fmt.f90 new file mode 100644 index 00000000..309063b9 --- /dev/null +++ b/ext/impl/psb_c_cp_ell_from_fmt.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_cp_ell_from_fmt + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b,info) + + type is (psb_c_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_c_cp_ell_from_fmt diff --git a/ext/impl/psb_c_cp_ell_to_coo.f90 b/ext/impl/psb_c_cp_ell_to_coo.f90 new file mode 100644 index 00000000..ec6bcff5 --- /dev/null +++ b/ext/impl/psb_c_cp_ell_to_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_cp_ell_to_coo + implicit none + + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: i, j, k, nr, nc, nza + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + b%val(k) = a%val(i,j) + end do + end do + call b%set_nzeros(a%get_nzeros()) + call b%fix(info) + call b%set_host() + +end subroutine psb_c_cp_ell_to_coo diff --git a/ext/impl/psb_c_cp_ell_to_fmt.f90 b/ext/impl/psb_c_cp_ell_to_fmt.f90 new file mode 100644 index 00000000..0c6a6903 --- /dev/null +++ b/ext/impl/psb_c_cp_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_cp_ell_to_fmt + implicit none + + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_c_ell_sparse_mat) + if (a%is_dev()) call a%sync() + + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_cp_ell_to_fmt diff --git a/ext/impl/psb_c_cp_hdia_from_coo.f90 b/ext/impl/psb_c_cp_hdia_from_coo.f90 new file mode 100644 index 00000000..a9e1ca21 --- /dev/null +++ b/ext/impl/psb_c_cp_hdia_from_coo.f90 @@ -0,0 +1,222 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_cp_hdia_from_coo + implicit none + + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call inner_cp_hdia_from_coo(a,b,info) + if (info /= psb_success_) goto 9999 + else + call b%cp_to_coo(tmp,info) + if (info /= psb_success_) goto 9999 + if (.not.tmp%is_by_rows()) call tmp%fix(info) + if (info /= psb_success_) goto 9999 + call inner_cp_hdia_from_coo(a,tmp,info) + if (info /= psb_success_) goto 9999 + call tmp%free() + end if + call a%set_host() + + return + +9999 continue + + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine inner_cp_hdia_from_coo(a,tmp,info) + use psb_base_mod + use psi_ext_util_mod + + implicit none + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,mi,mj,dm,bi,w + integer(psb_ipk_),allocatable :: d(:), offset(:), irsz(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext, nzout + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + logical, parameter :: debug=.false. + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = tmp%psb_c_base_sparse_mat + + hacksize = a%hacksize + a%nhacks = (nr+hacksize-1)/hacksize + nhacks = a%nhacks + + ndiag = nr+nc-1 + if (info == psb_success_) call psb_realloc(nr,irsz,info) + if (info == psb_success_) call psb_realloc(ndiag,d,info) + if (info == psb_success_) call psb_realloc(ndiag,offset,info) + if (info == psb_success_) call psb_realloc(nhacks+1,a%hackoffsets,info) + if (info /= psb_success_) return + + irsz = 0 + do k=1,nza + ir = tmp%ia(k) + irsz(ir) = irsz(ir)+1 + end do + + a%nzeros = 0 + d = 0 + iszd = 0 + a%hackOffsets(1)=0 + klast1 = 1 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + if (debug) then + write(*,*) 'Loop iteration ',k,nhacks,i,ib,nr + write(*,*) 'RW:',tmp%ia(kfirst),tmp%ia(klast1-1) + write(*,*) 'CL:',tmp%ja(kfirst),tmp%ja(klast1-1) + end if + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, offset, info, initd=.false., cleard=.true.) + iszd = iszd + nd + a%hackOffsets(k+1)=iszd + if (debug) write(*,*) 'From chunk ',k,i,ib,sum(irsz(i:i+ib-1)),': ',nd, iszd + if (debug) write(*,*) 'offset ', offset(1:nd) + end do + if (debug) then + write(*,*) 'Hackcount ',nhacks,' Allocation height ',iszd + write(*,*) 'Hackoffsets ',a%hackOffsets(:) + end if + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%diaOffsets,info) + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%val,info) + if (info /= psb_success_) return + klast1 = 1 + ! + ! Second run: copy elements + ! + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, a%diaOffsets(hackfirst+1:hacknext), info, & + & initd=.false., cleard=.false.) + if (debug) write(*,*) 'Out from dia_offset: ', a%diaOffsets(hackfirst+1:hacknext) + call psi_c_xtr_dia_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & tmp%val(kfirst:klast1-1), & + & d,hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),info,& + & initdata=.true.,rdisp=(i-1)) + + call countnz(nr,nc,(i-1),hacksize,(hacknext-hackfirst),& + & a%diaOffsets(hackfirst+1:hacknext),nzout) + a%nzeros = a%nzeros + nzout + call cleand(nr,(hacknext-hackfirst),d,a%diaOffsets(hackfirst+1:hacknext)) + + end do + if (debug) then + write(*,*) 'NZEROS: ',a%nzeros, nza + write(*,*) 'diaoffsets: ',a%diaOffsets(1:iszd) + write(*,*) 'values: ' + j=0 + do k=1,nhacks + write(*,*) 'Hack No. ',k + do i=1,hacksize*(iszd/nhacks) + j = j + 1 + write(*,*) j, a%val(j) + end do + end do + end if + end subroutine inner_cp_hdia_from_coo + + subroutine countnz(nr,nc,rdisp,nrd,ncd,offsets,nz) + implicit none + integer(psb_ipk_), intent(in) :: nr,nc,nrd,ncd,rdisp,offsets(:) + integer(psb_ipk_), intent(out) :: nz + ! + integer(psb_ipk_) :: i,j,k, ir, jc, m4, ir1, ir2, nrcmdisp, rdisp1 + nz = 0 + nrcmdisp = min(nr-rdisp,nc-rdisp) + rdisp1 = 1-rdisp + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + nz = nz + (ir2-ir1+1) + end do + end subroutine countnz + + subroutine cleand(nr,nd,d,offset) + implicit none + integer(psb_ipk_), intent(in) :: nr,nd,offset(:) + integer(psb_ipk_), intent(inout) :: d(:) + integer(psb_ipk_) :: i,id + + do i=1,nd + id = offset(i) + nr + d(id) = 0 + end do + end subroutine cleand + +end subroutine psb_c_cp_hdia_from_coo diff --git a/ext/impl/psb_c_cp_hdia_to_coo.f90 b/ext/impl/psb_c_cp_hdia_to_coo.f90 new file mode 100644 index 00000000..32801653 --- /dev/null +++ b/ext/impl/psb_c_cp_hdia_to_coo.f90 @@ -0,0 +1,84 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_cp_hdia_to_coo + use psi_ext_util_mod + implicit none + + class(psb_c_hdia_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + call b%set_nzeros(nza) + call b%set_sort_status(psb_unsorted_) + nhacks = a%nhacks + hacksize = a%hacksize + j = 0 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_c_xtr_coo_from_dia(nr,nc,& + & b%ia(j+1:), b%ja(j+1:), b%val(j+1:), nzd, & + & hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) +!!$ write(*,*) 'diaoffsets',ib,' : ',ib - abs(a%diaOffsets(hackfirst+1:hacknext)) +!!$ write(*,*) 'sum',ib,j,' : ',sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + j = j + nzd + end do + if (nza /= j) then + write(*,*) 'Wrong counts in hdia_to_coo',j,nza + info = -8 + return + end if + call b%set_host() + call b%fix(info) + +end subroutine psb_c_cp_hdia_to_coo diff --git a/ext/impl/psb_c_cp_hll_from_coo.f90 b/ext/impl/psb_c_cp_hll_from_coo.f90 new file mode 100644 index 00000000..506196c2 --- /dev/null +++ b/ext/impl/psb_c_cp_hll_from_coo.f90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_cp_hll_from_coo + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + character(len=20) :: name='hll_from_coo' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + if (b%is_dev()) call b%sync() + hksz = psi_get_hksz() + if (b%is_by_rows()) then + call psi_convert_hll_from_coo(a,hksz,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_hll_from_coo(a,hksz,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_cp_hll_from_coo diff --git a/ext/impl/psb_c_cp_hll_from_fmt.f90 b/ext/impl/psb_c_cp_hll_from_fmt.f90 new file mode 100644 index 00000000..0849561f --- /dev/null +++ b/ext/impl/psb_c_cp_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_cp_hll_from_fmt + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + class is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_c_hll_sparse_mat) + ! write(0,*) 'From type_hll' + if (b%is_dev()) call b%sync() + + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%hkoffs, a%hkoffs, info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) a%hksz = b%hksz + if (info == 0) a%nzt = b%nzt + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_c_cp_hll_from_fmt diff --git a/ext/impl/psb_c_cp_hll_to_coo.f90 b/ext/impl/psb_c_cp_hll_to_coo.f90 new file mode 100644 index 00000000..0ff46352 --- /dev/null +++ b/ext/impl/psb_c_cp_hll_to_coo.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_cp_hll_to_coo + implicit none + + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j, jj,k,ir, isz,err_act, hksz, hk, mxrwl,& + & irs, nzblk, kc + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + + j = 1 + kc = 1 + k = 1 + hksz = a%hksz + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + nzblk = sum(a%irn(i:i+ir-1)) + call inner_copy(i,ir,b%ia(kc:kc+nzblk-1),& + & b%ja(kc:kc+nzblk-1),b%val(kc:kc+nzblk-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & hksz) + k = k + isz + kc = kc + nzblk + + enddo + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +contains + + subroutine inner_copy(i,ir,iac,& + & jac,valc,ja,val,irn,ld) + integer(psb_ipk_) :: i,ir,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*) + complex(psb_spk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc + kc = 1 + do ii = 1, ir + nc = irn(ii) + do jj=1,nc + iac(kc) = i+ii-1 + jac(kc) = ja(ii,jj) + valc(kc) = val(ii,jj) + kc = kc + 1 + end do + end do + + end subroutine inner_copy + +end subroutine psb_c_cp_hll_to_coo diff --git a/ext/impl/psb_c_cp_hll_to_fmt.f90 b/ext/impl/psb_c_cp_hll_to_fmt.f90 new file mode 100644 index 00000000..df8fa3b7 --- /dev/null +++ b/ext/impl/psb_c_cp_hll_to_fmt.f90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_cp_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_cp_hll_to_fmt + implicit none + + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_c_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%hkoffs, b%hkoffs , info) + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + if (info == 0) b%hksz = a%hksz + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_cp_hll_to_fmt diff --git a/ext/impl/psb_c_dia_aclsum.f90 b/ext/impl/psb_c_dia_aclsum.f90 new file mode 100644 index 00000000..4bd8d440 --- /dev/null +++ b/ext/impl/psb_c_dia_aclsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_aclsum(d,a) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_aclsum + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_aclsum diff --git a/ext/impl/psb_c_dia_allocate_mnnz.f90 b/ext/impl/psb_c_dia_allocate_mnnz.f90 new file mode 100644 index 00000000..37fb34e1 --- /dev/null +++ b/ext/impl/psb_c_dia_allocate_mnnz.f90 @@ -0,0 +1,88 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -ione )/m + else + nz_ = ((max(7*m,7*n,ione)+m-ione)/m) + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,nz_,a%data,info) + if (info == psb_success_) call psb_realloc(m+n,a%offset,info) + if (info == psb_success_) then + a%data = 0 + a%offset = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_allocate_mnnz diff --git a/ext/impl/psb_c_dia_arwsum.f90 b/ext/impl/psb_c_dia_arwsum.f90 new file mode 100644 index 00000000..fe40deb8 --- /dev/null +++ b/ext/impl/psb_c_dia_arwsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_arwsum(d,a) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_arwsum + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_arwsum diff --git a/ext/impl/psb_c_dia_colsum.f90 b/ext/impl/psb_c_dia_colsum.f90 new file mode 100644 index 00000000..ed43fa12 --- /dev/null +++ b/ext/impl/psb_c_dia_colsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_colsum(d,a) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_colsum + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = cone + else + d = czero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_colsum diff --git a/ext/impl/psb_c_dia_csgetptn.f90 b/ext/impl/psb_c_dia_csgetptn.f90 new file mode 100644 index 00000000..ad479d35 --- /dev/null +++ b/ext/impl/psb_c_dia_csgetptn.f90 @@ -0,0 +1,188 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_csgetptn + implicit none + + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + + + end subroutine dia_getptn + +end subroutine psb_c_dia_csgetptn diff --git a/ext/impl/psb_c_dia_csgetrow.f90 b/ext/impl/psb_c_dia_csgetrow.f90 new file mode 100644 index 00000000..2989b20f --- /dev/null +++ b/ext/impl/psb_c_dia_csgetrow.f90 @@ -0,0 +1,199 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_csgetrow + implicit none + + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + if (chksz) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,val,info) + end if + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + val(nzin_) = a%data(i,j) + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + end subroutine dia_getrow +end subroutine psb_c_dia_csgetrow diff --git a/ext/impl/psb_c_dia_csmm.f90 b/ext/impl/psb_c_dia_csmm.f90 new file mode 100644 index 00000000..b65c4651 --- /dev/null +++ b/ext/impl/psb_c_dia_csmm.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_csmm + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_dia_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i,1:nxy) = y(i,1:nxy) + alpha*data(i,j)*x(i+off(j),1:nxy) + enddo + enddo + + end subroutine psb_c_dia_csmm_inner + +end subroutine psb_c_dia_csmm diff --git a/ext/impl/psb_c_dia_csmv.f90 b/ext/impl/psb_c_dia_csmv.f90 new file mode 100644 index 00000000..cf1ef677 --- /dev/null +++ b/ext/impl/psb_c_dia_csmv.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_csmv + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_dia_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i) = y(i) + alpha*data(i,j)*x(i+off(j)) + enddo + enddo + + end subroutine psb_c_dia_csmv_inner + +end subroutine psb_c_dia_csmv diff --git a/ext/impl/psb_c_dia_get_diag.f90 b/ext/impl/psb_c_dia_get_diag.f90 new file mode 100644 index 00000000..d868b62d --- /dev/null +++ b/ext/impl/psb_c_dia_get_diag.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_get_diag(a,d,info) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_get_diag + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = cone + else + do i=1, size(a%offset) + if (a%offset(i) == 0) then + d(1:mnm) = a%data(1:mnm,i) + exit + end if + end do + end if + do i=mnm+1,size(d) + d(i) = czero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_get_diag diff --git a/ext/impl/psb_c_dia_maxval.f90 b/ext/impl/psb_c_dia_maxval.f90 new file mode 100644 index 00000000..03a2be82 --- /dev/null +++ b/ext/impl/psb_c_dia_maxval.f90 @@ -0,0 +1,54 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +function psb_c_dia_maxval(a) result(res) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_maxval + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_maxval' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = sone + else + res = szero + end if + + res = max(res,maxval(abs(a%data))) + +end function psb_c_dia_maxval diff --git a/ext/impl/psb_c_dia_mold.f90 b/ext/impl/psb_c_dia_mold.f90 new file mode 100644 index 00000000..1d694828 --- /dev/null +++ b/ext/impl/psb_c_dia_mold.f90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_mold(a,b,info) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_mold + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_dia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_mold diff --git a/ext/impl/psb_c_dia_print.f90 b/ext/impl/psb_c_dia_print.f90 new file mode 100644 index 00000000..f3233366 --- /dev/null +++ b/ext/impl/psb_c_dia_print.f90 @@ -0,0 +1,148 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_dia_print' + logical, parameter :: debug=.false. + + class(psb_c_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, jc, ir1, ir2 + + write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_c_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + write(iout,*) nr, nc, nz + + nc=size(a%data,2) + + + + if(present(iv)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) iv(i),iv(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),(i+jc),a%data(i,j) + enddo + enddo + + endif + +end subroutine psb_c_dia_print diff --git a/ext/impl/psb_c_dia_reallocate_nz.f90 b/ext/impl/psb_c_dia_reallocate_nz.f90 new file mode 100644 index 00000000..c46cd465 --- /dev/null +++ b/ext/impl/psb_c_dia_reallocate_nz.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_reallocate_nz(nz,a) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='c_dia_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! Ans: NOTHING. + ! + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_reallocate_nz diff --git a/ext/impl/psb_c_dia_reinit.f90 b/ext/impl/psb_c_dia_reinit.f90 new file mode 100644 index 00000000..04a345eb --- /dev/null +++ b/ext/impl/psb_c_dia_reinit.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_reinit(a,clear) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_reinit + implicit none + + class(psb_c_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%data(:,:) = czero + call a%set_upd() + call a%set_host() + + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_reinit diff --git a/ext/impl/psb_c_dia_rowsum.f90 b/ext/impl/psb_c_dia_rowsum.f90 new file mode 100644 index 00000000..1f36dab4 --- /dev/null +++ b/ext/impl/psb_c_dia_rowsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_rowsum(d,a) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_rowsum + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_rowsum diff --git a/ext/impl/psb_c_dia_scal.f90 b/ext/impl/psb_c_dia_scal.f90 new file mode 100644 index 00000000..8f35b7c1 --- /dev/null +++ b/ext/impl/psb_c_dia_scal.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_dia_scal(d,a,info,side) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_scal + implicit none + class(psb_c_dia_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), nc, jc, nr, ir1, ir2 + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%data(i,:) = a%data(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + nr=size(a%data,1) + nc=size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + a%data(i,j) = a%data(i,j) * d(i+jc) + enddo + enddo + + end if + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_scal diff --git a/ext/impl/psb_c_dia_scals.f90 b/ext/impl/psb_c_dia_scals.f90 new file mode 100644 index 00000000..a9ca5db1 --- /dev/null +++ b/ext/impl/psb_c_dia_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_dia_scals(d,a,info) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_dia_scals + implicit none + class(psb_c_dia_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%data(:,:) = a%data(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dia_scals diff --git a/ext/impl/psb_c_dns_mat_impl.f90 b/ext/impl/psb_c_dns_mat_impl.f90 new file mode 100644 index 00000000..8e99af8b --- /dev/null +++ b/ext/impl/psb_c_dns_mat_impl.f90 @@ -0,0 +1,724 @@ + +!> Function csmv: +!! \memberof psb_c_dns_sparse_mat +!! \brief Product by a dense rank 1 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_c_dns_csmv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_csmv + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act, m, n, lda + character(len=20) :: name='c_dns_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (trans_ == 'N') then + m=a%get_nrows() + n=a%get_ncols() + else + n=a%get_nrows() + m=a%get_ncols() + end if + lda = size(a%val,1) + + + call cgemv(trans_,a%get_nrows(),a%get_ncols(),alpha,& + & a%val,size(a%val,1),x,1,beta,y,1) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dns_csmv + + +!> Function csmm: +!! \memberof psb_c_dns_sparse_mat +!! \brief Product by a dense rank 2 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:,:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:,:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_c_dns_csmm(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_csmm + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act,m,n,k, lda, ldx, ldy + character(len=20) :: name='c_dns_csmm' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (psb_toupper(trans_)=='N') then + m = a%get_nrows() + k = a%get_ncols() + n = min(size(y,2),size(x,2)) + else + k = a%get_nrows() + m = a%get_ncols() + n = min(size(y,2),size(x,2)) + end if + lda = size(a%val,1) + ldx = size(x,1) + ldy = size(y,1) + call cgemm(trans_,'N',m,n,k,alpha,a%val,lda,x,ldx,beta,y,ldy) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dns_csmm + + + +! +! +!> Function csnmi: +!! \memberof psb_c_dns_sparse_mat +!! \brief Operator infinity norm +!! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) +!! +! +function psb_c_dns_csnmi(a) result(res) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_csnmi + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + ! + integer(psb_ipk_) :: i + real(psb_spk_) :: acc + + res = szero + if (a%is_dev()) call a%sync() + + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_c_dns_csnmi + + +! +!> Function get_diag: +!! \memberof psb_c_dns_sparse_mat +!! \brief Extract the diagonal of A. +!! +!! D(i) = A(i:i), i=1:min(nrows,ncols) +!! +!! \param d(:) The output diagonal +!! \param info return code. +! +subroutine psb_c_dns_get_diag(a,d,info) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_get_diag + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act, mnm, i + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + do i=1, mnm + d(i) = a%val(i,i) + end do + do i=mnm+1,size(d) + d(i) = czero + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dns_get_diag + + +! +! +!> Function reallocate_nz +!! \memberof psb_c_dns_sparse_mat +!! \brief One--parameters version of (re)allocate +!! +!! \param nz number of nonzeros to allocate for +!! i.e. makes sure that the internal storage +!! allows for NZ coefficients and their indices. +! +subroutine psb_c_dns_reallocate_nz(nz,a) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_dns_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! This is a no-op, allocation is fixed. + ! + if (a%is_dev()) call a%sync() + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dns_reallocate_nz + +! +!> Function mold: +!! \memberof psb_c_dns_sparse_mat +!! \brief Allocate a class(psb_c_dns_sparse_mat) with the +!! same dynamic type as the input. +!! This is equivalent to allocate( mold= ) and is provided +!! for those compilers not yet supporting mold. +!! \param b The output variable +!! \param info return code +! +subroutine psb_c_dns_mold(a,b,info) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_mold + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='dns_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + allocate(psb_c_dns_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dns_mold + +! +! +!> Function allocate_mnnz +!! \memberof psb_c_dns_sparse_mat +!! \brief Three-parameters version of allocate +!! +!! \param m number of rows +!! \param n number of cols +!! \param nz [estimated internally] number of nonzeros to allocate for +! +subroutine psb_c_dns_allocate_mnnz(m,n,a,nz) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + ! + integer(psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_/)) + goto 9999 + endif + + + ! Basic stuff common to all formats + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_bld() + call a%set_host() + + ! We ignore NZ in this case. + + call psb_realloc(m,n,a%val,info) + if (info == psb_success_) then + a%val = czero + a%nnz = 0 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dns_allocate_mnnz + + +! +! +! +!> Function csgetrow: +!! \memberof psb_c_dns_sparse_mat +!! \brief Get a (subset of) row(s) +!! +!! getrow is the basic method by which the other (getblk, clip) can +!! be implemented. +!! +!! Returns the set +!! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) +!! each identifying the position of a nonzero in A +!! i.e. +!! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) +!! with IMIN<=IA(:)<=IMAX +!! with JMIN<=JA(:)<=JMAX +!! IA,JA are reallocated as necessary. +!! +!! \param imin the minimum row index we are interested in +!! \param imax the minimum row index we are interested in +!! \param nz the number of output coefficients +!! \param ia(:) the output row indices +!! \param ja(:) the output col indices +!! \param val(:) the output coefficients +!! \param info return code +!! \param jmin [1] minimum col index +!! \param jmax [a\%get_ncols()] maximum col index +!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) +!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] +!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] +!! ( iren cannot be specified with rscale/cscale) +!! \param append [false] append to ia,ja +!! \param nzin [none] if append, then first new entry should go in entry nzin+1 +!! +! +subroutine psb_c_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_csgetrow + implicit none + + class(psb_c_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + ! + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i,j,k + character(len=20) :: name='csget' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax Function trim +!! \memberof psb_c_dns_sparse_mat +!! \brief Memory trim +!! Make sure the memory allocation of the sparse matrix is as tight as +!! possible given the actual number of nonzeros it contains. +! +subroutine psb_c_dns_trim(a) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_dns_trim + implicit none + class(psb_c_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + ! Do nothing, we are already at minimum memory. + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_dns_trim + +! +!> Function cp_from_coo: +!! \memberof psb_c_dns_sparse_mat +!! \brief Copy and convert from psb_c_coo_sparse_mat +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! + +subroutine psb_c_cp_dns_from_coo(a,b,info) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_cp_dns_from_coo + implicit none + + class(psb_c_dns_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + ! + type(psb_c_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: nza, nr, i,err_act, nc + integer(psb_ipk_), parameter :: maxtry=8 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (.not.b%is_by_rows()) then + ! This is to have fix_coo called behind the scenes + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + if (info /= psb_success_) return + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = tmp%psb_c_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = czero + do i=1, nza + a%val(tmp%ia(i),tmp%ja(i)) = tmp%val(i) + end do + a%nnz = nza + call tmp%free() + else + if (b%is_dev()) call b%sync() + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = czero + do i=1, nza + a%val(b%ia(i),b%ja(i)) = b%val(i) + end do + a%nnz = nza + end if + call a%set_host() + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_cp_dns_from_coo + + + +! +!> Function cp_to_coo: +!! \memberof psb_c_dns_sparse_mat +!! \brief Copy and convert to psb_c_coo_sparse_mat +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! + +subroutine psb_c_cp_dns_to_coo(a,b,info) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_cp_dns_to_coo + implicit none + + class(psb_c_dns_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_Ipk_) :: nza, nr, nc,i,j,k,err_act + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + + k = 0 + do i=1,a%get_nrows() + do j=1,a%get_ncols() + if (a%val(i,j) /= czero) then + k = k + 1 + b%ia(k) = i + b%ja(k) = j + b%val(k) = a%val(i,j) + end if + end do + end do + + call b%set_nzeros(nza) + call b%set_sort_status(psb_row_major_) + call b%set_asb() + call b%set_host() + +end subroutine psb_c_cp_dns_to_coo + + + +! +!> Function mv_to_coo: +!! \memberof psb_c_dns_sparse_mat +!! \brief Convert to psb_c_coo_sparse_mat, freeing the source. +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! +subroutine psb_c_mv_dns_to_coo(a,b,info) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_mv_dns_to_coo + implicit none + + class(psb_c_dns_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_to_coo(b,info) + call a%free() + return + +end subroutine psb_c_mv_dns_to_coo + + +! +!> Function mv_from_coo: +!! \memberof psb_c_dns_sparse_mat +!! \brief Convert from psb_c_coo_sparse_mat, freeing the source. +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! +! +subroutine psb_c_mv_dns_from_coo(a,b,info) + use psb_base_mod + use psb_c_dns_mat_mod, psb_protect_name => psb_c_mv_dns_from_coo + implicit none + + class(psb_c_dns_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_c_mv_dns_from_coo + diff --git a/ext/impl/psb_c_ell_aclsum.f90 b/ext/impl/psb_c_ell_aclsum.f90 new file mode 100644 index 00000000..3d5a292a --- /dev/null +++ b/ext/impl/psb_c_ell_aclsum.f90 @@ -0,0 +1,82 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_aclsum(d,a) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_aclsum + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + abs(a%val(i,j)) + end do + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_aclsum diff --git a/ext/impl/psb_c_ell_allocate_mnnz.f90 b/ext/impl/psb_c_ell_allocate_mnnz.f90 new file mode 100644 index 00000000..b137eb04 --- /dev/null +++ b/ext/impl/psb_c_ell_allocate_mnnz.f90 @@ -0,0 +1,91 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = -1 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_allocate_mnnz diff --git a/ext/impl/psb_c_ell_arwsum.f90 b/ext/impl/psb_c_ell_arwsum.f90 new file mode 100644 index 00000000..c047c742 --- /dev/null +++ b/ext/impl/psb_c_ell_arwsum.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_arwsum(d,a) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_arwsum + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + is_unit = a%is_unit() + + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = sone + else + d(i) = szero + end if + do j=1,a%irn(i) + d(i) = d(i) + abs(a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_arwsum diff --git a/ext/impl/psb_c_ell_colsum.f90 b/ext/impl/psb_c_ell_colsum.f90 new file mode 100644 index 00000000..6d06b589 --- /dev/null +++ b/ext/impl/psb_c_ell_colsum.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_colsum(d,a) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_colsum + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = cone + else + d = czero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_colsum diff --git a/ext/impl/psb_c_ell_csgetblk.f90 b/ext/impl/psb_c_ell_csgetblk.f90 new file mode 100644 index 00000000..deb07c25 --- /dev/null +++ b/ext/impl/psb_c_ell_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_csgetblk + implicit none + + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='ell_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_csgetblk diff --git a/ext/impl/psb_c_ell_csgetptn.f90 b/ext/impl/psb_c_ell_csgetptn.f90 new file mode 100644 index 00000000..821daa89 --- /dev/null +++ b/ext/impl/psb_c_ell_csgetptn.f90 @@ -0,0 +1,189 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_csgetptn + implicit none + + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_c_ell_csgetrow + implicit none + + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_c_ell_csmm + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_spk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_ell_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_ell_csmv + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_c_ell_csnm1 + + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info + real(psb_spk_), allocatable :: vt(:) + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_ell_csnm1' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = szero + nnz = a%get_nzeros() + m = a%get_nrows() + n = a%get_ncols() + allocate(vt(n),stat=info) + if (info /= 0) return + if (a%is_unit()) then + vt(:) = sone + else + vt(:) = szero + end if + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + vt(k) = vt(k) + abs(a%val(i,j)) + end do + end do + res = maxval(vt(1:n)) + deallocate(vt,stat=info) + + return + +end function psb_c_ell_csnm1 diff --git a/ext/impl/psb_c_ell_csnmi.f90 b/ext/impl/psb_c_ell_csnmi.f90 new file mode 100644 index 00000000..6dc9cfa4 --- /dev/null +++ b/ext/impl/psb_c_ell_csnmi.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_c_ell_csnmi(a) result(res) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_csnmi + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_spk_) :: acc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_csnmi' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = szero + is_unit = a%is_unit() + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + if (is_unit) acc = acc + sone + res = max(res,acc) + end do + +end function psb_c_ell_csnmi diff --git a/ext/impl/psb_c_ell_csput.f90 b/ext/impl/psb_c_ell_csput.f90 new file mode 100644 index 00000000..e0b0f47f --- /dev/null +++ b/ext/impl/psb_c_ell_csput.f90 @@ -0,0 +1,208 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_csput_a + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_ell_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + call psb_c_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info < 0) then + info = psb_err_internal_error_ + else if (info > 0) then + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarded entries not belonging to us.' + info = psb_success_ + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + +contains + + subroutine psb_c_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ilr, ilc, ip, & + & i1,i2,nr,nc,nnz,dupl + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='c_ell_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + ilr = -1 + ilc = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + + end do + + case(psb_dupl_add_) + ! Add + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = a%val(ir,ip) + val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_c_ell_srch_upd +end subroutine psb_c_ell_csput_a diff --git a/ext/impl/psb_c_ell_cssm.f90 b/ext/impl/psb_c_ell_cssm.f90 new file mode 100644 index 00000000..26e76030 --- /dev/null +++ b/ext/impl/psb_c_ell_cssm.f90 @@ -0,0 +1,375 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_cssm + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_spk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_ell_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_c_ell_cssv + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_spk_) :: acc + complex(psb_spk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_ell_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_c_ell_get_diag + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = cone + else + do i=1, mnm + if (1<=a%idiag(i).and.(a%idiag(i)<=size(a%ja,2))) then + d(i) = a%val(i,a%idiag(i)) + else + d(i) = czero + end if + end do + end if + do i=mnm+1,size(d) + d(i) = czero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_get_diag diff --git a/ext/impl/psb_c_ell_maxval.f90 b/ext/impl/psb_c_ell_maxval.f90 new file mode 100644 index 00000000..4de58b11 --- /dev/null +++ b/ext/impl/psb_c_ell_maxval.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_c_ell_maxval(a) result(res) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_maxval + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_spk_) :: acc + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_csnmi' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = sone + else + res = szero + end if + + do i = 1, a%get_nrows() + acc = maxval(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_c_ell_maxval diff --git a/ext/impl/psb_c_ell_mold.f90 b/ext/impl/psb_c_ell_mold.f90 new file mode 100644 index 00000000..c7c5d621 --- /dev/null +++ b/ext/impl/psb_c_ell_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_mold(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_mold + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='ell_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_mold diff --git a/ext/impl/psb_c_ell_print.f90 b/ext/impl/psb_c_ell_print.f90 new file mode 100644 index 00000000..1b8117a8 --- /dev/null +++ b/ext/impl/psb_c_ell_print.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_ell_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% ELL' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_c_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(i,j)),a%val(i,j) + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(i,j)),a%val(i,j) + end do + enddo + endif + endif + +end subroutine psb_c_ell_print diff --git a/ext/impl/psb_c_ell_reallocate_nz.f90 b/ext/impl/psb_c_ell_reallocate_nz.f90 new file mode 100644 index 00000000..b0d77568 --- /dev/null +++ b/ext/impl/psb_c_ell_reallocate_nz.f90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_reallocate_nz(nz,a) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='c_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_reallocate_nz diff --git a/ext/impl/psb_c_ell_reinit.f90 b/ext/impl/psb_c_ell_reinit.f90 new file mode 100644 index 00000000..2b15dfea --- /dev/null +++ b/ext/impl/psb_c_ell_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_reinit(a,clear) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_reinit + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:,:) = czero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_reinit diff --git a/ext/impl/psb_c_ell_rowsum.f90 b/ext/impl/psb_c_ell_rowsum.f90 new file mode 100644 index 00000000..5ae7d42c --- /dev/null +++ b/ext/impl/psb_c_ell_rowsum.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_rowsum(d,a) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_rowsum + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical :: is_unit + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + is_unit = a%is_unit() + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = cone + else + d(i) = czero + end if + do j=1,a%irn(i) + d(i) = d(i) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_rowsum diff --git a/ext/impl/psb_c_ell_scal.f90 b/ext/impl/psb_c_ell_scal.f90 new file mode 100644 index 00000000..63150f32 --- /dev/null +++ b/ext/impl/psb_c_ell_scal.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_scal(d,a,info,side) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_scal + implicit none + class(psb_c_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5) + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%val(i,:) = a%val(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + do i=1, m + do j=1, a%irn(i) + a%val(i,j) = a%val(i,j) * d(a%ja(i,j)) + end do + enddo + + end if + + call a%set_host() + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_scal diff --git a/ext/impl/psb_c_ell_scals.f90 b/ext/impl/psb_c_ell_scals.f90 new file mode 100644 index 00000000..3e4cd92a --- /dev/null +++ b/ext/impl/psb_c_ell_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_scals(d,a,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_scals + implicit none + class(psb_c_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_scals diff --git a/ext/impl/psb_c_ell_trim.f90 b/ext/impl/psb_c_ell_trim.f90 new file mode 100644 index 00000000..22aafefd --- /dev/null +++ b/ext/impl/psb_c_ell_trim.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_ell_trim(a) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_ell_trim + implicit none + class(psb_c_ell_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_ell_trim diff --git a/ext/impl/psb_c_hdia_allocate_mnnz.f90 b/ext/impl/psb_c_hdia_allocate_mnnz.f90 new file mode 100644 index 00000000..17a49ffe --- /dev/null +++ b/ext/impl/psb_c_hdia_allocate_mnnz.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_hdia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_hdia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hdia_allocate_mnnz diff --git a/ext/impl/psb_c_hdia_csmv.f90 b/ext/impl/psb_c_hdia_csmv.f90 new file mode 100644 index 00000000..a04fde07 --- /dev/null +++ b/ext/impl/psb_c_hdia_csmv.f90 @@ -0,0 +1,162 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_c_hdia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_hdia_csmv + implicit none + class(psb_c_hdia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc,nr,nc + integer(psb_ipk_) :: irs,ics, nmx, ni + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, ncd,ib, nzhack, & + & hackfirst, hacknext + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hdia_csmv' + logical, parameter :: debug=.false. + real :: start, finish + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + info = psb_err_transpose_not_n_unsupported_ + call psb_errpush(info,name) + goto 9999 + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1)=0) then + ir1 = 1 + ! min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + jc = ir1 + rdisp + offsets(j) + do i=ir1,ir2 + y(rdisp+i) = y(rdisp+i) + alpha*data(i,j)*x(jc) + jc = jc + 1 + enddo + end do + end subroutine psi_c_inner_dia_csmv + +end subroutine psb_c_hdia_csmv diff --git a/ext/impl/psb_c_hdia_mold.f90 b/ext/impl/psb_c_hdia_mold.f90 new file mode 100644 index 00000000..d9f85ec9 --- /dev/null +++ b/ext/impl/psb_c_hdia_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hdia_mold(a,b,info) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_hdia_mold + implicit none + class(psb_c_hdia_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hdia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_hdia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hdia_mold diff --git a/ext/impl/psb_c_hdia_print.f90 b/ext/impl/psb_c_hdia_print.f90 new file mode 100644 index 00000000..477a5433 --- /dev/null +++ b/ext/impl/psb_c_hdia_print.f90 @@ -0,0 +1,121 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_hdia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_hdia_print + use psi_ext_util_mod + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_c_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdia_print' + logical, parameter :: debug=.false. + + class(psb_c_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, k, ncd,ib, nzhack, info,& + & hackfirst, hacknext + integer(psb_ipk_), allocatable :: ia(:), ja(:) + complex(psb_spk_), allocatable :: val(:) + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% HDIA' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_c_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + + nhacks = a%nhacks + hacksize = a%hacksize + maxnzhack = 0 + do k=1, nhacks + maxnzhack = max(maxnzhack,(a%hackoffsets(k+1)-a%hackoffsets(k))) + end do + maxnzhack = hacksize*maxnzhack + allocate(ia(maxnzhack),ja(maxnzhack),val(maxnzhack),stat=info) + if (info /= 0) return + + write(iout,*) nr, nc, nz + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + ncd = hacknext-hackfirst + + call psi_c_xtr_coo_from_dia(nr,nc,& + & ia, ja, val, nzhack,& + & hacksize,ncd,& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) + !nzhack = sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + + if(present(iv)) then + do j=1,nzhack + write(iout,frmt) iv(ia(j)),iv(ja(j)),val(j) + enddo + else + if (present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ja(j),val(j) + enddo + else if (present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ja(j),val(j) + enddo + endif + end if + + end do + +end subroutine psb_c_hdia_print diff --git a/ext/impl/psb_c_hll_aclsum.f90 b/ext/impl/psb_c_hll_aclsum.f90 new file mode 100644 index 00000000..f1bd8e89 --- /dev/null +++ b/ext/impl/psb_c_hll_aclsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_aclsum(d,a) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_aclsum + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call c_hll_aclsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine c_hll_aclsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine c_hll_aclsum + +end subroutine psb_c_hll_aclsum diff --git a/ext/impl/psb_c_hll_allocate_mnnz.f90 b/ext/impl/psb_c_hll_allocate_mnnz.f90 new file mode 100644 index 00000000..97b996bd --- /dev/null +++ b/ext/impl/psb_c_hll_allocate_mnnz.f90 @@ -0,0 +1,93 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m+1,a%hkoffs,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_hksz(psb_hksz_def_) + call a%set_host() + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hll_allocate_mnnz diff --git a/ext/impl/psb_c_hll_arwsum.f90 b/ext/impl/psb_c_hll_arwsum.f90 new file mode 100644 index 00000000..9c48e1c0 --- /dev/null +++ b/ext/impl/psb_c_hll_arwsum.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_arwsum(d,a) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_arwsum + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call c_hll_arwsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine c_hll_arwsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + abs(val(i,j)) + end do + end do + + end subroutine c_hll_arwsum + +end subroutine psb_c_hll_arwsum diff --git a/ext/impl/psb_c_hll_colsum.f90 b/ext/impl/psb_c_hll_colsum.f90 new file mode 100644 index 00000000..fbcb0934 --- /dev/null +++ b/ext/impl/psb_c_hll_colsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_colsum(d,a) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_colsum + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = cone + else + d = czero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call c_hll_colsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine c_hll_colsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: val(ldv,*) + complex(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine c_hll_colsum + +end subroutine psb_c_hll_colsum diff --git a/ext/impl/psb_c_hll_csgetblk.f90 b/ext/impl/psb_c_hll_csgetblk.f90 new file mode 100644 index 00000000..9bf0b869 --- /dev/null +++ b/ext/impl/psb_c_hll_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_csgetblk + implicit none + + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='hll_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hll_csgetblk diff --git a/ext/impl/psb_c_hll_csgetptn.f90 b/ext/impl/psb_c_hll_csgetptn.f90 new file mode 100644 index 00000000..0f6481ed --- /dev/null +++ b/ext/impl/psb_c_hll_csgetptn.f90 @@ -0,0 +1,209 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_csgetptn + implicit none + + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_c_hll_csgetrow + implicit none + + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_c_hll_csmm + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy,ldx,ldy,hksz,mxrwl + complex(psb_spk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hll_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + nxy = min(size(x,2) , size(y,2) ) + + + ldx = size(x,1) + ldy = size(y,1) + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (ldx psb_c_hll_csmv + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hkpnt, mxrwl, mmhk + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_hll_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (size(x,1) 0) then + select case(hksz) + case(4) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_notra_4(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(8) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + &call psb_c_hll_csmv_notra_8(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(16) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_notra_16(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(24) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_notra_24(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(32) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_notra_32(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case default + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + end select + end if + if (mmhk < m) then + i = mmhk+1 + ir = m-mmhk + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + call psb_c_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + if (info /= psb_success_) goto 9999 + end if + j = j + 1 + end if + + else + + j=1 + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,m,hksz + j = ((i-1)/hksz)+1 + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_c_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + end if + end if + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_c_hll_csmv_inner(ir,m,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,tra,ctra,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit,tra,ctra + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp + + info = psb_success_ + if (tra) then + + if (beta == cone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*val(i,j)*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (ctra) then + + if (beta == cone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*conjg(val(i,j))*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (.not.(tra.or.ctra)) then + + if (alpha == czero) then + if (beta == czero) then + do i=1,m + y(ir+i-1) = czero + end do + else + do i=1,m + y(ir+i-1) = beta*y(ir+i-1) + end do + end if + + else + if (beta == czero) then + do i=1,m + tmp = czero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + end do + else + do i=1,m + tmp = czero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + beta*y(ir+i-1) + end do + endif + end if + end if + + if (is_unit) then + do i=1, min(m,n) + y(i) = y(i) + alpha*x(i) + end do + end if + + end subroutine psb_c_hll_csmv_inner + + subroutine psb_c_hll_csmv_notra_8(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, czero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=8 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = czero + if (alpha /= czero) then + do j=1, maxval(irn(1:8)) + tmp(1:8) = tmp(1:8) + val(1:8,j)*x(ja(1:8,j)) + end do + end if + if (beta == czero) then + y(ir:ir+8-1) = alpha*tmp(1:8) + else + y(ir:ir+8-1) = alpha*tmp(1:8) + beta*y(ir:ir+8-1) + end if + + + if (is_unit) then + do i=1, min(8,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_c_hll_csmv_notra_8 + + subroutine psb_c_hll_csmv_notra_24(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, czero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=24 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = czero + if (alpha /= czero) then + do j=1, maxval(irn(1:24)) + tmp(1:24) = tmp(1:24) + val(1:24,j)*x(ja(1:24,j)) + end do + end if + if (beta == czero) then + y(ir:ir+24-1) = alpha*tmp(1:24) + else + y(ir:ir+24-1) = alpha*tmp(1:24) + beta*y(ir:ir+24-1) + end if + + + if (is_unit) then + do i=1, min(24,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_c_hll_csmv_notra_24 + + subroutine psb_c_hll_csmv_notra_16(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, czero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=16 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = czero + if (alpha /= czero) then + do j=1, maxval(irn(1:16)) + tmp(1:16) = tmp(1:16) + val(1:16,j)*x(ja(1:16,j)) + end do + end if + if (beta == czero) then + y(ir:ir+16-1) = alpha*tmp(1:16) + else + y(ir:ir+16-1) = alpha*tmp(1:16) + beta*y(ir:ir+16-1) + end if + + + if (is_unit) then + do i=1, min(16,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_c_hll_csmv_notra_16 + + subroutine psb_c_hll_csmv_notra_32(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, czero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=32 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = czero + if (alpha /= czero) then + do j=1, maxval(irn(1:32)) + tmp(1:32) = tmp(1:32) + val(1:32,j)*x(ja(1:32,j)) + end do + end if + if (beta == czero) then + y(ir:ir+32-1) = alpha*tmp(1:32) + else + y(ir:ir+32-1) = alpha*tmp(1:32) + beta*y(ir:ir+32-1) + end if + + + if (is_unit) then + do i=1, min(32,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_c_hll_csmv_notra_32 + + subroutine psb_c_hll_csmv_notra_4(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, czero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=4 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = czero + if (alpha /= czero) then + do j=1, maxval(irn(1:4)) + tmp(1:4) = tmp(1:4) + val(1:4,j)*x(ja(1:4,j)) + end do + end if + if (beta == czero) then + y(ir:ir+4-1) = alpha*tmp(1:4) + else + y(ir:ir+4-1) = alpha*tmp(1:4) + beta*y(ir:ir+4-1) + end if + + + if (is_unit) then + do i=1, min(4,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_c_hll_csmv_notra_4 + +end subroutine psb_c_hll_csmv diff --git a/ext/impl/psb_c_hll_csnm1.f90 b/ext/impl/psb_c_hll_csnm1.f90 new file mode 100644 index 00000000..25daa75d --- /dev/null +++ b/ext/impl/psb_c_hll_csnm1.f90 @@ -0,0 +1,111 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_c_hll_csnm1(a) result(res) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_csnm1 + + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info, hksz, mxrwl + real(psb_spk_), allocatable :: vt(:) + logical :: is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hll_csnm1' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + res = szero + if (a%is_dev()) call a%sync() + n = a%get_ncols() + m = a%get_nrows() + allocate(vt(n),stat=info) + if (Info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + if (a%is_unit()) then + vt = sone + else + vt = szero + end if + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_c_hll_csnm1_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & vt,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + res = maxval(vt) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_c_hll_csnm1_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & vt,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: vt(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + vt(jc) = vt(jc) + abs(val(i,j)) + end do + end do + end subroutine psb_c_hll_csnm1_inner + +end function psb_c_hll_csnm1 diff --git a/ext/impl/psb_c_hll_csnmi.f90 b/ext/impl/psb_c_hll_csnmi.f90 new file mode 100644 index 00000000..c70be9ce --- /dev/null +++ b/ext/impl/psb_c_hll_csnmi.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_c_hll_csnmi(a) result(res) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_csnmi + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc, hksz, mxrwl, info + Integer(Psb_ipk_) :: err_act + logical :: is_unit + character(len=20) :: name='c_csnmi' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + info = 0 + res = szero + if (a%is_dev()) call a%sync() + + n = a%get_ncols() + m = a%get_nrows() + is_unit = a%is_unit() + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_c_hll_csnmi_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & res,is_unit,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_c_hll_csnmi_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & res,is_unit,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: res + logical :: is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: tmp, acc + + info = psb_success_ + if (is_unit) then + tmp = sone + else + tmp = szero + end if + do i=1,m + acc = tmp + do j=1, irn(i) + acc = acc + abs(val(i,j)) + end do + res = max(acc,res) + end do + end subroutine psb_c_hll_csnmi_inner + +end function psb_c_hll_csnmi diff --git a/ext/impl/psb_c_hll_csput.f90 b/ext/impl/psb_c_hll_csput.f90 new file mode 100644 index 00000000..e46ae30a --- /dev/null +++ b/ext/impl/psb_c_hll_csput.f90 @@ -0,0 +1,233 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_csput_a + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hll_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5) + + + call psb_erractionsave(err_act) + info = psb_success_ + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + + call psb_c_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info /= psb_success_) then + + info = psb_err_invalid_mat_state_ + end if + call a%set_host() + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_c_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ip, i1,i2,nr,nc,nnz,dupl,ng,& + & hksz, hk, hkzpnt, ihkr, mxrwl, lastrow + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='c_hll_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + lastrow = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + hksz = a%get_hksz() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + + end do + + case(psb_dupl_add_) + ! Add + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_c_hll_srch_upd + +end subroutine psb_c_hll_csput_a diff --git a/ext/impl/psb_c_hll_cssm.f90 b/ext/impl/psb_c_hll_cssm.f90 new file mode 100644 index 00000000..90e3b978 --- /dev/null +++ b/ext/impl/psb_c_hll_cssm.f90 @@ -0,0 +1,506 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_cssm + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ldx, ldy, hksz, nxy, mk, mxrwl + complex(psb_spk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_hll_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + info = psb_err_missing_override_method_ + call psb_errpush(info,name) + goto 9999 + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + hksz = a%get_hksz() + + if (.not. (a%is_triangle())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + ldx = size(x,1) + ldy = size(y,1) + if (ldx psb_c_hll_cssv + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hk, mxrwl, noffs, kc, mk + complex(psb_spk_) :: acc + complex(psb_spk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='c_hll_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x) psb_c_hll_get_diag + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k, ke, hksz, ld,ir, mxrwl + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + ld = size(d) + if (ld< mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,ld/)) + goto 9999 + end if + + if (a%is_triangle().and.a%is_unit()) then + d(1:mnm) = cone + else + + hksz = a%get_hksz() + j=1 + do i=1,mnm,hksz + ir = min(hksz,mnm-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + ke = a%hkoffs(j+1) + call psb_c_hll_get_diag_inner(ir,a%irn(i:i+ir-1),& + & a%ja(k:ke),hksz,a%val(k:ke),hksz,& + & a%idiag(i:i+ir-1),d(i:i+ir-1),info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + end if + + do i=mnm+1,size(d) + d(i) = czero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_c_hll_get_diag_inner(m,irn,ja,ldj,val,ldv,& + & idiag,d,info) + integer(psb_ipk_), intent(in) :: m,ldj,ldv,ja(ldj,*),irn(*), idiag(*) + complex(psb_spk_), intent(in) :: val(ldv,*) + complex(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + do i=1,m + if (idiag(i) /= 0) then + d(i) = val(i,idiag(i)) + else + d(i) = czero + end if + end do + + end subroutine psb_c_hll_get_diag_inner + +end subroutine psb_c_hll_get_diag diff --git a/ext/impl/psb_c_hll_maxval.f90 b/ext/impl/psb_c_hll_maxval.f90 new file mode 100644 index 00000000..ff82bb40 --- /dev/null +++ b/ext/impl/psb_c_hll_maxval.f90 @@ -0,0 +1,45 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_c_hll_maxval(a) result(res) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_maxval + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + if (a%is_dev()) call a%sync() + res = maxval(abs(a%val(:))) + if (a%is_unit()) res = max(res,sone) + +end function psb_c_hll_maxval diff --git a/ext/impl/psb_c_hll_mold.f90 b/ext/impl/psb_c_hll_mold.f90 new file mode 100644 index 00000000..4a6204b0 --- /dev/null +++ b/ext/impl/psb_c_hll_mold.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_mold(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_mold + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hll_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_c_hll_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hll_mold diff --git a/ext/impl/psb_c_hll_print.f90 b/ext/impl/psb_c_hll_print.f90 new file mode 100644 index 00000000..a5eec378 --- /dev/null +++ b/ext/impl/psb_c_hll_print.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_hll_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, k, hksz, hk, mxrwl,ir, ix + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_c_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + hksz = a%get_hksz() + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + endif + endif + +end subroutine psb_c_hll_print diff --git a/ext/impl/psb_c_hll_reallocate_nz.f90 b/ext/impl/psb_c_hll_reallocate_nz.f90 new file mode 100644 index 00000000..44d9cfc9 --- /dev/null +++ b/ext/impl/psb_c_hll_reallocate_nz.f90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_reallocate_nz(nz,a) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,nz_ + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='c_hll_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + nz_ = max(nz,ione) + call psb_realloc(nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(nz_,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hll_reallocate_nz diff --git a/ext/impl/psb_c_hll_reinit.f90 b/ext/impl/psb_c_hll_reinit.f90 new file mode 100644 index 00000000..82d5cb16 --- /dev/null +++ b/ext/impl/psb_c_hll_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_reinit(a,clear) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_reinit + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:) = czero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hll_reinit diff --git a/ext/impl/psb_c_hll_rowsum.f90 b/ext/impl/psb_c_hll_rowsum.f90 new file mode 100644 index 00000000..e6eea227 --- /dev/null +++ b/ext/impl/psb_c_hll_rowsum.f90 @@ -0,0 +1,110 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_rowsum(d,a) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_rowsum + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + + if (a%is_unit()) then + d = cone + else + d = czero + end if + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call c_hll_rowsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine c_hll_rowsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: val(ldv,*) + complex(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + (val(i,j)) + end do + end do + + end subroutine c_hll_rowsum + +end subroutine psb_c_hll_rowsum diff --git a/ext/impl/psb_c_hll_scal.f90 b/ext/impl/psb_c_hll_scal.f90 new file mode 100644 index 00000000..0fd59f15 --- /dev/null +++ b/ext/impl/psb_c_hll_scal.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_scal(d,a,info,side) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_scal + implicit none + class(psb_c_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), ld, k, mxrwl, hksz, ir + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + info = psb_err_missing_override_method_ + call psb_errpush(info,name,i_err=ierr) + goto 9999 + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + ld = size(d) + if (left) then + m = a%get_nrows() + if (ld < m) then + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + else + n = a%get_ncols() + if (ld < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_c_hll_scal_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & left,d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_c_hll_scal_inner(ir,m,n,irn,ja,ldj,val,ldv,left,d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_spk_), intent(in) :: d(*) + complex(psb_spk_), intent(inout) :: val(ldv,*) + logical, intent(in) :: left + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + if (left) then + do i=1,m + do j=1, irn(i) + val(i,j) = val(i,j)*d(ir+i-1) + end do + end do + else + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + val(i,j) = val(i,j)*d(jc) + end do + end do + + end if + + end subroutine psb_c_hll_scal_inner + + +end subroutine psb_c_hll_scal diff --git a/ext/impl/psb_c_hll_scals.f90 b/ext/impl/psb_c_hll_scals.f90 new file mode 100644 index 00000000..13a03a22 --- /dev/null +++ b/ext/impl/psb_c_hll_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_hll_scals(d,a,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_hll_scals + implicit none + class(psb_c_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:) = a%val(:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_c_hll_scals diff --git a/ext/impl/psb_c_mv_dia_from_coo.f90 b/ext/impl/psb_c_mv_dia_from_coo.f90 new file mode 100644 index 00000000..99871348 --- /dev/null +++ b/ext/impl/psb_c_mv_dia_from_coo.f90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_mv_dia_from_coo + implicit none + + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_mv_dia_from_coo diff --git a/ext/impl/psb_c_mv_dia_to_coo.f90 b/ext/impl/psb_c_mv_dia_to_coo.f90 new file mode 100644 index 00000000..1382cec3 --- /dev/null +++ b/ext/impl/psb_c_mv_dia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_mv_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psb_c_mv_dia_to_coo + implicit none + + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_c_mv_dia_to_coo diff --git a/ext/impl/psb_c_mv_ell_from_coo.f90 b/ext/impl/psb_c_mv_ell_from_coo.f90 new file mode 100644 index 00000000..64da3e8d --- /dev/null +++ b/ext/impl/psb_c_mv_ell_from_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_mv_ell_from_coo + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, ir, ic + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_c_mv_ell_from_coo diff --git a/ext/impl/psb_c_mv_ell_from_fmt.f90 b/ext/impl/psb_c_mv_ell_from_fmt.f90 new file mode 100644 index 00000000..d0fa9bc4 --- /dev/null +++ b/ext/impl/psb_c_mv_ell_from_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_mv_ell_from_fmt + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_c_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_mv_ell_from_fmt diff --git a/ext/impl/psb_c_mv_ell_to_coo.f90 b/ext/impl/psb_c_mv_ell_to_coo.f90 new file mode 100644 index 00000000..a49e2e3c --- /dev/null +++ b/ext/impl/psb_c_mv_ell_to_coo.f90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_mv_ell_to_coo + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + ! Taking a path slightly slower but with less memory footprint + deallocate(a%idiag) + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + + call psb_realloc(nza,b%ia,info) + if (info == 0) call psb_realloc(nza,b%ja,info) + if (info /= 0) goto 9999 + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + end do + end do + deallocate(a%ja, stat=info) + + if (info == 0) call psb_realloc(nza,b%val,info) + if (info /= 0) goto 9999 + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%val(k) = a%val(i,j) + end do + end do + call a%free() + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_c_mv_ell_to_coo diff --git a/ext/impl/psb_c_mv_ell_to_fmt.f90 b/ext/impl/psb_c_mv_ell_to_fmt.f90 new file mode 100644 index 00000000..3ea02d6b --- /dev/null +++ b/ext/impl/psb_c_mv_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psb_c_mv_ell_to_fmt + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_c_ell_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_mv_ell_to_fmt diff --git a/ext/impl/psb_c_mv_hdia_from_coo.f90 b/ext/impl/psb_c_mv_hdia_from_coo.f90 new file mode 100644 index 00000000..4247fdf8 --- /dev/null +++ b/ext/impl/psb_c_mv_hdia_from_coo.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_mv_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_mv_hdia_from_coo + implicit none + + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.(b%is_by_rows())) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_mv_hdia_from_coo diff --git a/ext/impl/psb_c_mv_hdia_to_coo.f90 b/ext/impl/psb_c_mv_hdia_to_coo.f90 new file mode 100644 index 00000000..3a91917a --- /dev/null +++ b/ext/impl/psb_c_mv_hdia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_c_mv_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_c_hdia_mat_mod, psb_protect_name => psb_c_mv_hdia_to_coo + implicit none + + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_c_mv_hdia_to_coo diff --git a/ext/impl/psb_c_mv_hll_from_coo.f90 b/ext/impl/psb_c_mv_hll_from_coo.f90 new file mode 100644 index 00000000..b78bdd80 --- /dev/null +++ b/ext/impl/psb_c_mv_hll_from_coo.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_mv_hll_from_coo + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: hksz + info = psb_success_ + if (.not.b%is_by_rows()) call b%fix(info) + hksz = psi_get_hksz() + call psi_convert_hll_from_coo(a,hksz,b,info) + if (info /= 0) goto 9999 + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_c_mv_hll_from_coo diff --git a/ext/impl/psb_c_mv_hll_from_fmt.f90 b/ext/impl/psb_c_mv_hll_from_fmt.f90 new file mode 100644 index 00000000..add90355 --- /dev/null +++ b/ext/impl/psb_c_mv_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_mv_hll_from_fmt + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_c_hll_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%hkoffs, a%hkoffs) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + a%hksz = b%hksz + a%nzt = b%nzt + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_mv_hll_from_fmt diff --git a/ext/impl/psb_c_mv_hll_to_coo.f90 b/ext/impl/psb_c_mv_hll_to_coo.f90 new file mode 100644 index 00000000..fbf5dfcd --- /dev/null +++ b/ext/impl/psb_c_mv_hll_to_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_mv_hll_to_coo + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%cp_to_coo(b,info) + + if (info /= psb_success_) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_c_mv_hll_to_coo diff --git a/ext/impl/psb_c_mv_hll_to_fmt.f90 b/ext/impl/psb_c_mv_hll_to_fmt.f90 new file mode 100644 index 00000000..37d77e85 --- /dev/null +++ b/ext/impl/psb_c_mv_hll_to_fmt.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_c_mv_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psb_c_mv_hll_to_fmt + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_c_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_c_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_c_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%hkoffs, b%hkoffs) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + b%hksz = a%hksz + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_c_mv_hll_to_fmt diff --git a/ext/impl/psb_d_cp_dia_from_coo.f90 b/ext/impl/psb_d_cp_dia_from_coo.f90 new file mode 100644 index 00000000..b640565f --- /dev/null +++ b/ext/impl/psb_d_cp_dia_from_coo.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_cp_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_cp_dia_from_coo + implicit none + + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_convert_dia_from_coo(a,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_dia_from_coo(a,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cp_dia_from_coo diff --git a/ext/impl/psb_d_cp_dia_to_coo.f90 b/ext/impl/psb_d_cp_dia_to_coo.f90 new file mode 100644 index 00000000..527c96d0 --- /dev/null +++ b/ext/impl/psb_d_cp_dia_to_coo.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_cp_dia_to_coo + implicit none + + class(psb_d_dia_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: i, j, k,nr,nza,nc, nzd + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + + call psi_d_xtr_coo_from_dia(nr,nc,& + & b%ia, b%ja, b%val, nzd, & + & size(a%data,1),size(a%data,2),& + & a%data,a%offset,info) + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +end subroutine psb_d_cp_dia_to_coo diff --git a/ext/impl/psb_d_cp_ell_from_coo.f90 b/ext/impl/psb_d_cp_ell_from_coo.f90 new file mode 100644 index 00000000..cf23a0e0 --- /dev/null +++ b/ext/impl/psb_d_cp_ell_from_coo.f90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_cp_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc + integer(psb_ipk_) :: nzm, ir, ic, k + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + ! This is to have fix_coo called behind the scenes + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_d_convert_ell_from_coo(a,b,info) + else + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call psi_d_convert_ell_from_coo(a,tmp,info) + if (info == psb_success_) call tmp%free() + end if + if (info /= psb_success_) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + +end subroutine psb_d_cp_ell_from_coo diff --git a/ext/impl/psb_d_cp_ell_from_fmt.f90 b/ext/impl/psb_d_cp_ell_from_fmt.f90 new file mode 100644 index 00000000..ce8a8d7e --- /dev/null +++ b/ext/impl/psb_d_cp_ell_from_fmt.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_cp_ell_from_fmt + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b,info) + + type is (psb_d_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_d_cp_ell_from_fmt diff --git a/ext/impl/psb_d_cp_ell_to_coo.f90 b/ext/impl/psb_d_cp_ell_to_coo.f90 new file mode 100644 index 00000000..8e7ad735 --- /dev/null +++ b/ext/impl/psb_d_cp_ell_to_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_cp_ell_to_coo + implicit none + + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: i, j, k, nr, nc, nza + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + b%val(k) = a%val(i,j) + end do + end do + call b%set_nzeros(a%get_nzeros()) + call b%fix(info) + call b%set_host() + +end subroutine psb_d_cp_ell_to_coo diff --git a/ext/impl/psb_d_cp_ell_to_fmt.f90 b/ext/impl/psb_d_cp_ell_to_fmt.f90 new file mode 100644 index 00000000..fd05d0fd --- /dev/null +++ b/ext/impl/psb_d_cp_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_cp_ell_to_fmt + implicit none + + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_d_ell_sparse_mat) + if (a%is_dev()) call a%sync() + + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_cp_ell_to_fmt diff --git a/ext/impl/psb_d_cp_hdia_from_coo.f90 b/ext/impl/psb_d_cp_hdia_from_coo.f90 new file mode 100644 index 00000000..bbc34195 --- /dev/null +++ b/ext/impl/psb_d_cp_hdia_from_coo.f90 @@ -0,0 +1,222 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_cp_hdia_from_coo + implicit none + + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call inner_cp_hdia_from_coo(a,b,info) + if (info /= psb_success_) goto 9999 + else + call b%cp_to_coo(tmp,info) + if (info /= psb_success_) goto 9999 + if (.not.tmp%is_by_rows()) call tmp%fix(info) + if (info /= psb_success_) goto 9999 + call inner_cp_hdia_from_coo(a,tmp,info) + if (info /= psb_success_) goto 9999 + call tmp%free() + end if + call a%set_host() + + return + +9999 continue + + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine inner_cp_hdia_from_coo(a,tmp,info) + use psb_base_mod + use psi_ext_util_mod + + implicit none + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,mi,mj,dm,bi,w + integer(psb_ipk_),allocatable :: d(:), offset(:), irsz(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext, nzout + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + logical, parameter :: debug=.false. + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat + + hacksize = a%hacksize + a%nhacks = (nr+hacksize-1)/hacksize + nhacks = a%nhacks + + ndiag = nr+nc-1 + if (info == psb_success_) call psb_realloc(nr,irsz,info) + if (info == psb_success_) call psb_realloc(ndiag,d,info) + if (info == psb_success_) call psb_realloc(ndiag,offset,info) + if (info == psb_success_) call psb_realloc(nhacks+1,a%hackoffsets,info) + if (info /= psb_success_) return + + irsz = 0 + do k=1,nza + ir = tmp%ia(k) + irsz(ir) = irsz(ir)+1 + end do + + a%nzeros = 0 + d = 0 + iszd = 0 + a%hackOffsets(1)=0 + klast1 = 1 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + if (debug) then + write(*,*) 'Loop iteration ',k,nhacks,i,ib,nr + write(*,*) 'RW:',tmp%ia(kfirst),tmp%ia(klast1-1) + write(*,*) 'CL:',tmp%ja(kfirst),tmp%ja(klast1-1) + end if + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, offset, info, initd=.false., cleard=.true.) + iszd = iszd + nd + a%hackOffsets(k+1)=iszd + if (debug) write(*,*) 'From chunk ',k,i,ib,sum(irsz(i:i+ib-1)),': ',nd, iszd + if (debug) write(*,*) 'offset ', offset(1:nd) + end do + if (debug) then + write(*,*) 'Hackcount ',nhacks,' Allocation height ',iszd + write(*,*) 'Hackoffsets ',a%hackOffsets(:) + end if + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%diaOffsets,info) + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%val,info) + if (info /= psb_success_) return + klast1 = 1 + ! + ! Second run: copy elements + ! + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, a%diaOffsets(hackfirst+1:hacknext), info, & + & initd=.false., cleard=.false.) + if (debug) write(*,*) 'Out from dia_offset: ', a%diaOffsets(hackfirst+1:hacknext) + call psi_d_xtr_dia_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & tmp%val(kfirst:klast1-1), & + & d,hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),info,& + & initdata=.true.,rdisp=(i-1)) + + call countnz(nr,nc,(i-1),hacksize,(hacknext-hackfirst),& + & a%diaOffsets(hackfirst+1:hacknext),nzout) + a%nzeros = a%nzeros + nzout + call cleand(nr,(hacknext-hackfirst),d,a%diaOffsets(hackfirst+1:hacknext)) + + end do + if (debug) then + write(*,*) 'NZEROS: ',a%nzeros, nza + write(*,*) 'diaoffsets: ',a%diaOffsets(1:iszd) + write(*,*) 'values: ' + j=0 + do k=1,nhacks + write(*,*) 'Hack No. ',k + do i=1,hacksize*(iszd/nhacks) + j = j + 1 + write(*,*) j, a%val(j) + end do + end do + end if + end subroutine inner_cp_hdia_from_coo + + subroutine countnz(nr,nc,rdisp,nrd,ncd,offsets,nz) + implicit none + integer(psb_ipk_), intent(in) :: nr,nc,nrd,ncd,rdisp,offsets(:) + integer(psb_ipk_), intent(out) :: nz + ! + integer(psb_ipk_) :: i,j,k, ir, jc, m4, ir1, ir2, nrcmdisp, rdisp1 + nz = 0 + nrcmdisp = min(nr-rdisp,nc-rdisp) + rdisp1 = 1-rdisp + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + nz = nz + (ir2-ir1+1) + end do + end subroutine countnz + + subroutine cleand(nr,nd,d,offset) + implicit none + integer(psb_ipk_), intent(in) :: nr,nd,offset(:) + integer(psb_ipk_), intent(inout) :: d(:) + integer(psb_ipk_) :: i,id + + do i=1,nd + id = offset(i) + nr + d(id) = 0 + end do + end subroutine cleand + +end subroutine psb_d_cp_hdia_from_coo diff --git a/ext/impl/psb_d_cp_hdia_to_coo.f90 b/ext/impl/psb_d_cp_hdia_to_coo.f90 new file mode 100644 index 00000000..bfa77b08 --- /dev/null +++ b/ext/impl/psb_d_cp_hdia_to_coo.f90 @@ -0,0 +1,84 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_cp_hdia_to_coo + use psi_ext_util_mod + implicit none + + class(psb_d_hdia_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + call b%set_nzeros(nza) + call b%set_sort_status(psb_unsorted_) + nhacks = a%nhacks + hacksize = a%hacksize + j = 0 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_d_xtr_coo_from_dia(nr,nc,& + & b%ia(j+1:), b%ja(j+1:), b%val(j+1:), nzd, & + & hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) +!!$ write(*,*) 'diaoffsets',ib,' : ',ib - abs(a%diaOffsets(hackfirst+1:hacknext)) +!!$ write(*,*) 'sum',ib,j,' : ',sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + j = j + nzd + end do + if (nza /= j) then + write(*,*) 'Wrong counts in hdia_to_coo',j,nza + info = -8 + return + end if + call b%set_host() + call b%fix(info) + +end subroutine psb_d_cp_hdia_to_coo diff --git a/ext/impl/psb_d_cp_hll_from_coo.f90 b/ext/impl/psb_d_cp_hll_from_coo.f90 new file mode 100644 index 00000000..03028d20 --- /dev/null +++ b/ext/impl/psb_d_cp_hll_from_coo.f90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_cp_hll_from_coo + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + character(len=20) :: name='hll_from_coo' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + if (b%is_dev()) call b%sync() + hksz = psi_get_hksz() + if (b%is_by_rows()) then + call psi_convert_hll_from_coo(a,hksz,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_hll_from_coo(a,hksz,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cp_hll_from_coo diff --git a/ext/impl/psb_d_cp_hll_from_fmt.f90 b/ext/impl/psb_d_cp_hll_from_fmt.f90 new file mode 100644 index 00000000..785b23ac --- /dev/null +++ b/ext/impl/psb_d_cp_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_cp_hll_from_fmt + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + class is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_d_hll_sparse_mat) + ! write(0,*) 'From type_hll' + if (b%is_dev()) call b%sync() + + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%hkoffs, a%hkoffs, info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) a%hksz = b%hksz + if (info == 0) a%nzt = b%nzt + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_d_cp_hll_from_fmt diff --git a/ext/impl/psb_d_cp_hll_to_coo.f90 b/ext/impl/psb_d_cp_hll_to_coo.f90 new file mode 100644 index 00000000..b20144c5 --- /dev/null +++ b/ext/impl/psb_d_cp_hll_to_coo.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_cp_hll_to_coo + implicit none + + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j, jj,k,ir, isz,err_act, hksz, hk, mxrwl,& + & irs, nzblk, kc + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + + j = 1 + kc = 1 + k = 1 + hksz = a%hksz + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + nzblk = sum(a%irn(i:i+ir-1)) + call inner_copy(i,ir,b%ia(kc:kc+nzblk-1),& + & b%ja(kc:kc+nzblk-1),b%val(kc:kc+nzblk-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & hksz) + k = k + isz + kc = kc + nzblk + + enddo + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +contains + + subroutine inner_copy(i,ir,iac,& + & jac,valc,ja,val,irn,ld) + integer(psb_ipk_) :: i,ir,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*) + real(psb_dpk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc + kc = 1 + do ii = 1, ir + nc = irn(ii) + do jj=1,nc + iac(kc) = i+ii-1 + jac(kc) = ja(ii,jj) + valc(kc) = val(ii,jj) + kc = kc + 1 + end do + end do + + end subroutine inner_copy + +end subroutine psb_d_cp_hll_to_coo diff --git a/ext/impl/psb_d_cp_hll_to_fmt.f90 b/ext/impl/psb_d_cp_hll_to_fmt.f90 new file mode 100644 index 00000000..6c60c5b5 --- /dev/null +++ b/ext/impl/psb_d_cp_hll_to_fmt.f90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_cp_hll_to_fmt + implicit none + + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_d_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%hkoffs, b%hkoffs , info) + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + if (info == 0) b%hksz = a%hksz + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_cp_hll_to_fmt diff --git a/ext/impl/psb_d_dia_aclsum.f90 b/ext/impl/psb_d_dia_aclsum.f90 new file mode 100644 index 00000000..0f4df6ca --- /dev/null +++ b/ext/impl/psb_d_dia_aclsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_aclsum(d,a) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_aclsum + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_aclsum diff --git a/ext/impl/psb_d_dia_allocate_mnnz.f90 b/ext/impl/psb_d_dia_allocate_mnnz.f90 new file mode 100644 index 00000000..309b7d4a --- /dev/null +++ b/ext/impl/psb_d_dia_allocate_mnnz.f90 @@ -0,0 +1,88 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -ione )/m + else + nz_ = ((max(7*m,7*n,ione)+m-ione)/m) + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,nz_,a%data,info) + if (info == psb_success_) call psb_realloc(m+n,a%offset,info) + if (info == psb_success_) then + a%data = 0 + a%offset = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_allocate_mnnz diff --git a/ext/impl/psb_d_dia_arwsum.f90 b/ext/impl/psb_d_dia_arwsum.f90 new file mode 100644 index 00000000..98eefc44 --- /dev/null +++ b/ext/impl/psb_d_dia_arwsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_arwsum(d,a) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_arwsum + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_arwsum diff --git a/ext/impl/psb_d_dia_colsum.f90 b/ext/impl/psb_d_dia_colsum.f90 new file mode 100644 index 00000000..6a6eb81c --- /dev/null +++ b/ext/impl/psb_d_dia_colsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_colsum(d,a) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_colsum + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_colsum diff --git a/ext/impl/psb_d_dia_csgetptn.f90 b/ext/impl/psb_d_dia_csgetptn.f90 new file mode 100644 index 00000000..ad0e040a --- /dev/null +++ b/ext/impl/psb_d_dia_csgetptn.f90 @@ -0,0 +1,188 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_csgetptn + implicit none + + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + + + end subroutine dia_getptn + +end subroutine psb_d_dia_csgetptn diff --git a/ext/impl/psb_d_dia_csgetrow.f90 b/ext/impl/psb_d_dia_csgetrow.f90 new file mode 100644 index 00000000..7e05a26e --- /dev/null +++ b/ext/impl/psb_d_dia_csgetrow.f90 @@ -0,0 +1,199 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_csgetrow + implicit none + + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + if (chksz) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,val,info) + end if + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + val(nzin_) = a%data(i,j) + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + end subroutine dia_getrow +end subroutine psb_d_dia_csgetrow diff --git a/ext/impl/psb_d_dia_csmm.f90 b/ext/impl/psb_d_dia_csmm.f90 new file mode 100644 index 00000000..81ad967d --- /dev/null +++ b/ext/impl/psb_d_dia_csmm.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_csmm + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_dia_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i,1:nxy) = y(i,1:nxy) + alpha*data(i,j)*x(i+off(j),1:nxy) + enddo + enddo + + end subroutine psb_d_dia_csmm_inner + +end subroutine psb_d_dia_csmm diff --git a/ext/impl/psb_d_dia_csmv.f90 b/ext/impl/psb_d_dia_csmv.f90 new file mode 100644 index 00000000..166b4c58 --- /dev/null +++ b/ext/impl/psb_d_dia_csmv.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_csmv + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_dia_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i) = y(i) + alpha*data(i,j)*x(i+off(j)) + enddo + enddo + + end subroutine psb_d_dia_csmv_inner + +end subroutine psb_d_dia_csmv diff --git a/ext/impl/psb_d_dia_get_diag.f90 b/ext/impl/psb_d_dia_get_diag.f90 new file mode 100644 index 00000000..bbcb4a12 --- /dev/null +++ b/ext/impl/psb_d_dia_get_diag.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_get_diag(a,d,info) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_get_diag + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = done + else + do i=1, size(a%offset) + if (a%offset(i) == 0) then + d(1:mnm) = a%data(1:mnm,i) + exit + end if + end do + end if + do i=mnm+1,size(d) + d(i) = dzero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_get_diag diff --git a/ext/impl/psb_d_dia_maxval.f90 b/ext/impl/psb_d_dia_maxval.f90 new file mode 100644 index 00000000..f57be1ff --- /dev/null +++ b/ext/impl/psb_d_dia_maxval.f90 @@ -0,0 +1,54 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +function psb_d_dia_maxval(a) result(res) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_maxval + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_maxval' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = done + else + res = dzero + end if + + res = max(res,maxval(abs(a%data))) + +end function psb_d_dia_maxval diff --git a/ext/impl/psb_d_dia_mold.f90 b/ext/impl/psb_d_dia_mold.f90 new file mode 100644 index 00000000..2b3cef81 --- /dev/null +++ b/ext/impl/psb_d_dia_mold.f90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_mold(a,b,info) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_mold + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_dia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_mold diff --git a/ext/impl/psb_d_dia_print.f90 b/ext/impl/psb_d_dia_print.f90 new file mode 100644 index 00000000..e32dc2ed --- /dev/null +++ b/ext/impl/psb_d_dia_print.f90 @@ -0,0 +1,148 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_dia_print' + logical, parameter :: debug=.false. + + class(psb_d_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, jc, ir1, ir2 + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_d_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + write(iout,*) nr, nc, nz + + nc=size(a%data,2) + + + + if(present(iv)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) iv(i),iv(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),(i+jc),a%data(i,j) + enddo + enddo + + endif + +end subroutine psb_d_dia_print diff --git a/ext/impl/psb_d_dia_reallocate_nz.f90 b/ext/impl/psb_d_dia_reallocate_nz.f90 new file mode 100644 index 00000000..83864dd8 --- /dev/null +++ b/ext/impl/psb_d_dia_reallocate_nz.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_reallocate_nz(nz,a) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='d_dia_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! Ans: NOTHING. + ! + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_reallocate_nz diff --git a/ext/impl/psb_d_dia_reinit.f90 b/ext/impl/psb_d_dia_reinit.f90 new file mode 100644 index 00000000..f1e91ade --- /dev/null +++ b/ext/impl/psb_d_dia_reinit.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_reinit(a,clear) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_reinit + implicit none + + class(psb_d_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%data(:,:) = dzero + call a%set_upd() + call a%set_host() + + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_reinit diff --git a/ext/impl/psb_d_dia_rowsum.f90 b/ext/impl/psb_d_dia_rowsum.f90 new file mode 100644 index 00000000..7a5875ba --- /dev/null +++ b/ext/impl/psb_d_dia_rowsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_rowsum(d,a) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_rowsum + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_rowsum diff --git a/ext/impl/psb_d_dia_scal.f90 b/ext/impl/psb_d_dia_scal.f90 new file mode 100644 index 00000000..d87c0d25 --- /dev/null +++ b/ext/impl/psb_d_dia_scal.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_dia_scal(d,a,info,side) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_scal + implicit none + class(psb_d_dia_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), nc, jc, nr, ir1, ir2 + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%data(i,:) = a%data(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + nr=size(a%data,1) + nc=size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + a%data(i,j) = a%data(i,j) * d(i+jc) + enddo + enddo + + end if + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_scal diff --git a/ext/impl/psb_d_dia_scals.f90 b/ext/impl/psb_d_dia_scals.f90 new file mode 100644 index 00000000..a3958f57 --- /dev/null +++ b/ext/impl/psb_d_dia_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_dia_scals(d,a,info) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_dia_scals + implicit none + class(psb_d_dia_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%data(:,:) = a%data(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dia_scals diff --git a/ext/impl/psb_d_dns_mat_impl.f90 b/ext/impl/psb_d_dns_mat_impl.f90 new file mode 100644 index 00000000..edf5cde4 --- /dev/null +++ b/ext/impl/psb_d_dns_mat_impl.f90 @@ -0,0 +1,724 @@ + +!> Function csmv: +!! \memberof psb_d_dns_sparse_mat +!! \brief Product by a dense rank 1 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_d_dns_csmv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_csmv + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act, m, n, lda + character(len=20) :: name='d_dns_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (trans_ == 'N') then + m=a%get_nrows() + n=a%get_ncols() + else + n=a%get_nrows() + m=a%get_ncols() + end if + lda = size(a%val,1) + + + call dgemv(trans_,a%get_nrows(),a%get_ncols(),alpha,& + & a%val,size(a%val,1),x,1,beta,y,1) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dns_csmv + + +!> Function csmm: +!! \memberof psb_d_dns_sparse_mat +!! \brief Product by a dense rank 2 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:,:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:,:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_d_dns_csmm(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_csmm + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act,m,n,k, lda, ldx, ldy + character(len=20) :: name='d_dns_csmm' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (psb_toupper(trans_)=='N') then + m = a%get_nrows() + k = a%get_ncols() + n = min(size(y,2),size(x,2)) + else + k = a%get_nrows() + m = a%get_ncols() + n = min(size(y,2),size(x,2)) + end if + lda = size(a%val,1) + ldx = size(x,1) + ldy = size(y,1) + call dgemm(trans_,'N',m,n,k,alpha,a%val,lda,x,ldx,beta,y,ldy) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dns_csmm + + + +! +! +!> Function csnmi: +!! \memberof psb_d_dns_sparse_mat +!! \brief Operator infinity norm +!! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) +!! +! +function psb_d_dns_csnmi(a) result(res) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_csnmi + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + ! + integer(psb_ipk_) :: i + real(psb_dpk_) :: acc + + res = dzero + if (a%is_dev()) call a%sync() + + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_d_dns_csnmi + + +! +!> Function get_diag: +!! \memberof psb_d_dns_sparse_mat +!! \brief Extract the diagonal of A. +!! +!! D(i) = A(i:i), i=1:min(nrows,ncols) +!! +!! \param d(:) The output diagonal +!! \param info return code. +! +subroutine psb_d_dns_get_diag(a,d,info) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_get_diag + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act, mnm, i + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + do i=1, mnm + d(i) = a%val(i,i) + end do + do i=mnm+1,size(d) + d(i) = dzero + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dns_get_diag + + +! +! +!> Function reallocate_nz +!! \memberof psb_d_dns_sparse_mat +!! \brief One--parameters version of (re)allocate +!! +!! \param nz number of nonzeros to allocate for +!! i.e. makes sure that the internal storage +!! allows for NZ coefficients and their indices. +! +subroutine psb_d_dns_reallocate_nz(nz,a) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_dns_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! This is a no-op, allocation is fixed. + ! + if (a%is_dev()) call a%sync() + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dns_reallocate_nz + +! +!> Function mold: +!! \memberof psb_d_dns_sparse_mat +!! \brief Allocate a class(psb_d_dns_sparse_mat) with the +!! same dynamic type as the input. +!! This is equivalent to allocate( mold= ) and is provided +!! for those compilers not yet supporting mold. +!! \param b The output variable +!! \param info return code +! +subroutine psb_d_dns_mold(a,b,info) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_mold + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='dns_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + allocate(psb_d_dns_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dns_mold + +! +! +!> Function allocate_mnnz +!! \memberof psb_d_dns_sparse_mat +!! \brief Three-parameters version of allocate +!! +!! \param m number of rows +!! \param n number of cols +!! \param nz [estimated internally] number of nonzeros to allocate for +! +subroutine psb_d_dns_allocate_mnnz(m,n,a,nz) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + ! + integer(psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_/)) + goto 9999 + endif + + + ! Basic stuff common to all formats + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_bld() + call a%set_host() + + ! We ignore NZ in this case. + + call psb_realloc(m,n,a%val,info) + if (info == psb_success_) then + a%val = dzero + a%nnz = 0 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dns_allocate_mnnz + + +! +! +! +!> Function csgetrow: +!! \memberof psb_d_dns_sparse_mat +!! \brief Get a (subset of) row(s) +!! +!! getrow is the basic method by which the other (getblk, clip) can +!! be implemented. +!! +!! Returns the set +!! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) +!! each identifying the position of a nonzero in A +!! i.e. +!! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) +!! with IMIN<=IA(:)<=IMAX +!! with JMIN<=JA(:)<=JMAX +!! IA,JA are reallocated as necessary. +!! +!! \param imin the minimum row index we are interested in +!! \param imax the minimum row index we are interested in +!! \param nz the number of output coefficients +!! \param ia(:) the output row indices +!! \param ja(:) the output col indices +!! \param val(:) the output coefficients +!! \param info return code +!! \param jmin [1] minimum col index +!! \param jmax [a\%get_ncols()] maximum col index +!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) +!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] +!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] +!! ( iren cannot be specified with rscale/cscale) +!! \param append [false] append to ia,ja +!! \param nzin [none] if append, then first new entry should go in entry nzin+1 +!! +! +subroutine psb_d_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_csgetrow + implicit none + + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + ! + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i,j,k + character(len=20) :: name='csget' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax Function trim +!! \memberof psb_d_dns_sparse_mat +!! \brief Memory trim +!! Make sure the memory allocation of the sparse matrix is as tight as +!! possible given the actual number of nonzeros it contains. +! +subroutine psb_d_dns_trim(a) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_dns_trim + implicit none + class(psb_d_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + ! Do nothing, we are already at minimum memory. + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_dns_trim + +! +!> Function cp_from_coo: +!! \memberof psb_d_dns_sparse_mat +!! \brief Copy and convert from psb_d_coo_sparse_mat +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! + +subroutine psb_d_cp_dns_from_coo(a,b,info) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_cp_dns_from_coo + implicit none + + class(psb_d_dns_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + ! + type(psb_d_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: nza, nr, i,err_act, nc + integer(psb_ipk_), parameter :: maxtry=8 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (.not.b%is_by_rows()) then + ! This is to have fix_coo called behind the scenes + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + if (info /= psb_success_) return + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = dzero + do i=1, nza + a%val(tmp%ia(i),tmp%ja(i)) = tmp%val(i) + end do + a%nnz = nza + call tmp%free() + else + if (b%is_dev()) call b%sync() + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = dzero + do i=1, nza + a%val(b%ia(i),b%ja(i)) = b%val(i) + end do + a%nnz = nza + end if + call a%set_host() + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_cp_dns_from_coo + + + +! +!> Function cp_to_coo: +!! \memberof psb_d_dns_sparse_mat +!! \brief Copy and convert to psb_d_coo_sparse_mat +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! + +subroutine psb_d_cp_dns_to_coo(a,b,info) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_cp_dns_to_coo + implicit none + + class(psb_d_dns_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_Ipk_) :: nza, nr, nc,i,j,k,err_act + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + + k = 0 + do i=1,a%get_nrows() + do j=1,a%get_ncols() + if (a%val(i,j) /= dzero) then + k = k + 1 + b%ia(k) = i + b%ja(k) = j + b%val(k) = a%val(i,j) + end if + end do + end do + + call b%set_nzeros(nza) + call b%set_sort_status(psb_row_major_) + call b%set_asb() + call b%set_host() + +end subroutine psb_d_cp_dns_to_coo + + + +! +!> Function mv_to_coo: +!! \memberof psb_d_dns_sparse_mat +!! \brief Convert to psb_d_coo_sparse_mat, freeing the source. +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! +subroutine psb_d_mv_dns_to_coo(a,b,info) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_mv_dns_to_coo + implicit none + + class(psb_d_dns_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_to_coo(b,info) + call a%free() + return + +end subroutine psb_d_mv_dns_to_coo + + +! +!> Function mv_from_coo: +!! \memberof psb_d_dns_sparse_mat +!! \brief Convert from psb_d_coo_sparse_mat, freeing the source. +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! +! +subroutine psb_d_mv_dns_from_coo(a,b,info) + use psb_base_mod + use psb_d_dns_mat_mod, psb_protect_name => psb_d_mv_dns_from_coo + implicit none + + class(psb_d_dns_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_d_mv_dns_from_coo + diff --git a/ext/impl/psb_d_ell_aclsum.f90 b/ext/impl/psb_d_ell_aclsum.f90 new file mode 100644 index 00000000..e0bfc18d --- /dev/null +++ b/ext/impl/psb_d_ell_aclsum.f90 @@ -0,0 +1,82 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_aclsum(d,a) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_aclsum + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + abs(a%val(i,j)) + end do + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_aclsum diff --git a/ext/impl/psb_d_ell_allocate_mnnz.f90 b/ext/impl/psb_d_ell_allocate_mnnz.f90 new file mode 100644 index 00000000..95e4558c --- /dev/null +++ b/ext/impl/psb_d_ell_allocate_mnnz.f90 @@ -0,0 +1,91 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = -1 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_allocate_mnnz diff --git a/ext/impl/psb_d_ell_arwsum.f90 b/ext/impl/psb_d_ell_arwsum.f90 new file mode 100644 index 00000000..6bf3b888 --- /dev/null +++ b/ext/impl/psb_d_ell_arwsum.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_arwsum(d,a) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_arwsum + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + is_unit = a%is_unit() + + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = done + else + d(i) = dzero + end if + do j=1,a%irn(i) + d(i) = d(i) + abs(a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_arwsum diff --git a/ext/impl/psb_d_ell_colsum.f90 b/ext/impl/psb_d_ell_colsum.f90 new file mode 100644 index 00000000..9eb30ca0 --- /dev/null +++ b/ext/impl/psb_d_ell_colsum.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_colsum(d,a) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_colsum + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_colsum diff --git a/ext/impl/psb_d_ell_csgetblk.f90 b/ext/impl/psb_d_ell_csgetblk.f90 new file mode 100644 index 00000000..9725518f --- /dev/null +++ b/ext/impl/psb_d_ell_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_csgetblk + implicit none + + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='ell_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_csgetblk diff --git a/ext/impl/psb_d_ell_csgetptn.f90 b/ext/impl/psb_d_ell_csgetptn.f90 new file mode 100644 index 00000000..a050fe54 --- /dev/null +++ b/ext/impl/psb_d_ell_csgetptn.f90 @@ -0,0 +1,189 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_csgetptn + implicit none + + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_d_ell_csgetrow + implicit none + + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_d_ell_csmm + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_dpk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_ell_csmv + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_d_ell_csnm1 + + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info + real(psb_dpk_), allocatable :: vt(:) + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csnm1' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = dzero + nnz = a%get_nzeros() + m = a%get_nrows() + n = a%get_ncols() + allocate(vt(n),stat=info) + if (info /= 0) return + if (a%is_unit()) then + vt(:) = done + else + vt(:) = dzero + end if + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + vt(k) = vt(k) + abs(a%val(i,j)) + end do + end do + res = maxval(vt(1:n)) + deallocate(vt,stat=info) + + return + +end function psb_d_ell_csnm1 diff --git a/ext/impl/psb_d_ell_csnmi.f90 b/ext/impl/psb_d_ell_csnmi.f90 new file mode 100644 index 00000000..b4e3d03e --- /dev/null +++ b/ext/impl/psb_d_ell_csnmi.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_d_ell_csnmi(a) result(res) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_csnmi + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csnmi' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = dzero + is_unit = a%is_unit() + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + if (is_unit) acc = acc + done + res = max(res,acc) + end do + +end function psb_d_ell_csnmi diff --git a/ext/impl/psb_d_ell_csput.f90 b/ext/impl/psb_d_ell_csput.f90 new file mode 100644 index 00000000..d38d9d51 --- /dev/null +++ b/ext/impl/psb_d_ell_csput.f90 @@ -0,0 +1,208 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_csput_a + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + call psb_d_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info < 0) then + info = psb_err_internal_error_ + else if (info > 0) then + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarded entries not belonging to us.' + info = psb_success_ + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + +contains + + subroutine psb_d_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ilr, ilc, ip, & + & i1,i2,nr,nc,nnz,dupl + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='d_ell_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + ilr = -1 + ilc = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + + end do + + case(psb_dupl_add_) + ! Add + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = a%val(ir,ip) + val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_d_ell_srch_upd +end subroutine psb_d_ell_csput_a diff --git a/ext/impl/psb_d_ell_cssm.f90 b/ext/impl/psb_d_ell_cssm.f90 new file mode 100644 index 00000000..3c8b5f21 --- /dev/null +++ b/ext/impl/psb_d_ell_cssm.f90 @@ -0,0 +1,375 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_cssm + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_dpk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_d_ell_cssv + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + real(psb_dpk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_d_ell_get_diag + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = done + else + do i=1, mnm + if (1<=a%idiag(i).and.(a%idiag(i)<=size(a%ja,2))) then + d(i) = a%val(i,a%idiag(i)) + else + d(i) = dzero + end if + end do + end if + do i=mnm+1,size(d) + d(i) = dzero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_get_diag diff --git a/ext/impl/psb_d_ell_maxval.f90 b/ext/impl/psb_d_ell_maxval.f90 new file mode 100644 index 00000000..d0cb24d3 --- /dev/null +++ b/ext/impl/psb_d_ell_maxval.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_d_ell_maxval(a) result(res) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_maxval + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_csnmi' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = done + else + res = dzero + end if + + do i = 1, a%get_nrows() + acc = maxval(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_d_ell_maxval diff --git a/ext/impl/psb_d_ell_mold.f90 b/ext/impl/psb_d_ell_mold.f90 new file mode 100644 index 00000000..48814f3c --- /dev/null +++ b/ext/impl/psb_d_ell_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_mold(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_mold + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='ell_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_mold diff --git a/ext/impl/psb_d_ell_print.f90 b/ext/impl/psb_d_ell_print.f90 new file mode 100644 index 00000000..cf539662 --- /dev/null +++ b/ext/impl/psb_d_ell_print.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_ell_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% ELL' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_d_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(i,j)),a%val(i,j) + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(i,j)),a%val(i,j) + end do + enddo + endif + endif + +end subroutine psb_d_ell_print diff --git a/ext/impl/psb_d_ell_reallocate_nz.f90 b/ext/impl/psb_d_ell_reallocate_nz.f90 new file mode 100644 index 00000000..8f92ffad --- /dev/null +++ b/ext/impl/psb_d_ell_reallocate_nz.f90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_reallocate_nz(nz,a) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='d_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_reallocate_nz diff --git a/ext/impl/psb_d_ell_reinit.f90 b/ext/impl/psb_d_ell_reinit.f90 new file mode 100644 index 00000000..ab9a7ba2 --- /dev/null +++ b/ext/impl/psb_d_ell_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_reinit(a,clear) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_reinit + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:,:) = dzero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_reinit diff --git a/ext/impl/psb_d_ell_rowsum.f90 b/ext/impl/psb_d_ell_rowsum.f90 new file mode 100644 index 00000000..782775d4 --- /dev/null +++ b/ext/impl/psb_d_ell_rowsum.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_rowsum(d,a) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_rowsum + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical :: is_unit + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + is_unit = a%is_unit() + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = done + else + d(i) = dzero + end if + do j=1,a%irn(i) + d(i) = d(i) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_rowsum diff --git a/ext/impl/psb_d_ell_scal.f90 b/ext/impl/psb_d_ell_scal.f90 new file mode 100644 index 00000000..15be8a66 --- /dev/null +++ b/ext/impl/psb_d_ell_scal.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_scal(d,a,info,side) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_scal + implicit none + class(psb_d_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5) + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%val(i,:) = a%val(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + do i=1, m + do j=1, a%irn(i) + a%val(i,j) = a%val(i,j) * d(a%ja(i,j)) + end do + enddo + + end if + + call a%set_host() + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_scal diff --git a/ext/impl/psb_d_ell_scals.f90 b/ext/impl/psb_d_ell_scals.f90 new file mode 100644 index 00000000..501f42b0 --- /dev/null +++ b/ext/impl/psb_d_ell_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_scals(d,a,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_scals + implicit none + class(psb_d_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_scals diff --git a/ext/impl/psb_d_ell_trim.f90 b/ext/impl/psb_d_ell_trim.f90 new file mode 100644 index 00000000..8b1d52f7 --- /dev/null +++ b/ext/impl/psb_d_ell_trim.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_ell_trim(a) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_ell_trim + implicit none + class(psb_d_ell_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_ell_trim diff --git a/ext/impl/psb_d_hdia_allocate_mnnz.f90 b/ext/impl/psb_d_hdia_allocate_mnnz.f90 new file mode 100644 index 00000000..e5721754 --- /dev/null +++ b/ext/impl/psb_d_hdia_allocate_mnnz.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_hdia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_hdia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hdia_allocate_mnnz diff --git a/ext/impl/psb_d_hdia_csmv.f90 b/ext/impl/psb_d_hdia_csmv.f90 new file mode 100644 index 00000000..82599342 --- /dev/null +++ b/ext/impl/psb_d_hdia_csmv.f90 @@ -0,0 +1,162 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_d_hdia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_hdia_csmv + implicit none + class(psb_d_hdia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc,nr,nc + integer(psb_ipk_) :: irs,ics, nmx, ni + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, ncd,ib, nzhack, & + & hackfirst, hacknext + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hdia_csmv' + logical, parameter :: debug=.false. + real :: start, finish + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + info = psb_err_transpose_not_n_unsupported_ + call psb_errpush(info,name) + goto 9999 + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1)=0) then + ir1 = 1 + ! min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + jc = ir1 + rdisp + offsets(j) + do i=ir1,ir2 + y(rdisp+i) = y(rdisp+i) + alpha*data(i,j)*x(jc) + jc = jc + 1 + enddo + end do + end subroutine psi_d_inner_dia_csmv + +end subroutine psb_d_hdia_csmv diff --git a/ext/impl/psb_d_hdia_mold.f90 b/ext/impl/psb_d_hdia_mold.f90 new file mode 100644 index 00000000..cebedd44 --- /dev/null +++ b/ext/impl/psb_d_hdia_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hdia_mold(a,b,info) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_hdia_mold + implicit none + class(psb_d_hdia_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hdia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_hdia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hdia_mold diff --git a/ext/impl/psb_d_hdia_print.f90 b/ext/impl/psb_d_hdia_print.f90 new file mode 100644 index 00000000..43753299 --- /dev/null +++ b/ext/impl/psb_d_hdia_print.f90 @@ -0,0 +1,121 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_hdia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_hdia_print + use psi_ext_util_mod + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_d_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdia_print' + logical, parameter :: debug=.false. + + class(psb_d_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, k, ncd,ib, nzhack, info,& + & hackfirst, hacknext + integer(psb_ipk_), allocatable :: ia(:), ja(:) + real(psb_dpk_), allocatable :: val(:) + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% HDIA' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_d_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + + nhacks = a%nhacks + hacksize = a%hacksize + maxnzhack = 0 + do k=1, nhacks + maxnzhack = max(maxnzhack,(a%hackoffsets(k+1)-a%hackoffsets(k))) + end do + maxnzhack = hacksize*maxnzhack + allocate(ia(maxnzhack),ja(maxnzhack),val(maxnzhack),stat=info) + if (info /= 0) return + + write(iout,*) nr, nc, nz + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + ncd = hacknext-hackfirst + + call psi_d_xtr_coo_from_dia(nr,nc,& + & ia, ja, val, nzhack,& + & hacksize,ncd,& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) + !nzhack = sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + + if(present(iv)) then + do j=1,nzhack + write(iout,frmt) iv(ia(j)),iv(ja(j)),val(j) + enddo + else + if (present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ja(j),val(j) + enddo + else if (present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ja(j),val(j) + enddo + endif + end if + + end do + +end subroutine psb_d_hdia_print diff --git a/ext/impl/psb_d_hll_aclsum.f90 b/ext/impl/psb_d_hll_aclsum.f90 new file mode 100644 index 00000000..1f868edc --- /dev/null +++ b/ext/impl/psb_d_hll_aclsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_aclsum(d,a) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_aclsum + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call d_hll_aclsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine d_hll_aclsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine d_hll_aclsum + +end subroutine psb_d_hll_aclsum diff --git a/ext/impl/psb_d_hll_allocate_mnnz.f90 b/ext/impl/psb_d_hll_allocate_mnnz.f90 new file mode 100644 index 00000000..f58d0e4a --- /dev/null +++ b/ext/impl/psb_d_hll_allocate_mnnz.f90 @@ -0,0 +1,93 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m+1,a%hkoffs,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_hksz(psb_hksz_def_) + call a%set_host() + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hll_allocate_mnnz diff --git a/ext/impl/psb_d_hll_arwsum.f90 b/ext/impl/psb_d_hll_arwsum.f90 new file mode 100644 index 00000000..e5ae24fb --- /dev/null +++ b/ext/impl/psb_d_hll_arwsum.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_arwsum(d,a) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_arwsum + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call d_hll_arwsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine d_hll_arwsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + abs(val(i,j)) + end do + end do + + end subroutine d_hll_arwsum + +end subroutine psb_d_hll_arwsum diff --git a/ext/impl/psb_d_hll_colsum.f90 b/ext/impl/psb_d_hll_colsum.f90 new file mode 100644 index 00000000..8c2020ec --- /dev/null +++ b/ext/impl/psb_d_hll_colsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_colsum(d,a) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_colsum + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call d_hll_colsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine d_hll_colsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine d_hll_colsum + +end subroutine psb_d_hll_colsum diff --git a/ext/impl/psb_d_hll_csgetblk.f90 b/ext/impl/psb_d_hll_csgetblk.f90 new file mode 100644 index 00000000..185baf29 --- /dev/null +++ b/ext/impl/psb_d_hll_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_csgetblk + implicit none + + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='hll_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hll_csgetblk diff --git a/ext/impl/psb_d_hll_csgetptn.f90 b/ext/impl/psb_d_hll_csgetptn.f90 new file mode 100644 index 00000000..a7cdc148 --- /dev/null +++ b/ext/impl/psb_d_hll_csgetptn.f90 @@ -0,0 +1,209 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_csgetptn + implicit none + + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_d_hll_csgetrow + implicit none + + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_d_hll_csmm + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy,ldx,ldy,hksz,mxrwl + real(psb_dpk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hll_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + nxy = min(size(x,2) , size(y,2) ) + + + ldx = size(x,1) + ldy = size(y,1) + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (ldx psb_d_hll_csmv + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hkpnt, mxrwl, mmhk + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hll_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (size(x,1) 0) then + select case(hksz) + case(4) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_notra_4(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(8) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + &call psb_d_hll_csmv_notra_8(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(16) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_notra_16(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(24) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_notra_24(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(32) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_notra_32(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case default + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + end select + end if + if (mmhk < m) then + i = mmhk+1 + ir = m-mmhk + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + call psb_d_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + if (info /= psb_success_) goto 9999 + end if + j = j + 1 + end if + + else + + j=1 + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,m,hksz + j = ((i-1)/hksz)+1 + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_d_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + end if + end if + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_d_hll_csmv_inner(ir,m,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,tra,ctra,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit,tra,ctra + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + if (tra) then + + if (beta == done) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*val(i,j)*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (ctra) then + + if (beta == done) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*(val(i,j))*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (.not.(tra.or.ctra)) then + + if (alpha == dzero) then + if (beta == dzero) then + do i=1,m + y(ir+i-1) = dzero + end do + else + do i=1,m + y(ir+i-1) = beta*y(ir+i-1) + end do + end if + + else + if (beta == dzero) then + do i=1,m + tmp = dzero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + end do + else + do i=1,m + tmp = dzero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + beta*y(ir+i-1) + end do + endif + end if + end if + + if (is_unit) then + do i=1, min(m,n) + y(i) = y(i) + alpha*x(i) + end do + end if + + end subroutine psb_d_hll_csmv_inner + + subroutine psb_d_hll_csmv_notra_8(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, dzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=8 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = dzero + if (alpha /= dzero) then + do j=1, maxval(irn(1:8)) + tmp(1:8) = tmp(1:8) + val(1:8,j)*x(ja(1:8,j)) + end do + end if + if (beta == dzero) then + y(ir:ir+8-1) = alpha*tmp(1:8) + else + y(ir:ir+8-1) = alpha*tmp(1:8) + beta*y(ir:ir+8-1) + end if + + + if (is_unit) then + do i=1, min(8,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_d_hll_csmv_notra_8 + + subroutine psb_d_hll_csmv_notra_24(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, dzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=24 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = dzero + if (alpha /= dzero) then + do j=1, maxval(irn(1:24)) + tmp(1:24) = tmp(1:24) + val(1:24,j)*x(ja(1:24,j)) + end do + end if + if (beta == dzero) then + y(ir:ir+24-1) = alpha*tmp(1:24) + else + y(ir:ir+24-1) = alpha*tmp(1:24) + beta*y(ir:ir+24-1) + end if + + + if (is_unit) then + do i=1, min(24,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_d_hll_csmv_notra_24 + + subroutine psb_d_hll_csmv_notra_16(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, dzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=16 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = dzero + if (alpha /= dzero) then + do j=1, maxval(irn(1:16)) + tmp(1:16) = tmp(1:16) + val(1:16,j)*x(ja(1:16,j)) + end do + end if + if (beta == dzero) then + y(ir:ir+16-1) = alpha*tmp(1:16) + else + y(ir:ir+16-1) = alpha*tmp(1:16) + beta*y(ir:ir+16-1) + end if + + + if (is_unit) then + do i=1, min(16,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_d_hll_csmv_notra_16 + + subroutine psb_d_hll_csmv_notra_32(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, dzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=32 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = dzero + if (alpha /= dzero) then + do j=1, maxval(irn(1:32)) + tmp(1:32) = tmp(1:32) + val(1:32,j)*x(ja(1:32,j)) + end do + end if + if (beta == dzero) then + y(ir:ir+32-1) = alpha*tmp(1:32) + else + y(ir:ir+32-1) = alpha*tmp(1:32) + beta*y(ir:ir+32-1) + end if + + + if (is_unit) then + do i=1, min(32,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_d_hll_csmv_notra_32 + + subroutine psb_d_hll_csmv_notra_4(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, dzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=4 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = dzero + if (alpha /= dzero) then + do j=1, maxval(irn(1:4)) + tmp(1:4) = tmp(1:4) + val(1:4,j)*x(ja(1:4,j)) + end do + end if + if (beta == dzero) then + y(ir:ir+4-1) = alpha*tmp(1:4) + else + y(ir:ir+4-1) = alpha*tmp(1:4) + beta*y(ir:ir+4-1) + end if + + + if (is_unit) then + do i=1, min(4,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_d_hll_csmv_notra_4 + +end subroutine psb_d_hll_csmv diff --git a/ext/impl/psb_d_hll_csnm1.f90 b/ext/impl/psb_d_hll_csnm1.f90 new file mode 100644 index 00000000..4627a4d2 --- /dev/null +++ b/ext/impl/psb_d_hll_csnm1.f90 @@ -0,0 +1,111 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_d_hll_csnm1(a) result(res) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_csnm1 + + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info, hksz, mxrwl + real(psb_dpk_), allocatable :: vt(:) + logical :: is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hll_csnm1' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + res = dzero + if (a%is_dev()) call a%sync() + n = a%get_ncols() + m = a%get_nrows() + allocate(vt(n),stat=info) + if (Info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + if (a%is_unit()) then + vt = done + else + vt = dzero + end if + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_d_hll_csnm1_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & vt,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + res = maxval(vt) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_d_hll_csnm1_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & vt,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: vt(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + vt(jc) = vt(jc) + abs(val(i,j)) + end do + end do + end subroutine psb_d_hll_csnm1_inner + +end function psb_d_hll_csnm1 diff --git a/ext/impl/psb_d_hll_csnmi.f90 b/ext/impl/psb_d_hll_csnmi.f90 new file mode 100644 index 00000000..2b758fa3 --- /dev/null +++ b/ext/impl/psb_d_hll_csnmi.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_d_hll_csnmi(a) result(res) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_csnmi + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc, hksz, mxrwl, info + Integer(Psb_ipk_) :: err_act + logical :: is_unit + character(len=20) :: name='d_csnmi' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + info = 0 + res = dzero + if (a%is_dev()) call a%sync() + + n = a%get_ncols() + m = a%get_nrows() + is_unit = a%is_unit() + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_d_hll_csnmi_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & res,is_unit,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_d_hll_csnmi_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & res,is_unit,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: res + logical :: is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: tmp, acc + + info = psb_success_ + if (is_unit) then + tmp = done + else + tmp = dzero + end if + do i=1,m + acc = tmp + do j=1, irn(i) + acc = acc + abs(val(i,j)) + end do + res = max(acc,res) + end do + end subroutine psb_d_hll_csnmi_inner + +end function psb_d_hll_csnmi diff --git a/ext/impl/psb_d_hll_csput.f90 b/ext/impl/psb_d_hll_csput.f90 new file mode 100644 index 00000000..064e6c59 --- /dev/null +++ b/ext/impl/psb_d_hll_csput.f90 @@ -0,0 +1,233 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_csput_a + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hll_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5) + + + call psb_erractionsave(err_act) + info = psb_success_ + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + + call psb_d_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info /= psb_success_) then + + info = psb_err_invalid_mat_state_ + end if + call a%set_host() + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_d_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ip, i1,i2,nr,nc,nnz,dupl,ng,& + & hksz, hk, hkzpnt, ihkr, mxrwl, lastrow + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='d_hll_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + lastrow = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + hksz = a%get_hksz() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + + end do + + case(psb_dupl_add_) + ! Add + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_d_hll_srch_upd + +end subroutine psb_d_hll_csput_a diff --git a/ext/impl/psb_d_hll_cssm.f90 b/ext/impl/psb_d_hll_cssm.f90 new file mode 100644 index 00000000..f4f6e349 --- /dev/null +++ b/ext/impl/psb_d_hll_cssm.f90 @@ -0,0 +1,506 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_cssm + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ldx, ldy, hksz, nxy, mk, mxrwl + real(psb_dpk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hll_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + info = psb_err_missing_override_method_ + call psb_errpush(info,name) + goto 9999 + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + hksz = a%get_hksz() + + if (.not. (a%is_triangle())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + ldx = size(x,1) + ldy = size(y,1) + if (ldx psb_d_hll_cssv + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hk, mxrwl, noffs, kc, mk + real(psb_dpk_) :: acc + real(psb_dpk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_hll_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x) psb_d_hll_get_diag + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k, ke, hksz, ld,ir, mxrwl + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + ld = size(d) + if (ld< mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,ld/)) + goto 9999 + end if + + if (a%is_triangle().and.a%is_unit()) then + d(1:mnm) = done + else + + hksz = a%get_hksz() + j=1 + do i=1,mnm,hksz + ir = min(hksz,mnm-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + ke = a%hkoffs(j+1) + call psb_d_hll_get_diag_inner(ir,a%irn(i:i+ir-1),& + & a%ja(k:ke),hksz,a%val(k:ke),hksz,& + & a%idiag(i:i+ir-1),d(i:i+ir-1),info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + end if + + do i=mnm+1,size(d) + d(i) = dzero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_d_hll_get_diag_inner(m,irn,ja,ldj,val,ldv,& + & idiag,d,info) + integer(psb_ipk_), intent(in) :: m,ldj,ldv,ja(ldj,*),irn(*), idiag(*) + real(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + do i=1,m + if (idiag(i) /= 0) then + d(i) = val(i,idiag(i)) + else + d(i) = dzero + end if + end do + + end subroutine psb_d_hll_get_diag_inner + +end subroutine psb_d_hll_get_diag diff --git a/ext/impl/psb_d_hll_maxval.f90 b/ext/impl/psb_d_hll_maxval.f90 new file mode 100644 index 00000000..8408cc96 --- /dev/null +++ b/ext/impl/psb_d_hll_maxval.f90 @@ -0,0 +1,45 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_d_hll_maxval(a) result(res) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_maxval + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + if (a%is_dev()) call a%sync() + res = maxval(abs(a%val(:))) + if (a%is_unit()) res = max(res,done) + +end function psb_d_hll_maxval diff --git a/ext/impl/psb_d_hll_mold.f90 b/ext/impl/psb_d_hll_mold.f90 new file mode 100644 index 00000000..e9d721f0 --- /dev/null +++ b/ext/impl/psb_d_hll_mold.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_mold(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_mold + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hll_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_d_hll_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hll_mold diff --git a/ext/impl/psb_d_hll_print.f90 b/ext/impl/psb_d_hll_print.f90 new file mode 100644 index 00000000..93c56d5c --- /dev/null +++ b/ext/impl/psb_d_hll_print.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hll_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, k, hksz, hk, mxrwl,ir, ix + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_d_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + hksz = a%get_hksz() + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + endif + endif + +end subroutine psb_d_hll_print diff --git a/ext/impl/psb_d_hll_reallocate_nz.f90 b/ext/impl/psb_d_hll_reallocate_nz.f90 new file mode 100644 index 00000000..7abdd58f --- /dev/null +++ b/ext/impl/psb_d_hll_reallocate_nz.f90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_reallocate_nz(nz,a) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,nz_ + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='d_hll_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + nz_ = max(nz,ione) + call psb_realloc(nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(nz_,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hll_reallocate_nz diff --git a/ext/impl/psb_d_hll_reinit.f90 b/ext/impl/psb_d_hll_reinit.f90 new file mode 100644 index 00000000..6a0f34fa --- /dev/null +++ b/ext/impl/psb_d_hll_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_reinit(a,clear) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_reinit + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:) = dzero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hll_reinit diff --git a/ext/impl/psb_d_hll_rowsum.f90 b/ext/impl/psb_d_hll_rowsum.f90 new file mode 100644 index 00000000..bfa2d2e1 --- /dev/null +++ b/ext/impl/psb_d_hll_rowsum.f90 @@ -0,0 +1,110 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_rowsum(d,a) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_rowsum + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + + if (a%is_unit()) then + d = done + else + d = dzero + end if + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call d_hll_rowsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine d_hll_rowsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + (val(i,j)) + end do + end do + + end subroutine d_hll_rowsum + +end subroutine psb_d_hll_rowsum diff --git a/ext/impl/psb_d_hll_scal.f90 b/ext/impl/psb_d_hll_scal.f90 new file mode 100644 index 00000000..ed9dd9ce --- /dev/null +++ b/ext/impl/psb_d_hll_scal.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_scal(d,a,info,side) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_scal + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), ld, k, mxrwl, hksz, ir + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + info = psb_err_missing_override_method_ + call psb_errpush(info,name,i_err=ierr) + goto 9999 + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + ld = size(d) + if (left) then + m = a%get_nrows() + if (ld < m) then + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + else + n = a%get_ncols() + if (ld < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_d_hll_scal_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & left,d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_d_hll_scal_inner(ir,m,n,irn,ja,ldj,val,ldv,left,d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_dpk_), intent(in) :: d(*) + real(psb_dpk_), intent(inout) :: val(ldv,*) + logical, intent(in) :: left + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + if (left) then + do i=1,m + do j=1, irn(i) + val(i,j) = val(i,j)*d(ir+i-1) + end do + end do + else + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + val(i,j) = val(i,j)*d(jc) + end do + end do + + end if + + end subroutine psb_d_hll_scal_inner + + +end subroutine psb_d_hll_scal diff --git a/ext/impl/psb_d_hll_scals.f90 b/ext/impl/psb_d_hll_scals.f90 new file mode 100644 index 00000000..8e05cddd --- /dev/null +++ b/ext/impl/psb_d_hll_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_hll_scals(d,a,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_hll_scals + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:) = a%val(:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_hll_scals diff --git a/ext/impl/psb_d_mv_dia_from_coo.f90 b/ext/impl/psb_d_mv_dia_from_coo.f90 new file mode 100644 index 00000000..e38e975a --- /dev/null +++ b/ext/impl/psb_d_mv_dia_from_coo.f90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_mv_dia_from_coo + implicit none + + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_mv_dia_from_coo diff --git a/ext/impl/psb_d_mv_dia_to_coo.f90 b/ext/impl/psb_d_mv_dia_to_coo.f90 new file mode 100644 index 00000000..d8ac7a69 --- /dev/null +++ b/ext/impl/psb_d_mv_dia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_mv_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psb_d_mv_dia_to_coo + implicit none + + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_d_mv_dia_to_coo diff --git a/ext/impl/psb_d_mv_ell_from_coo.f90 b/ext/impl/psb_d_mv_ell_from_coo.f90 new file mode 100644 index 00000000..8f98daab --- /dev/null +++ b/ext/impl/psb_d_mv_ell_from_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_mv_ell_from_coo + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, ir, ic + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_d_mv_ell_from_coo diff --git a/ext/impl/psb_d_mv_ell_from_fmt.f90 b/ext/impl/psb_d_mv_ell_from_fmt.f90 new file mode 100644 index 00000000..6589fd0a --- /dev/null +++ b/ext/impl/psb_d_mv_ell_from_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_mv_ell_from_fmt + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_d_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_mv_ell_from_fmt diff --git a/ext/impl/psb_d_mv_ell_to_coo.f90 b/ext/impl/psb_d_mv_ell_to_coo.f90 new file mode 100644 index 00000000..a1220a6e --- /dev/null +++ b/ext/impl/psb_d_mv_ell_to_coo.f90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_mv_ell_to_coo + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + ! Taking a path slightly slower but with less memory footprint + deallocate(a%idiag) + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + + call psb_realloc(nza,b%ia,info) + if (info == 0) call psb_realloc(nza,b%ja,info) + if (info /= 0) goto 9999 + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + end do + end do + deallocate(a%ja, stat=info) + + if (info == 0) call psb_realloc(nza,b%val,info) + if (info /= 0) goto 9999 + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%val(k) = a%val(i,j) + end do + end do + call a%free() + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_d_mv_ell_to_coo diff --git a/ext/impl/psb_d_mv_ell_to_fmt.f90 b/ext/impl/psb_d_mv_ell_to_fmt.f90 new file mode 100644 index 00000000..a5975360 --- /dev/null +++ b/ext/impl/psb_d_mv_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psb_d_mv_ell_to_fmt + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_d_ell_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_mv_ell_to_fmt diff --git a/ext/impl/psb_d_mv_hdia_from_coo.f90 b/ext/impl/psb_d_mv_hdia_from_coo.f90 new file mode 100644 index 00000000..68caea34 --- /dev/null +++ b/ext/impl/psb_d_mv_hdia_from_coo.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_mv_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_mv_hdia_from_coo + implicit none + + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.(b%is_by_rows())) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_mv_hdia_from_coo diff --git a/ext/impl/psb_d_mv_hdia_to_coo.f90 b/ext/impl/psb_d_mv_hdia_to_coo.f90 new file mode 100644 index 00000000..595e20a2 --- /dev/null +++ b/ext/impl/psb_d_mv_hdia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_d_mv_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_d_hdia_mat_mod, psb_protect_name => psb_d_mv_hdia_to_coo + implicit none + + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_d_mv_hdia_to_coo diff --git a/ext/impl/psb_d_mv_hll_from_coo.f90 b/ext/impl/psb_d_mv_hll_from_coo.f90 new file mode 100644 index 00000000..78faed4b --- /dev/null +++ b/ext/impl/psb_d_mv_hll_from_coo.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_mv_hll_from_coo + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: hksz + info = psb_success_ + if (.not.b%is_by_rows()) call b%fix(info) + hksz = psi_get_hksz() + call psi_convert_hll_from_coo(a,hksz,b,info) + if (info /= 0) goto 9999 + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_mv_hll_from_coo diff --git a/ext/impl/psb_d_mv_hll_from_fmt.f90 b/ext/impl/psb_d_mv_hll_from_fmt.f90 new file mode 100644 index 00000000..76a2f2fb --- /dev/null +++ b/ext/impl/psb_d_mv_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_mv_hll_from_fmt + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_d_hll_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%hkoffs, a%hkoffs) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + a%hksz = b%hksz + a%nzt = b%nzt + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_mv_hll_from_fmt diff --git a/ext/impl/psb_d_mv_hll_to_coo.f90 b/ext/impl/psb_d_mv_hll_to_coo.f90 new file mode 100644 index 00000000..fbc9111b --- /dev/null +++ b/ext/impl/psb_d_mv_hll_to_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_mv_hll_to_coo + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%cp_to_coo(b,info) + + if (info /= psb_success_) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_d_mv_hll_to_coo diff --git a/ext/impl/psb_d_mv_hll_to_fmt.f90 b/ext/impl/psb_d_mv_hll_to_fmt.f90 new file mode 100644 index 00000000..8022b2e5 --- /dev/null +++ b/ext/impl/psb_d_mv_hll_to_fmt.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psb_d_mv_hll_to_fmt + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_d_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_d_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%hkoffs, b%hkoffs) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + b%hksz = a%hksz + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_d_mv_hll_to_fmt diff --git a/ext/impl/psb_s_cp_dia_from_coo.f90 b/ext/impl/psb_s_cp_dia_from_coo.f90 new file mode 100644 index 00000000..6d9a0749 --- /dev/null +++ b/ext/impl/psb_s_cp_dia_from_coo.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_cp_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_cp_dia_from_coo + implicit none + + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_convert_dia_from_coo(a,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_dia_from_coo(a,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cp_dia_from_coo diff --git a/ext/impl/psb_s_cp_dia_to_coo.f90 b/ext/impl/psb_s_cp_dia_to_coo.f90 new file mode 100644 index 00000000..c0cd5d32 --- /dev/null +++ b/ext/impl/psb_s_cp_dia_to_coo.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_cp_dia_to_coo + implicit none + + class(psb_s_dia_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: i, j, k,nr,nza,nc, nzd + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + + call psi_s_xtr_coo_from_dia(nr,nc,& + & b%ia, b%ja, b%val, nzd, & + & size(a%data,1),size(a%data,2),& + & a%data,a%offset,info) + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +end subroutine psb_s_cp_dia_to_coo diff --git a/ext/impl/psb_s_cp_ell_from_coo.f90 b/ext/impl/psb_s_cp_ell_from_coo.f90 new file mode 100644 index 00000000..f178a05c --- /dev/null +++ b/ext/impl/psb_s_cp_ell_from_coo.f90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_cp_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc + integer(psb_ipk_) :: nzm, ir, ic, k + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + ! This is to have fix_coo called behind the scenes + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_s_convert_ell_from_coo(a,b,info) + else + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call psi_s_convert_ell_from_coo(a,tmp,info) + if (info == psb_success_) call tmp%free() + end if + if (info /= psb_success_) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + +end subroutine psb_s_cp_ell_from_coo diff --git a/ext/impl/psb_s_cp_ell_from_fmt.f90 b/ext/impl/psb_s_cp_ell_from_fmt.f90 new file mode 100644 index 00000000..bffe3d85 --- /dev/null +++ b/ext/impl/psb_s_cp_ell_from_fmt.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_cp_ell_from_fmt + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b,info) + + type is (psb_s_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_s_cp_ell_from_fmt diff --git a/ext/impl/psb_s_cp_ell_to_coo.f90 b/ext/impl/psb_s_cp_ell_to_coo.f90 new file mode 100644 index 00000000..b8acddfc --- /dev/null +++ b/ext/impl/psb_s_cp_ell_to_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_cp_ell_to_coo + implicit none + + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: i, j, k, nr, nc, nza + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + b%val(k) = a%val(i,j) + end do + end do + call b%set_nzeros(a%get_nzeros()) + call b%fix(info) + call b%set_host() + +end subroutine psb_s_cp_ell_to_coo diff --git a/ext/impl/psb_s_cp_ell_to_fmt.f90 b/ext/impl/psb_s_cp_ell_to_fmt.f90 new file mode 100644 index 00000000..58fe3756 --- /dev/null +++ b/ext/impl/psb_s_cp_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_cp_ell_to_fmt + implicit none + + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_s_ell_sparse_mat) + if (a%is_dev()) call a%sync() + + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_cp_ell_to_fmt diff --git a/ext/impl/psb_s_cp_hdia_from_coo.f90 b/ext/impl/psb_s_cp_hdia_from_coo.f90 new file mode 100644 index 00000000..b3d427d9 --- /dev/null +++ b/ext/impl/psb_s_cp_hdia_from_coo.f90 @@ -0,0 +1,222 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_cp_hdia_from_coo + implicit none + + class(psb_s_hdia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call inner_cp_hdia_from_coo(a,b,info) + if (info /= psb_success_) goto 9999 + else + call b%cp_to_coo(tmp,info) + if (info /= psb_success_) goto 9999 + if (.not.tmp%is_by_rows()) call tmp%fix(info) + if (info /= psb_success_) goto 9999 + call inner_cp_hdia_from_coo(a,tmp,info) + if (info /= psb_success_) goto 9999 + call tmp%free() + end if + call a%set_host() + + return + +9999 continue + + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine inner_cp_hdia_from_coo(a,tmp,info) + use psb_base_mod + use psi_ext_util_mod + + implicit none + class(psb_s_hdia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,mi,mj,dm,bi,w + integer(psb_ipk_),allocatable :: d(:), offset(:), irsz(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext, nzout + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + logical, parameter :: debug=.false. + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = tmp%psb_s_base_sparse_mat + + hacksize = a%hacksize + a%nhacks = (nr+hacksize-1)/hacksize + nhacks = a%nhacks + + ndiag = nr+nc-1 + if (info == psb_success_) call psb_realloc(nr,irsz,info) + if (info == psb_success_) call psb_realloc(ndiag,d,info) + if (info == psb_success_) call psb_realloc(ndiag,offset,info) + if (info == psb_success_) call psb_realloc(nhacks+1,a%hackoffsets,info) + if (info /= psb_success_) return + + irsz = 0 + do k=1,nza + ir = tmp%ia(k) + irsz(ir) = irsz(ir)+1 + end do + + a%nzeros = 0 + d = 0 + iszd = 0 + a%hackOffsets(1)=0 + klast1 = 1 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + if (debug) then + write(*,*) 'Loop iteration ',k,nhacks,i,ib,nr + write(*,*) 'RW:',tmp%ia(kfirst),tmp%ia(klast1-1) + write(*,*) 'CL:',tmp%ja(kfirst),tmp%ja(klast1-1) + end if + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, offset, info, initd=.false., cleard=.true.) + iszd = iszd + nd + a%hackOffsets(k+1)=iszd + if (debug) write(*,*) 'From chunk ',k,i,ib,sum(irsz(i:i+ib-1)),': ',nd, iszd + if (debug) write(*,*) 'offset ', offset(1:nd) + end do + if (debug) then + write(*,*) 'Hackcount ',nhacks,' Allocation height ',iszd + write(*,*) 'Hackoffsets ',a%hackOffsets(:) + end if + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%diaOffsets,info) + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%val,info) + if (info /= psb_success_) return + klast1 = 1 + ! + ! Second run: copy elements + ! + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, a%diaOffsets(hackfirst+1:hacknext), info, & + & initd=.false., cleard=.false.) + if (debug) write(*,*) 'Out from dia_offset: ', a%diaOffsets(hackfirst+1:hacknext) + call psi_s_xtr_dia_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & tmp%val(kfirst:klast1-1), & + & d,hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),info,& + & initdata=.true.,rdisp=(i-1)) + + call countnz(nr,nc,(i-1),hacksize,(hacknext-hackfirst),& + & a%diaOffsets(hackfirst+1:hacknext),nzout) + a%nzeros = a%nzeros + nzout + call cleand(nr,(hacknext-hackfirst),d,a%diaOffsets(hackfirst+1:hacknext)) + + end do + if (debug) then + write(*,*) 'NZEROS: ',a%nzeros, nza + write(*,*) 'diaoffsets: ',a%diaOffsets(1:iszd) + write(*,*) 'values: ' + j=0 + do k=1,nhacks + write(*,*) 'Hack No. ',k + do i=1,hacksize*(iszd/nhacks) + j = j + 1 + write(*,*) j, a%val(j) + end do + end do + end if + end subroutine inner_cp_hdia_from_coo + + subroutine countnz(nr,nc,rdisp,nrd,ncd,offsets,nz) + implicit none + integer(psb_ipk_), intent(in) :: nr,nc,nrd,ncd,rdisp,offsets(:) + integer(psb_ipk_), intent(out) :: nz + ! + integer(psb_ipk_) :: i,j,k, ir, jc, m4, ir1, ir2, nrcmdisp, rdisp1 + nz = 0 + nrcmdisp = min(nr-rdisp,nc-rdisp) + rdisp1 = 1-rdisp + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + nz = nz + (ir2-ir1+1) + end do + end subroutine countnz + + subroutine cleand(nr,nd,d,offset) + implicit none + integer(psb_ipk_), intent(in) :: nr,nd,offset(:) + integer(psb_ipk_), intent(inout) :: d(:) + integer(psb_ipk_) :: i,id + + do i=1,nd + id = offset(i) + nr + d(id) = 0 + end do + end subroutine cleand + +end subroutine psb_s_cp_hdia_from_coo diff --git a/ext/impl/psb_s_cp_hdia_to_coo.f90 b/ext/impl/psb_s_cp_hdia_to_coo.f90 new file mode 100644 index 00000000..8e90e236 --- /dev/null +++ b/ext/impl/psb_s_cp_hdia_to_coo.f90 @@ -0,0 +1,84 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_cp_hdia_to_coo + use psi_ext_util_mod + implicit none + + class(psb_s_hdia_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + call b%set_nzeros(nza) + call b%set_sort_status(psb_unsorted_) + nhacks = a%nhacks + hacksize = a%hacksize + j = 0 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_s_xtr_coo_from_dia(nr,nc,& + & b%ia(j+1:), b%ja(j+1:), b%val(j+1:), nzd, & + & hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) +!!$ write(*,*) 'diaoffsets',ib,' : ',ib - abs(a%diaOffsets(hackfirst+1:hacknext)) +!!$ write(*,*) 'sum',ib,j,' : ',sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + j = j + nzd + end do + if (nza /= j) then + write(*,*) 'Wrong counts in hdia_to_coo',j,nza + info = -8 + return + end if + call b%set_host() + call b%fix(info) + +end subroutine psb_s_cp_hdia_to_coo diff --git a/ext/impl/psb_s_cp_hll_from_coo.f90 b/ext/impl/psb_s_cp_hll_from_coo.f90 new file mode 100644 index 00000000..9d75f994 --- /dev/null +++ b/ext/impl/psb_s_cp_hll_from_coo.f90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_cp_hll_from_coo + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + character(len=20) :: name='hll_from_coo' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + if (b%is_dev()) call b%sync() + hksz = psi_get_hksz() + if (b%is_by_rows()) then + call psi_convert_hll_from_coo(a,hksz,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_hll_from_coo(a,hksz,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_cp_hll_from_coo diff --git a/ext/impl/psb_s_cp_hll_from_fmt.f90 b/ext/impl/psb_s_cp_hll_from_fmt.f90 new file mode 100644 index 00000000..8f010902 --- /dev/null +++ b/ext/impl/psb_s_cp_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_cp_hll_from_fmt + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + class is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_s_hll_sparse_mat) + ! write(0,*) 'From type_hll' + if (b%is_dev()) call b%sync() + + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%hkoffs, a%hkoffs, info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) a%hksz = b%hksz + if (info == 0) a%nzt = b%nzt + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_s_cp_hll_from_fmt diff --git a/ext/impl/psb_s_cp_hll_to_coo.f90 b/ext/impl/psb_s_cp_hll_to_coo.f90 new file mode 100644 index 00000000..74502ba2 --- /dev/null +++ b/ext/impl/psb_s_cp_hll_to_coo.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_cp_hll_to_coo + implicit none + + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j, jj,k,ir, isz,err_act, hksz, hk, mxrwl,& + & irs, nzblk, kc + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + + j = 1 + kc = 1 + k = 1 + hksz = a%hksz + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + nzblk = sum(a%irn(i:i+ir-1)) + call inner_copy(i,ir,b%ia(kc:kc+nzblk-1),& + & b%ja(kc:kc+nzblk-1),b%val(kc:kc+nzblk-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & hksz) + k = k + isz + kc = kc + nzblk + + enddo + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +contains + + subroutine inner_copy(i,ir,iac,& + & jac,valc,ja,val,irn,ld) + integer(psb_ipk_) :: i,ir,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*) + real(psb_spk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc + kc = 1 + do ii = 1, ir + nc = irn(ii) + do jj=1,nc + iac(kc) = i+ii-1 + jac(kc) = ja(ii,jj) + valc(kc) = val(ii,jj) + kc = kc + 1 + end do + end do + + end subroutine inner_copy + +end subroutine psb_s_cp_hll_to_coo diff --git a/ext/impl/psb_s_cp_hll_to_fmt.f90 b/ext/impl/psb_s_cp_hll_to_fmt.f90 new file mode 100644 index 00000000..f7adaa54 --- /dev/null +++ b/ext/impl/psb_s_cp_hll_to_fmt.f90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_cp_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_cp_hll_to_fmt + implicit none + + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_s_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%hkoffs, b%hkoffs , info) + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + if (info == 0) b%hksz = a%hksz + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_cp_hll_to_fmt diff --git a/ext/impl/psb_s_dia_aclsum.f90 b/ext/impl/psb_s_dia_aclsum.f90 new file mode 100644 index 00000000..718a2424 --- /dev/null +++ b/ext/impl/psb_s_dia_aclsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_aclsum(d,a) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_aclsum + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_aclsum diff --git a/ext/impl/psb_s_dia_allocate_mnnz.f90 b/ext/impl/psb_s_dia_allocate_mnnz.f90 new file mode 100644 index 00000000..df56c4a6 --- /dev/null +++ b/ext/impl/psb_s_dia_allocate_mnnz.f90 @@ -0,0 +1,88 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -ione )/m + else + nz_ = ((max(7*m,7*n,ione)+m-ione)/m) + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,nz_,a%data,info) + if (info == psb_success_) call psb_realloc(m+n,a%offset,info) + if (info == psb_success_) then + a%data = 0 + a%offset = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_allocate_mnnz diff --git a/ext/impl/psb_s_dia_arwsum.f90 b/ext/impl/psb_s_dia_arwsum.f90 new file mode 100644 index 00000000..5a974bbf --- /dev/null +++ b/ext/impl/psb_s_dia_arwsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_arwsum(d,a) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_arwsum + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_arwsum diff --git a/ext/impl/psb_s_dia_colsum.f90 b/ext/impl/psb_s_dia_colsum.f90 new file mode 100644 index 00000000..e60eb88f --- /dev/null +++ b/ext/impl/psb_s_dia_colsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_colsum(d,a) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_colsum + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_colsum diff --git a/ext/impl/psb_s_dia_csgetptn.f90 b/ext/impl/psb_s_dia_csgetptn.f90 new file mode 100644 index 00000000..f946eb73 --- /dev/null +++ b/ext/impl/psb_s_dia_csgetptn.f90 @@ -0,0 +1,188 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_csgetptn + implicit none + + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + + + end subroutine dia_getptn + +end subroutine psb_s_dia_csgetptn diff --git a/ext/impl/psb_s_dia_csgetrow.f90 b/ext/impl/psb_s_dia_csgetrow.f90 new file mode 100644 index 00000000..b79e2470 --- /dev/null +++ b/ext/impl/psb_s_dia_csgetrow.f90 @@ -0,0 +1,199 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_csgetrow + implicit none + + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + if (chksz) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,val,info) + end if + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + val(nzin_) = a%data(i,j) + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + end subroutine dia_getrow +end subroutine psb_s_dia_csgetrow diff --git a/ext/impl/psb_s_dia_csmm.f90 b/ext/impl/psb_s_dia_csmm.f90 new file mode 100644 index 00000000..9f586dbb --- /dev/null +++ b/ext/impl/psb_s_dia_csmm.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_csmm + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_dia_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i,1:nxy) = y(i,1:nxy) + alpha*data(i,j)*x(i+off(j),1:nxy) + enddo + enddo + + end subroutine psb_s_dia_csmm_inner + +end subroutine psb_s_dia_csmm diff --git a/ext/impl/psb_s_dia_csmv.f90 b/ext/impl/psb_s_dia_csmv.f90 new file mode 100644 index 00000000..1a23932e --- /dev/null +++ b/ext/impl/psb_s_dia_csmv.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_csmv + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_dia_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i) = y(i) + alpha*data(i,j)*x(i+off(j)) + enddo + enddo + + end subroutine psb_s_dia_csmv_inner + +end subroutine psb_s_dia_csmv diff --git a/ext/impl/psb_s_dia_get_diag.f90 b/ext/impl/psb_s_dia_get_diag.f90 new file mode 100644 index 00000000..5909c72a --- /dev/null +++ b/ext/impl/psb_s_dia_get_diag.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_get_diag(a,d,info) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_get_diag + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = sone + else + do i=1, size(a%offset) + if (a%offset(i) == 0) then + d(1:mnm) = a%data(1:mnm,i) + exit + end if + end do + end if + do i=mnm+1,size(d) + d(i) = szero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_get_diag diff --git a/ext/impl/psb_s_dia_maxval.f90 b/ext/impl/psb_s_dia_maxval.f90 new file mode 100644 index 00000000..5f672644 --- /dev/null +++ b/ext/impl/psb_s_dia_maxval.f90 @@ -0,0 +1,54 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +function psb_s_dia_maxval(a) result(res) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_maxval + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_maxval' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = sone + else + res = szero + end if + + res = max(res,maxval(abs(a%data))) + +end function psb_s_dia_maxval diff --git a/ext/impl/psb_s_dia_mold.f90 b/ext/impl/psb_s_dia_mold.f90 new file mode 100644 index 00000000..a65379a4 --- /dev/null +++ b/ext/impl/psb_s_dia_mold.f90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_mold(a,b,info) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_mold + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_dia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_mold diff --git a/ext/impl/psb_s_dia_print.f90 b/ext/impl/psb_s_dia_print.f90 new file mode 100644 index 00000000..a0de1ba8 --- /dev/null +++ b/ext/impl/psb_s_dia_print.f90 @@ -0,0 +1,148 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_dia_print' + logical, parameter :: debug=.false. + + class(psb_s_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, jc, ir1, ir2 + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_s_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + write(iout,*) nr, nc, nz + + nc=size(a%data,2) + + + + if(present(iv)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) iv(i),iv(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),(i+jc),a%data(i,j) + enddo + enddo + + endif + +end subroutine psb_s_dia_print diff --git a/ext/impl/psb_s_dia_reallocate_nz.f90 b/ext/impl/psb_s_dia_reallocate_nz.f90 new file mode 100644 index 00000000..d37d9e5f --- /dev/null +++ b/ext/impl/psb_s_dia_reallocate_nz.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_reallocate_nz(nz,a) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='s_dia_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! Ans: NOTHING. + ! + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_reallocate_nz diff --git a/ext/impl/psb_s_dia_reinit.f90 b/ext/impl/psb_s_dia_reinit.f90 new file mode 100644 index 00000000..dd109783 --- /dev/null +++ b/ext/impl/psb_s_dia_reinit.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_reinit(a,clear) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_reinit + implicit none + + class(psb_s_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%data(:,:) = szero + call a%set_upd() + call a%set_host() + + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_reinit diff --git a/ext/impl/psb_s_dia_rowsum.f90 b/ext/impl/psb_s_dia_rowsum.f90 new file mode 100644 index 00000000..3f32a2b2 --- /dev/null +++ b/ext/impl/psb_s_dia_rowsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_rowsum(d,a) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_rowsum + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_rowsum diff --git a/ext/impl/psb_s_dia_scal.f90 b/ext/impl/psb_s_dia_scal.f90 new file mode 100644 index 00000000..7ccf881a --- /dev/null +++ b/ext/impl/psb_s_dia_scal.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_dia_scal(d,a,info,side) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_scal + implicit none + class(psb_s_dia_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), nc, jc, nr, ir1, ir2 + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%data(i,:) = a%data(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + nr=size(a%data,1) + nc=size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + a%data(i,j) = a%data(i,j) * d(i+jc) + enddo + enddo + + end if + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_scal diff --git a/ext/impl/psb_s_dia_scals.f90 b/ext/impl/psb_s_dia_scals.f90 new file mode 100644 index 00000000..da1bc94e --- /dev/null +++ b/ext/impl/psb_s_dia_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_dia_scals(d,a,info) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_dia_scals + implicit none + class(psb_s_dia_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%data(:,:) = a%data(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dia_scals diff --git a/ext/impl/psb_s_dns_mat_impl.f90 b/ext/impl/psb_s_dns_mat_impl.f90 new file mode 100644 index 00000000..f6f458c8 --- /dev/null +++ b/ext/impl/psb_s_dns_mat_impl.f90 @@ -0,0 +1,724 @@ + +!> Function csmv: +!! \memberof psb_s_dns_sparse_mat +!! \brief Product by a dense rank 1 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_s_dns_csmv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_csmv + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act, m, n, lda + character(len=20) :: name='s_dns_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (trans_ == 'N') then + m=a%get_nrows() + n=a%get_ncols() + else + n=a%get_nrows() + m=a%get_ncols() + end if + lda = size(a%val,1) + + + call sgemv(trans_,a%get_nrows(),a%get_ncols(),alpha,& + & a%val,size(a%val,1),x,1,beta,y,1) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dns_csmv + + +!> Function csmm: +!! \memberof psb_s_dns_sparse_mat +!! \brief Product by a dense rank 2 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:,:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:,:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_s_dns_csmm(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_csmm + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act,m,n,k, lda, ldx, ldy + character(len=20) :: name='s_dns_csmm' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (psb_toupper(trans_)=='N') then + m = a%get_nrows() + k = a%get_ncols() + n = min(size(y,2),size(x,2)) + else + k = a%get_nrows() + m = a%get_ncols() + n = min(size(y,2),size(x,2)) + end if + lda = size(a%val,1) + ldx = size(x,1) + ldy = size(y,1) + call sgemm(trans_,'N',m,n,k,alpha,a%val,lda,x,ldx,beta,y,ldy) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dns_csmm + + + +! +! +!> Function csnmi: +!! \memberof psb_s_dns_sparse_mat +!! \brief Operator infinity norm +!! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) +!! +! +function psb_s_dns_csnmi(a) result(res) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_csnmi + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + ! + integer(psb_ipk_) :: i + real(psb_spk_) :: acc + + res = szero + if (a%is_dev()) call a%sync() + + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_s_dns_csnmi + + +! +!> Function get_diag: +!! \memberof psb_s_dns_sparse_mat +!! \brief Extract the diagonal of A. +!! +!! D(i) = A(i:i), i=1:min(nrows,ncols) +!! +!! \param d(:) The output diagonal +!! \param info return code. +! +subroutine psb_s_dns_get_diag(a,d,info) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_get_diag + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act, mnm, i + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + do i=1, mnm + d(i) = a%val(i,i) + end do + do i=mnm+1,size(d) + d(i) = szero + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dns_get_diag + + +! +! +!> Function reallocate_nz +!! \memberof psb_s_dns_sparse_mat +!! \brief One--parameters version of (re)allocate +!! +!! \param nz number of nonzeros to allocate for +!! i.e. makes sure that the internal storage +!! allows for NZ coefficients and their indices. +! +subroutine psb_s_dns_reallocate_nz(nz,a) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_dns_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! This is a no-op, allocation is fixed. + ! + if (a%is_dev()) call a%sync() + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dns_reallocate_nz + +! +!> Function mold: +!! \memberof psb_s_dns_sparse_mat +!! \brief Allocate a class(psb_s_dns_sparse_mat) with the +!! same dynamic type as the input. +!! This is equivalent to allocate( mold= ) and is provided +!! for those compilers not yet supporting mold. +!! \param b The output variable +!! \param info return code +! +subroutine psb_s_dns_mold(a,b,info) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_mold + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='dns_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + allocate(psb_s_dns_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dns_mold + +! +! +!> Function allocate_mnnz +!! \memberof psb_s_dns_sparse_mat +!! \brief Three-parameters version of allocate +!! +!! \param m number of rows +!! \param n number of cols +!! \param nz [estimated internally] number of nonzeros to allocate for +! +subroutine psb_s_dns_allocate_mnnz(m,n,a,nz) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + ! + integer(psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_/)) + goto 9999 + endif + + + ! Basic stuff common to all formats + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_bld() + call a%set_host() + + ! We ignore NZ in this case. + + call psb_realloc(m,n,a%val,info) + if (info == psb_success_) then + a%val = szero + a%nnz = 0 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dns_allocate_mnnz + + +! +! +! +!> Function csgetrow: +!! \memberof psb_s_dns_sparse_mat +!! \brief Get a (subset of) row(s) +!! +!! getrow is the basic method by which the other (getblk, clip) can +!! be implemented. +!! +!! Returns the set +!! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) +!! each identifying the position of a nonzero in A +!! i.e. +!! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) +!! with IMIN<=IA(:)<=IMAX +!! with JMIN<=JA(:)<=JMAX +!! IA,JA are reallocated as necessary. +!! +!! \param imin the minimum row index we are interested in +!! \param imax the minimum row index we are interested in +!! \param nz the number of output coefficients +!! \param ia(:) the output row indices +!! \param ja(:) the output col indices +!! \param val(:) the output coefficients +!! \param info return code +!! \param jmin [1] minimum col index +!! \param jmax [a\%get_ncols()] maximum col index +!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) +!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] +!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] +!! ( iren cannot be specified with rscale/cscale) +!! \param append [false] append to ia,ja +!! \param nzin [none] if append, then first new entry should go in entry nzin+1 +!! +! +subroutine psb_s_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_csgetrow + implicit none + + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + ! + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i,j,k + character(len=20) :: name='csget' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax Function trim +!! \memberof psb_s_dns_sparse_mat +!! \brief Memory trim +!! Make sure the memory allocation of the sparse matrix is as tight as +!! possible given the actual number of nonzeros it contains. +! +subroutine psb_s_dns_trim(a) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_dns_trim + implicit none + class(psb_s_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + ! Do nothing, we are already at minimum memory. + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_dns_trim + +! +!> Function cp_from_coo: +!! \memberof psb_s_dns_sparse_mat +!! \brief Copy and convert from psb_s_coo_sparse_mat +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! + +subroutine psb_s_cp_dns_from_coo(a,b,info) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_cp_dns_from_coo + implicit none + + class(psb_s_dns_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + ! + type(psb_s_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: nza, nr, i,err_act, nc + integer(psb_ipk_), parameter :: maxtry=8 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (.not.b%is_by_rows()) then + ! This is to have fix_coo called behind the scenes + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + if (info /= psb_success_) return + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = tmp%psb_s_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = szero + do i=1, nza + a%val(tmp%ia(i),tmp%ja(i)) = tmp%val(i) + end do + a%nnz = nza + call tmp%free() + else + if (b%is_dev()) call b%sync() + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = szero + do i=1, nza + a%val(b%ia(i),b%ja(i)) = b%val(i) + end do + a%nnz = nza + end if + call a%set_host() + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_cp_dns_from_coo + + + +! +!> Function cp_to_coo: +!! \memberof psb_s_dns_sparse_mat +!! \brief Copy and convert to psb_s_coo_sparse_mat +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! + +subroutine psb_s_cp_dns_to_coo(a,b,info) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_cp_dns_to_coo + implicit none + + class(psb_s_dns_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_Ipk_) :: nza, nr, nc,i,j,k,err_act + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + + k = 0 + do i=1,a%get_nrows() + do j=1,a%get_ncols() + if (a%val(i,j) /= szero) then + k = k + 1 + b%ia(k) = i + b%ja(k) = j + b%val(k) = a%val(i,j) + end if + end do + end do + + call b%set_nzeros(nza) + call b%set_sort_status(psb_row_major_) + call b%set_asb() + call b%set_host() + +end subroutine psb_s_cp_dns_to_coo + + + +! +!> Function mv_to_coo: +!! \memberof psb_s_dns_sparse_mat +!! \brief Convert to psb_s_coo_sparse_mat, freeing the source. +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! +subroutine psb_s_mv_dns_to_coo(a,b,info) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_mv_dns_to_coo + implicit none + + class(psb_s_dns_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_to_coo(b,info) + call a%free() + return + +end subroutine psb_s_mv_dns_to_coo + + +! +!> Function mv_from_coo: +!! \memberof psb_s_dns_sparse_mat +!! \brief Convert from psb_s_coo_sparse_mat, freeing the source. +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! +! +subroutine psb_s_mv_dns_from_coo(a,b,info) + use psb_base_mod + use psb_s_dns_mat_mod, psb_protect_name => psb_s_mv_dns_from_coo + implicit none + + class(psb_s_dns_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_s_mv_dns_from_coo + diff --git a/ext/impl/psb_s_ell_aclsum.f90 b/ext/impl/psb_s_ell_aclsum.f90 new file mode 100644 index 00000000..2eea0cc9 --- /dev/null +++ b/ext/impl/psb_s_ell_aclsum.f90 @@ -0,0 +1,82 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_aclsum(d,a) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_aclsum + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + abs(a%val(i,j)) + end do + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_aclsum diff --git a/ext/impl/psb_s_ell_allocate_mnnz.f90 b/ext/impl/psb_s_ell_allocate_mnnz.f90 new file mode 100644 index 00000000..fd9f1b49 --- /dev/null +++ b/ext/impl/psb_s_ell_allocate_mnnz.f90 @@ -0,0 +1,91 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = -1 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_allocate_mnnz diff --git a/ext/impl/psb_s_ell_arwsum.f90 b/ext/impl/psb_s_ell_arwsum.f90 new file mode 100644 index 00000000..a47f8721 --- /dev/null +++ b/ext/impl/psb_s_ell_arwsum.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_arwsum(d,a) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_arwsum + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + is_unit = a%is_unit() + + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = sone + else + d(i) = szero + end if + do j=1,a%irn(i) + d(i) = d(i) + abs(a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_arwsum diff --git a/ext/impl/psb_s_ell_colsum.f90 b/ext/impl/psb_s_ell_colsum.f90 new file mode 100644 index 00000000..0924d8a9 --- /dev/null +++ b/ext/impl/psb_s_ell_colsum.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_colsum(d,a) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_colsum + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_colsum diff --git a/ext/impl/psb_s_ell_csgetblk.f90 b/ext/impl/psb_s_ell_csgetblk.f90 new file mode 100644 index 00000000..5468e93c --- /dev/null +++ b/ext/impl/psb_s_ell_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_csgetblk + implicit none + + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='ell_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_csgetblk diff --git a/ext/impl/psb_s_ell_csgetptn.f90 b/ext/impl/psb_s_ell_csgetptn.f90 new file mode 100644 index 00000000..07463757 --- /dev/null +++ b/ext/impl/psb_s_ell_csgetptn.f90 @@ -0,0 +1,189 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_csgetptn + implicit none + + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_s_ell_csgetrow + implicit none + + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_s_ell_csmm + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_spk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_ell_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_ell_csmv + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_s_ell_csnm1 + + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info + real(psb_spk_), allocatable :: vt(:) + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_ell_csnm1' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = szero + nnz = a%get_nzeros() + m = a%get_nrows() + n = a%get_ncols() + allocate(vt(n),stat=info) + if (info /= 0) return + if (a%is_unit()) then + vt(:) = sone + else + vt(:) = szero + end if + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + vt(k) = vt(k) + abs(a%val(i,j)) + end do + end do + res = maxval(vt(1:n)) + deallocate(vt,stat=info) + + return + +end function psb_s_ell_csnm1 diff --git a/ext/impl/psb_s_ell_csnmi.f90 b/ext/impl/psb_s_ell_csnmi.f90 new file mode 100644 index 00000000..1df9bafa --- /dev/null +++ b/ext/impl/psb_s_ell_csnmi.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_s_ell_csnmi(a) result(res) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_csnmi + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_spk_) :: acc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_csnmi' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = szero + is_unit = a%is_unit() + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + if (is_unit) acc = acc + sone + res = max(res,acc) + end do + +end function psb_s_ell_csnmi diff --git a/ext/impl/psb_s_ell_csput.f90 b/ext/impl/psb_s_ell_csput.f90 new file mode 100644 index 00000000..c0d69067 --- /dev/null +++ b/ext/impl/psb_s_ell_csput.f90 @@ -0,0 +1,208 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_csput_a + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_ell_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + call psb_s_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info < 0) then + info = psb_err_internal_error_ + else if (info > 0) then + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarded entries not belonging to us.' + info = psb_success_ + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + +contains + + subroutine psb_s_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ilr, ilc, ip, & + & i1,i2,nr,nc,nnz,dupl + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='s_ell_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + ilr = -1 + ilc = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + + end do + + case(psb_dupl_add_) + ! Add + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = a%val(ir,ip) + val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_s_ell_srch_upd +end subroutine psb_s_ell_csput_a diff --git a/ext/impl/psb_s_ell_cssm.f90 b/ext/impl/psb_s_ell_cssm.f90 new file mode 100644 index 00000000..ca50fa35 --- /dev/null +++ b/ext/impl/psb_s_ell_cssm.f90 @@ -0,0 +1,375 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_cssm + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + real(psb_spk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_ell_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_s_ell_cssv + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + real(psb_spk_) :: acc + real(psb_spk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_ell_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_s_ell_get_diag + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = sone + else + do i=1, mnm + if (1<=a%idiag(i).and.(a%idiag(i)<=size(a%ja,2))) then + d(i) = a%val(i,a%idiag(i)) + else + d(i) = szero + end if + end do + end if + do i=mnm+1,size(d) + d(i) = szero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_get_diag diff --git a/ext/impl/psb_s_ell_maxval.f90 b/ext/impl/psb_s_ell_maxval.f90 new file mode 100644 index 00000000..6e2635b8 --- /dev/null +++ b/ext/impl/psb_s_ell_maxval.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_s_ell_maxval(a) result(res) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_maxval + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_spk_) :: acc + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_csnmi' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = sone + else + res = szero + end if + + do i = 1, a%get_nrows() + acc = maxval(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_s_ell_maxval diff --git a/ext/impl/psb_s_ell_mold.f90 b/ext/impl/psb_s_ell_mold.f90 new file mode 100644 index 00000000..4d137112 --- /dev/null +++ b/ext/impl/psb_s_ell_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_mold(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_mold + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='ell_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_mold diff --git a/ext/impl/psb_s_ell_print.f90 b/ext/impl/psb_s_ell_print.f90 new file mode 100644 index 00000000..aec15a6d --- /dev/null +++ b/ext/impl/psb_s_ell_print.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_ell_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% ELL' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_s_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(i,j)),a%val(i,j) + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(i,j)),a%val(i,j) + end do + enddo + endif + endif + +end subroutine psb_s_ell_print diff --git a/ext/impl/psb_s_ell_reallocate_nz.f90 b/ext/impl/psb_s_ell_reallocate_nz.f90 new file mode 100644 index 00000000..ff7dabda --- /dev/null +++ b/ext/impl/psb_s_ell_reallocate_nz.f90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_reallocate_nz(nz,a) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='s_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_reallocate_nz diff --git a/ext/impl/psb_s_ell_reinit.f90 b/ext/impl/psb_s_ell_reinit.f90 new file mode 100644 index 00000000..088e8398 --- /dev/null +++ b/ext/impl/psb_s_ell_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_reinit(a,clear) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_reinit + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:,:) = szero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_reinit diff --git a/ext/impl/psb_s_ell_rowsum.f90 b/ext/impl/psb_s_ell_rowsum.f90 new file mode 100644 index 00000000..092329c9 --- /dev/null +++ b/ext/impl/psb_s_ell_rowsum.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_rowsum(d,a) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_rowsum + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical :: is_unit + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + is_unit = a%is_unit() + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = sone + else + d(i) = szero + end if + do j=1,a%irn(i) + d(i) = d(i) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_rowsum diff --git a/ext/impl/psb_s_ell_scal.f90 b/ext/impl/psb_s_ell_scal.f90 new file mode 100644 index 00000000..7f39f63d --- /dev/null +++ b/ext/impl/psb_s_ell_scal.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_scal(d,a,info,side) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_scal + implicit none + class(psb_s_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5) + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%val(i,:) = a%val(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + do i=1, m + do j=1, a%irn(i) + a%val(i,j) = a%val(i,j) * d(a%ja(i,j)) + end do + enddo + + end if + + call a%set_host() + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_scal diff --git a/ext/impl/psb_s_ell_scals.f90 b/ext/impl/psb_s_ell_scals.f90 new file mode 100644 index 00000000..4bc77626 --- /dev/null +++ b/ext/impl/psb_s_ell_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_scals(d,a,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_scals + implicit none + class(psb_s_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_scals diff --git a/ext/impl/psb_s_ell_trim.f90 b/ext/impl/psb_s_ell_trim.f90 new file mode 100644 index 00000000..758a8bb5 --- /dev/null +++ b/ext/impl/psb_s_ell_trim.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_ell_trim(a) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_ell_trim + implicit none + class(psb_s_ell_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_ell_trim diff --git a/ext/impl/psb_s_hdia_allocate_mnnz.f90 b/ext/impl/psb_s_hdia_allocate_mnnz.f90 new file mode 100644 index 00000000..2c4e16fc --- /dev/null +++ b/ext/impl/psb_s_hdia_allocate_mnnz.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_hdia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_hdia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hdia_allocate_mnnz diff --git a/ext/impl/psb_s_hdia_csmv.f90 b/ext/impl/psb_s_hdia_csmv.f90 new file mode 100644 index 00000000..d945f964 --- /dev/null +++ b/ext/impl/psb_s_hdia_csmv.f90 @@ -0,0 +1,162 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_s_hdia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_hdia_csmv + implicit none + class(psb_s_hdia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc,nr,nc + integer(psb_ipk_) :: irs,ics, nmx, ni + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, ncd,ib, nzhack, & + & hackfirst, hacknext + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hdia_csmv' + logical, parameter :: debug=.false. + real :: start, finish + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + info = psb_err_transpose_not_n_unsupported_ + call psb_errpush(info,name) + goto 9999 + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1)=0) then + ir1 = 1 + ! min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + jc = ir1 + rdisp + offsets(j) + do i=ir1,ir2 + y(rdisp+i) = y(rdisp+i) + alpha*data(i,j)*x(jc) + jc = jc + 1 + enddo + end do + end subroutine psi_s_inner_dia_csmv + +end subroutine psb_s_hdia_csmv diff --git a/ext/impl/psb_s_hdia_mold.f90 b/ext/impl/psb_s_hdia_mold.f90 new file mode 100644 index 00000000..a62630c0 --- /dev/null +++ b/ext/impl/psb_s_hdia_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hdia_mold(a,b,info) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_hdia_mold + implicit none + class(psb_s_hdia_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hdia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_hdia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hdia_mold diff --git a/ext/impl/psb_s_hdia_print.f90 b/ext/impl/psb_s_hdia_print.f90 new file mode 100644 index 00000000..f4b927bc --- /dev/null +++ b/ext/impl/psb_s_hdia_print.f90 @@ -0,0 +1,121 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_hdia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_hdia_print + use psi_ext_util_mod + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_s_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdia_print' + logical, parameter :: debug=.false. + + class(psb_s_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, k, ncd,ib, nzhack, info,& + & hackfirst, hacknext + integer(psb_ipk_), allocatable :: ia(:), ja(:) + real(psb_spk_), allocatable :: val(:) + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% HDIA' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_s_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + + nhacks = a%nhacks + hacksize = a%hacksize + maxnzhack = 0 + do k=1, nhacks + maxnzhack = max(maxnzhack,(a%hackoffsets(k+1)-a%hackoffsets(k))) + end do + maxnzhack = hacksize*maxnzhack + allocate(ia(maxnzhack),ja(maxnzhack),val(maxnzhack),stat=info) + if (info /= 0) return + + write(iout,*) nr, nc, nz + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + ncd = hacknext-hackfirst + + call psi_s_xtr_coo_from_dia(nr,nc,& + & ia, ja, val, nzhack,& + & hacksize,ncd,& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) + !nzhack = sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + + if(present(iv)) then + do j=1,nzhack + write(iout,frmt) iv(ia(j)),iv(ja(j)),val(j) + enddo + else + if (present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ja(j),val(j) + enddo + else if (present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ja(j),val(j) + enddo + endif + end if + + end do + +end subroutine psb_s_hdia_print diff --git a/ext/impl/psb_s_hll_aclsum.f90 b/ext/impl/psb_s_hll_aclsum.f90 new file mode 100644 index 00000000..cf75dfb2 --- /dev/null +++ b/ext/impl/psb_s_hll_aclsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_aclsum(d,a) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_aclsum + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call s_hll_aclsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine s_hll_aclsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine s_hll_aclsum + +end subroutine psb_s_hll_aclsum diff --git a/ext/impl/psb_s_hll_allocate_mnnz.f90 b/ext/impl/psb_s_hll_allocate_mnnz.f90 new file mode 100644 index 00000000..549eccb4 --- /dev/null +++ b/ext/impl/psb_s_hll_allocate_mnnz.f90 @@ -0,0 +1,93 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m+1,a%hkoffs,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_hksz(psb_hksz_def_) + call a%set_host() + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hll_allocate_mnnz diff --git a/ext/impl/psb_s_hll_arwsum.f90 b/ext/impl/psb_s_hll_arwsum.f90 new file mode 100644 index 00000000..b93efb12 --- /dev/null +++ b/ext/impl/psb_s_hll_arwsum.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_arwsum(d,a) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_arwsum + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call s_hll_arwsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine s_hll_arwsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + abs(val(i,j)) + end do + end do + + end subroutine s_hll_arwsum + +end subroutine psb_s_hll_arwsum diff --git a/ext/impl/psb_s_hll_colsum.f90 b/ext/impl/psb_s_hll_colsum.f90 new file mode 100644 index 00000000..02cceac2 --- /dev/null +++ b/ext/impl/psb_s_hll_colsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_colsum(d,a) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_colsum + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = sone + else + d = szero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call s_hll_colsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine s_hll_colsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine s_hll_colsum + +end subroutine psb_s_hll_colsum diff --git a/ext/impl/psb_s_hll_csgetblk.f90 b/ext/impl/psb_s_hll_csgetblk.f90 new file mode 100644 index 00000000..c925e3a2 --- /dev/null +++ b/ext/impl/psb_s_hll_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_csgetblk + implicit none + + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='hll_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hll_csgetblk diff --git a/ext/impl/psb_s_hll_csgetptn.f90 b/ext/impl/psb_s_hll_csgetptn.f90 new file mode 100644 index 00000000..ccb1b6a1 --- /dev/null +++ b/ext/impl/psb_s_hll_csgetptn.f90 @@ -0,0 +1,209 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_csgetptn + implicit none + + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_s_hll_csgetrow + implicit none + + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_s_hll_csmm + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy,ldx,ldy,hksz,mxrwl + real(psb_spk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hll_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + nxy = min(size(x,2) , size(y,2) ) + + + ldx = size(x,1) + ldy = size(y,1) + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (ldx psb_s_hll_csmv + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hkpnt, mxrwl, mmhk + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_hll_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (size(x,1) 0) then + select case(hksz) + case(4) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_notra_4(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(8) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + &call psb_s_hll_csmv_notra_8(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(16) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_notra_16(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(24) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_notra_24(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(32) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_notra_32(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case default + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + end select + end if + if (mmhk < m) then + i = mmhk+1 + ir = m-mmhk + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + call psb_s_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + if (info /= psb_success_) goto 9999 + end if + j = j + 1 + end if + + else + + j=1 + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,m,hksz + j = ((i-1)/hksz)+1 + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_s_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + end if + end if + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_s_hll_csmv_inner(ir,m,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,tra,ctra,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit,tra,ctra + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp + + info = psb_success_ + if (tra) then + + if (beta == sone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*val(i,j)*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (ctra) then + + if (beta == sone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*(val(i,j))*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (.not.(tra.or.ctra)) then + + if (alpha == szero) then + if (beta == szero) then + do i=1,m + y(ir+i-1) = szero + end do + else + do i=1,m + y(ir+i-1) = beta*y(ir+i-1) + end do + end if + + else + if (beta == szero) then + do i=1,m + tmp = szero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + end do + else + do i=1,m + tmp = szero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + beta*y(ir+i-1) + end do + endif + end if + end if + + if (is_unit) then + do i=1, min(m,n) + y(i) = y(i) + alpha*x(i) + end do + end if + + end subroutine psb_s_hll_csmv_inner + + subroutine psb_s_hll_csmv_notra_8(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, szero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=8 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = szero + if (alpha /= szero) then + do j=1, maxval(irn(1:8)) + tmp(1:8) = tmp(1:8) + val(1:8,j)*x(ja(1:8,j)) + end do + end if + if (beta == szero) then + y(ir:ir+8-1) = alpha*tmp(1:8) + else + y(ir:ir+8-1) = alpha*tmp(1:8) + beta*y(ir:ir+8-1) + end if + + + if (is_unit) then + do i=1, min(8,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_s_hll_csmv_notra_8 + + subroutine psb_s_hll_csmv_notra_24(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, szero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=24 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = szero + if (alpha /= szero) then + do j=1, maxval(irn(1:24)) + tmp(1:24) = tmp(1:24) + val(1:24,j)*x(ja(1:24,j)) + end do + end if + if (beta == szero) then + y(ir:ir+24-1) = alpha*tmp(1:24) + else + y(ir:ir+24-1) = alpha*tmp(1:24) + beta*y(ir:ir+24-1) + end if + + + if (is_unit) then + do i=1, min(24,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_s_hll_csmv_notra_24 + + subroutine psb_s_hll_csmv_notra_16(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, szero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=16 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = szero + if (alpha /= szero) then + do j=1, maxval(irn(1:16)) + tmp(1:16) = tmp(1:16) + val(1:16,j)*x(ja(1:16,j)) + end do + end if + if (beta == szero) then + y(ir:ir+16-1) = alpha*tmp(1:16) + else + y(ir:ir+16-1) = alpha*tmp(1:16) + beta*y(ir:ir+16-1) + end if + + + if (is_unit) then + do i=1, min(16,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_s_hll_csmv_notra_16 + + subroutine psb_s_hll_csmv_notra_32(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, szero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=32 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = szero + if (alpha /= szero) then + do j=1, maxval(irn(1:32)) + tmp(1:32) = tmp(1:32) + val(1:32,j)*x(ja(1:32,j)) + end do + end if + if (beta == szero) then + y(ir:ir+32-1) = alpha*tmp(1:32) + else + y(ir:ir+32-1) = alpha*tmp(1:32) + beta*y(ir:ir+32-1) + end if + + + if (is_unit) then + do i=1, min(32,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_s_hll_csmv_notra_32 + + subroutine psb_s_hll_csmv_notra_4(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_spk_, szero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + real(psb_spk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=4 + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = szero + if (alpha /= szero) then + do j=1, maxval(irn(1:4)) + tmp(1:4) = tmp(1:4) + val(1:4,j)*x(ja(1:4,j)) + end do + end if + if (beta == szero) then + y(ir:ir+4-1) = alpha*tmp(1:4) + else + y(ir:ir+4-1) = alpha*tmp(1:4) + beta*y(ir:ir+4-1) + end if + + + if (is_unit) then + do i=1, min(4,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_s_hll_csmv_notra_4 + +end subroutine psb_s_hll_csmv diff --git a/ext/impl/psb_s_hll_csnm1.f90 b/ext/impl/psb_s_hll_csnm1.f90 new file mode 100644 index 00000000..6e745081 --- /dev/null +++ b/ext/impl/psb_s_hll_csnm1.f90 @@ -0,0 +1,111 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_s_hll_csnm1(a) result(res) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_csnm1 + + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info, hksz, mxrwl + real(psb_spk_), allocatable :: vt(:) + logical :: is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hll_csnm1' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + res = szero + if (a%is_dev()) call a%sync() + n = a%get_ncols() + m = a%get_nrows() + allocate(vt(n),stat=info) + if (Info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + if (a%is_unit()) then + vt = sone + else + vt = szero + end if + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_s_hll_csnm1_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & vt,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + res = maxval(vt) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_s_hll_csnm1_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & vt,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: vt(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + vt(jc) = vt(jc) + abs(val(i,j)) + end do + end do + end subroutine psb_s_hll_csnm1_inner + +end function psb_s_hll_csnm1 diff --git a/ext/impl/psb_s_hll_csnmi.f90 b/ext/impl/psb_s_hll_csnmi.f90 new file mode 100644 index 00000000..3be15f9b --- /dev/null +++ b/ext/impl/psb_s_hll_csnmi.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_s_hll_csnmi(a) result(res) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_csnmi + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc, hksz, mxrwl, info + Integer(Psb_ipk_) :: err_act + logical :: is_unit + character(len=20) :: name='s_csnmi' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + info = 0 + res = szero + if (a%is_dev()) call a%sync() + + n = a%get_ncols() + m = a%get_nrows() + is_unit = a%is_unit() + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_s_hll_csnmi_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & res,is_unit,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_s_hll_csnmi_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & res,is_unit,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: res + logical :: is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: tmp, acc + + info = psb_success_ + if (is_unit) then + tmp = sone + else + tmp = szero + end if + do i=1,m + acc = tmp + do j=1, irn(i) + acc = acc + abs(val(i,j)) + end do + res = max(acc,res) + end do + end subroutine psb_s_hll_csnmi_inner + +end function psb_s_hll_csnmi diff --git a/ext/impl/psb_s_hll_csput.f90 b/ext/impl/psb_s_hll_csput.f90 new file mode 100644 index 00000000..b12678d4 --- /dev/null +++ b/ext/impl/psb_s_hll_csput.f90 @@ -0,0 +1,233 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_csput_a + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hll_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5) + + + call psb_erractionsave(err_act) + info = psb_success_ + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + + call psb_s_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info /= psb_success_) then + + info = psb_err_invalid_mat_state_ + end if + call a%set_host() + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_s_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ip, i1,i2,nr,nc,nnz,dupl,ng,& + & hksz, hk, hkzpnt, ihkr, mxrwl, lastrow + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='s_hll_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + lastrow = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + hksz = a%get_hksz() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + + end do + + case(psb_dupl_add_) + ! Add + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_s_hll_srch_upd + +end subroutine psb_s_hll_csput_a diff --git a/ext/impl/psb_s_hll_cssm.f90 b/ext/impl/psb_s_hll_cssm.f90 new file mode 100644 index 00000000..30c77c8f --- /dev/null +++ b/ext/impl/psb_s_hll_cssm.f90 @@ -0,0 +1,506 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_cssm + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ldx, ldy, hksz, nxy, mk, mxrwl + real(psb_spk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_hll_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + info = psb_err_missing_override_method_ + call psb_errpush(info,name) + goto 9999 + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + hksz = a%get_hksz() + + if (.not. (a%is_triangle())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + ldx = size(x,1) + ldy = size(y,1) + if (ldx psb_s_hll_cssv + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hk, mxrwl, noffs, kc, mk + real(psb_spk_) :: acc + real(psb_spk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='s_hll_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x) psb_s_hll_get_diag + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k, ke, hksz, ld,ir, mxrwl + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + ld = size(d) + if (ld< mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,ld/)) + goto 9999 + end if + + if (a%is_triangle().and.a%is_unit()) then + d(1:mnm) = sone + else + + hksz = a%get_hksz() + j=1 + do i=1,mnm,hksz + ir = min(hksz,mnm-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + ke = a%hkoffs(j+1) + call psb_s_hll_get_diag_inner(ir,a%irn(i:i+ir-1),& + & a%ja(k:ke),hksz,a%val(k:ke),hksz,& + & a%idiag(i:i+ir-1),d(i:i+ir-1),info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + end if + + do i=mnm+1,size(d) + d(i) = szero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_s_hll_get_diag_inner(m,irn,ja,ldj,val,ldv,& + & idiag,d,info) + integer(psb_ipk_), intent(in) :: m,ldj,ldv,ja(ldj,*),irn(*), idiag(*) + real(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + do i=1,m + if (idiag(i) /= 0) then + d(i) = val(i,idiag(i)) + else + d(i) = szero + end if + end do + + end subroutine psb_s_hll_get_diag_inner + +end subroutine psb_s_hll_get_diag diff --git a/ext/impl/psb_s_hll_maxval.f90 b/ext/impl/psb_s_hll_maxval.f90 new file mode 100644 index 00000000..84625328 --- /dev/null +++ b/ext/impl/psb_s_hll_maxval.f90 @@ -0,0 +1,45 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_s_hll_maxval(a) result(res) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_maxval + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + + if (a%is_dev()) call a%sync() + res = maxval(abs(a%val(:))) + if (a%is_unit()) res = max(res,sone) + +end function psb_s_hll_maxval diff --git a/ext/impl/psb_s_hll_mold.f90 b/ext/impl/psb_s_hll_mold.f90 new file mode 100644 index 00000000..eb04ccd9 --- /dev/null +++ b/ext/impl/psb_s_hll_mold.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_mold(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_mold + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hll_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_s_hll_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hll_mold diff --git a/ext/impl/psb_s_hll_print.f90 b/ext/impl/psb_s_hll_print.f90 new file mode 100644 index 00000000..fb6bb38d --- /dev/null +++ b/ext/impl/psb_s_hll_print.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_hll_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, k, hksz, hk, mxrwl,ir, ix + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_s_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + hksz = a%get_hksz() + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + endif + endif + +end subroutine psb_s_hll_print diff --git a/ext/impl/psb_s_hll_reallocate_nz.f90 b/ext/impl/psb_s_hll_reallocate_nz.f90 new file mode 100644 index 00000000..f7a3076f --- /dev/null +++ b/ext/impl/psb_s_hll_reallocate_nz.f90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_reallocate_nz(nz,a) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,nz_ + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='s_hll_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + nz_ = max(nz,ione) + call psb_realloc(nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(nz_,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hll_reallocate_nz diff --git a/ext/impl/psb_s_hll_reinit.f90 b/ext/impl/psb_s_hll_reinit.f90 new file mode 100644 index 00000000..170abe08 --- /dev/null +++ b/ext/impl/psb_s_hll_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_reinit(a,clear) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_reinit + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:) = szero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hll_reinit diff --git a/ext/impl/psb_s_hll_rowsum.f90 b/ext/impl/psb_s_hll_rowsum.f90 new file mode 100644 index 00000000..c7484698 --- /dev/null +++ b/ext/impl/psb_s_hll_rowsum.f90 @@ -0,0 +1,110 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_rowsum(d,a) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_rowsum + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + + if (a%is_unit()) then + d = sone + else + d = szero + end if + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call s_hll_rowsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine s_hll_rowsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: val(ldv,*) + real(psb_spk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_spk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + (val(i,j)) + end do + end do + + end subroutine s_hll_rowsum + +end subroutine psb_s_hll_rowsum diff --git a/ext/impl/psb_s_hll_scal.f90 b/ext/impl/psb_s_hll_scal.f90 new file mode 100644 index 00000000..c8f3ddd5 --- /dev/null +++ b/ext/impl/psb_s_hll_scal.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_scal(d,a,info,side) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_scal + implicit none + class(psb_s_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), ld, k, mxrwl, hksz, ir + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + info = psb_err_missing_override_method_ + call psb_errpush(info,name,i_err=ierr) + goto 9999 + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + ld = size(d) + if (left) then + m = a%get_nrows() + if (ld < m) then + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + else + n = a%get_ncols() + if (ld < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_s_hll_scal_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & left,d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_s_hll_scal_inner(ir,m,n,irn,ja,ldj,val,ldv,left,d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + real(psb_spk_), intent(in) :: d(*) + real(psb_spk_), intent(inout) :: val(ldv,*) + logical, intent(in) :: left + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + if (left) then + do i=1,m + do j=1, irn(i) + val(i,j) = val(i,j)*d(ir+i-1) + end do + end do + else + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + val(i,j) = val(i,j)*d(jc) + end do + end do + + end if + + end subroutine psb_s_hll_scal_inner + + +end subroutine psb_s_hll_scal diff --git a/ext/impl/psb_s_hll_scals.f90 b/ext/impl/psb_s_hll_scals.f90 new file mode 100644 index 00000000..8f823a20 --- /dev/null +++ b/ext/impl/psb_s_hll_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_hll_scals(d,a,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_hll_scals + implicit none + class(psb_s_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:) = a%val(:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_s_hll_scals diff --git a/ext/impl/psb_s_mv_dia_from_coo.f90 b/ext/impl/psb_s_mv_dia_from_coo.f90 new file mode 100644 index 00000000..d7dcfc1b --- /dev/null +++ b/ext/impl/psb_s_mv_dia_from_coo.f90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_mv_dia_from_coo + implicit none + + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_mv_dia_from_coo diff --git a/ext/impl/psb_s_mv_dia_to_coo.f90 b/ext/impl/psb_s_mv_dia_to_coo.f90 new file mode 100644 index 00000000..c0944b21 --- /dev/null +++ b/ext/impl/psb_s_mv_dia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_mv_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psb_s_mv_dia_to_coo + implicit none + + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_s_mv_dia_to_coo diff --git a/ext/impl/psb_s_mv_ell_from_coo.f90 b/ext/impl/psb_s_mv_ell_from_coo.f90 new file mode 100644 index 00000000..90965e41 --- /dev/null +++ b/ext/impl/psb_s_mv_ell_from_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_mv_ell_from_coo + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, ir, ic + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_s_mv_ell_from_coo diff --git a/ext/impl/psb_s_mv_ell_from_fmt.f90 b/ext/impl/psb_s_mv_ell_from_fmt.f90 new file mode 100644 index 00000000..03ebf8e4 --- /dev/null +++ b/ext/impl/psb_s_mv_ell_from_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_mv_ell_from_fmt + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_s_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_mv_ell_from_fmt diff --git a/ext/impl/psb_s_mv_ell_to_coo.f90 b/ext/impl/psb_s_mv_ell_to_coo.f90 new file mode 100644 index 00000000..151cbeff --- /dev/null +++ b/ext/impl/psb_s_mv_ell_to_coo.f90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_mv_ell_to_coo + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + ! Taking a path slightly slower but with less memory footprint + deallocate(a%idiag) + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + + call psb_realloc(nza,b%ia,info) + if (info == 0) call psb_realloc(nza,b%ja,info) + if (info /= 0) goto 9999 + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + end do + end do + deallocate(a%ja, stat=info) + + if (info == 0) call psb_realloc(nza,b%val,info) + if (info /= 0) goto 9999 + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%val(k) = a%val(i,j) + end do + end do + call a%free() + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_s_mv_ell_to_coo diff --git a/ext/impl/psb_s_mv_ell_to_fmt.f90 b/ext/impl/psb_s_mv_ell_to_fmt.f90 new file mode 100644 index 00000000..66f33508 --- /dev/null +++ b/ext/impl/psb_s_mv_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psb_s_mv_ell_to_fmt + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_s_ell_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_mv_ell_to_fmt diff --git a/ext/impl/psb_s_mv_hdia_from_coo.f90 b/ext/impl/psb_s_mv_hdia_from_coo.f90 new file mode 100644 index 00000000..88765079 --- /dev/null +++ b/ext/impl/psb_s_mv_hdia_from_coo.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_mv_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_mv_hdia_from_coo + implicit none + + class(psb_s_hdia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.(b%is_by_rows())) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_mv_hdia_from_coo diff --git a/ext/impl/psb_s_mv_hdia_to_coo.f90 b/ext/impl/psb_s_mv_hdia_to_coo.f90 new file mode 100644 index 00000000..56399b2a --- /dev/null +++ b/ext/impl/psb_s_mv_hdia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_s_mv_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_s_hdia_mat_mod, psb_protect_name => psb_s_mv_hdia_to_coo + implicit none + + class(psb_s_hdia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_s_mv_hdia_to_coo diff --git a/ext/impl/psb_s_mv_hll_from_coo.f90 b/ext/impl/psb_s_mv_hll_from_coo.f90 new file mode 100644 index 00000000..c8e46086 --- /dev/null +++ b/ext/impl/psb_s_mv_hll_from_coo.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_mv_hll_from_coo + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: hksz + info = psb_success_ + if (.not.b%is_by_rows()) call b%fix(info) + hksz = psi_get_hksz() + call psi_convert_hll_from_coo(a,hksz,b,info) + if (info /= 0) goto 9999 + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_s_mv_hll_from_coo diff --git a/ext/impl/psb_s_mv_hll_from_fmt.f90 b/ext/impl/psb_s_mv_hll_from_fmt.f90 new file mode 100644 index 00000000..19bda0a6 --- /dev/null +++ b/ext/impl/psb_s_mv_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_mv_hll_from_fmt + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_s_hll_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%hkoffs, a%hkoffs) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + a%hksz = b%hksz + a%nzt = b%nzt + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_mv_hll_from_fmt diff --git a/ext/impl/psb_s_mv_hll_to_coo.f90 b/ext/impl/psb_s_mv_hll_to_coo.f90 new file mode 100644 index 00000000..d36286a5 --- /dev/null +++ b/ext/impl/psb_s_mv_hll_to_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_mv_hll_to_coo + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%cp_to_coo(b,info) + + if (info /= psb_success_) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_s_mv_hll_to_coo diff --git a/ext/impl/psb_s_mv_hll_to_fmt.f90 b/ext/impl/psb_s_mv_hll_to_fmt.f90 new file mode 100644 index 00000000..17618f69 --- /dev/null +++ b/ext/impl/psb_s_mv_hll_to_fmt.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_s_mv_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psb_s_mv_hll_to_fmt + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_s_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_s_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_s_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%hkoffs, b%hkoffs) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + b%hksz = a%hksz + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_s_mv_hll_to_fmt diff --git a/ext/impl/psb_z_cp_dia_from_coo.f90 b/ext/impl/psb_z_cp_dia_from_coo.f90 new file mode 100644 index 00000000..e87bfb34 --- /dev/null +++ b/ext/impl/psb_z_cp_dia_from_coo.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_cp_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_cp_dia_from_coo + implicit none + + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_convert_dia_from_coo(a,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_dia_from_coo(a,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cp_dia_from_coo diff --git a/ext/impl/psb_z_cp_dia_to_coo.f90 b/ext/impl/psb_z_cp_dia_to_coo.f90 new file mode 100644 index 00000000..26fac30b --- /dev/null +++ b/ext/impl/psb_z_cp_dia_to_coo.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_cp_dia_to_coo + implicit none + + class(psb_z_dia_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: i, j, k,nr,nza,nc, nzd + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + + call psi_z_xtr_coo_from_dia(nr,nc,& + & b%ia, b%ja, b%val, nzd, & + & size(a%data,1),size(a%data,2),& + & a%data,a%offset,info) + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +end subroutine psb_z_cp_dia_to_coo diff --git a/ext/impl/psb_z_cp_ell_from_coo.f90 b/ext/impl/psb_z_cp_ell_from_coo.f90 new file mode 100644 index 00000000..7559621d --- /dev/null +++ b/ext/impl/psb_z_cp_ell_from_coo.f90 @@ -0,0 +1,71 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_cp_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc + integer(psb_ipk_) :: nzm, ir, ic, k + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + ! This is to have fix_coo called behind the scenes + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call psi_z_convert_ell_from_coo(a,b,info) + else + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call psi_z_convert_ell_from_coo(a,tmp,info) + if (info == psb_success_) call tmp%free() + end if + if (info /= psb_success_) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + +end subroutine psb_z_cp_ell_from_coo diff --git a/ext/impl/psb_z_cp_ell_from_fmt.f90 b/ext/impl/psb_z_cp_ell_from_fmt.f90 new file mode 100644 index 00000000..6d63b64e --- /dev/null +++ b/ext/impl/psb_z_cp_ell_from_fmt.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_cp_ell_from_fmt + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b,info) + + type is (psb_z_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_z_cp_ell_from_fmt diff --git a/ext/impl/psb_z_cp_ell_to_coo.f90 b/ext/impl/psb_z_cp_ell_to_coo.f90 new file mode 100644 index 00000000..38a1696b --- /dev/null +++ b/ext/impl/psb_z_cp_ell_to_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_cp_ell_to_coo + implicit none + + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: i, j, k, nr, nc, nza + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + b%val(k) = a%val(i,j) + end do + end do + call b%set_nzeros(a%get_nzeros()) + call b%fix(info) + call b%set_host() + +end subroutine psb_z_cp_ell_to_coo diff --git a/ext/impl/psb_z_cp_ell_to_fmt.f90 b/ext/impl/psb_z_cp_ell_to_fmt.f90 new file mode 100644 index 00000000..7fb64a90 --- /dev/null +++ b/ext/impl/psb_z_cp_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_cp_ell_to_fmt + implicit none + + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_z_ell_sparse_mat) + if (a%is_dev()) call a%sync() + + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_cp_ell_to_fmt diff --git a/ext/impl/psb_z_cp_hdia_from_coo.f90 b/ext/impl/psb_z_cp_hdia_from_coo.f90 new file mode 100644 index 00000000..ed77914e --- /dev/null +++ b/ext/impl/psb_z_cp_hdia_from_coo.f90 @@ -0,0 +1,222 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_cp_hdia_from_coo + implicit none + + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + if (b%is_dev()) call b%sync() + if (b%is_by_rows()) then + call inner_cp_hdia_from_coo(a,b,info) + if (info /= psb_success_) goto 9999 + else + call b%cp_to_coo(tmp,info) + if (info /= psb_success_) goto 9999 + if (.not.tmp%is_by_rows()) call tmp%fix(info) + if (info /= psb_success_) goto 9999 + call inner_cp_hdia_from_coo(a,tmp,info) + if (info /= psb_success_) goto 9999 + call tmp%free() + end if + call a%set_host() + + return + +9999 continue + + info = psb_err_alloc_dealloc_ + return + +contains + + subroutine inner_cp_hdia_from_coo(a,tmp,info) + use psb_base_mod + use psi_ext_util_mod + + implicit none + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,mi,mj,dm,bi,w + integer(psb_ipk_),allocatable :: d(:), offset(:), irsz(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext, nzout + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + logical, parameter :: debug=.false. + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = tmp%psb_z_base_sparse_mat + + hacksize = a%hacksize + a%nhacks = (nr+hacksize-1)/hacksize + nhacks = a%nhacks + + ndiag = nr+nc-1 + if (info == psb_success_) call psb_realloc(nr,irsz,info) + if (info == psb_success_) call psb_realloc(ndiag,d,info) + if (info == psb_success_) call psb_realloc(ndiag,offset,info) + if (info == psb_success_) call psb_realloc(nhacks+1,a%hackoffsets,info) + if (info /= psb_success_) return + + irsz = 0 + do k=1,nza + ir = tmp%ia(k) + irsz(ir) = irsz(ir)+1 + end do + + a%nzeros = 0 + d = 0 + iszd = 0 + a%hackOffsets(1)=0 + klast1 = 1 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + if (debug) then + write(*,*) 'Loop iteration ',k,nhacks,i,ib,nr + write(*,*) 'RW:',tmp%ia(kfirst),tmp%ia(klast1-1) + write(*,*) 'CL:',tmp%ja(kfirst),tmp%ja(klast1-1) + end if + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, offset, info, initd=.false., cleard=.true.) + iszd = iszd + nd + a%hackOffsets(k+1)=iszd + if (debug) write(*,*) 'From chunk ',k,i,ib,sum(irsz(i:i+ib-1)),': ',nd, iszd + if (debug) write(*,*) 'offset ', offset(1:nd) + end do + if (debug) then + write(*,*) 'Hackcount ',nhacks,' Allocation height ',iszd + write(*,*) 'Hackoffsets ',a%hackOffsets(:) + end if + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%diaOffsets,info) + if (info == psb_success_) call psb_realloc(hacksize*iszd,a%val,info) + if (info /= psb_success_) return + klast1 = 1 + ! + ! Second run: copy elements + ! + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + kfirst = klast1 + klast1 = kfirst + sum(irsz(i:i+ib-1)) + ! klast1 points to last element of chunk plus 1 + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_dia_offset_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & nd, d, a%diaOffsets(hackfirst+1:hacknext), info, & + & initd=.false., cleard=.false.) + if (debug) write(*,*) 'Out from dia_offset: ', a%diaOffsets(hackfirst+1:hacknext) + call psi_z_xtr_dia_from_coo(nr,nc,(klast1-kfirst),& + & tmp%ia(kfirst:klast1-1), tmp%ja(kfirst:klast1-1),& + & tmp%val(kfirst:klast1-1), & + & d,hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),info,& + & initdata=.true.,rdisp=(i-1)) + + call countnz(nr,nc,(i-1),hacksize,(hacknext-hackfirst),& + & a%diaOffsets(hackfirst+1:hacknext),nzout) + a%nzeros = a%nzeros + nzout + call cleand(nr,(hacknext-hackfirst),d,a%diaOffsets(hackfirst+1:hacknext)) + + end do + if (debug) then + write(*,*) 'NZEROS: ',a%nzeros, nza + write(*,*) 'diaoffsets: ',a%diaOffsets(1:iszd) + write(*,*) 'values: ' + j=0 + do k=1,nhacks + write(*,*) 'Hack No. ',k + do i=1,hacksize*(iszd/nhacks) + j = j + 1 + write(*,*) j, a%val(j) + end do + end do + end if + end subroutine inner_cp_hdia_from_coo + + subroutine countnz(nr,nc,rdisp,nrd,ncd,offsets,nz) + implicit none + integer(psb_ipk_), intent(in) :: nr,nc,nrd,ncd,rdisp,offsets(:) + integer(psb_ipk_), intent(out) :: nz + ! + integer(psb_ipk_) :: i,j,k, ir, jc, m4, ir1, ir2, nrcmdisp, rdisp1 + nz = 0 + nrcmdisp = min(nr-rdisp,nc-rdisp) + rdisp1 = 1-rdisp + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + nz = nz + (ir2-ir1+1) + end do + end subroutine countnz + + subroutine cleand(nr,nd,d,offset) + implicit none + integer(psb_ipk_), intent(in) :: nr,nd,offset(:) + integer(psb_ipk_), intent(inout) :: d(:) + integer(psb_ipk_) :: i,id + + do i=1,nd + id = offset(i) + nr + d(id) = 0 + end do + end subroutine cleand + +end subroutine psb_z_cp_hdia_from_coo diff --git a/ext/impl/psb_z_cp_hdia_to_coo.f90 b/ext/impl/psb_z_cp_hdia_to_coo.f90 new file mode 100644 index 00000000..c0544ff0 --- /dev/null +++ b/ext/impl/psb_z_cp_hdia_to_coo.f90 @@ -0,0 +1,84 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_cp_hdia_to_coo + use psi_ext_util_mod + implicit none + + class(psb_z_hdia_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: k,i,j,nc,nr,nza, nzd,nd,hacksize,nhacks,iszd,& + & ib, ir, kfirst, klast1, hackfirst, hacknext + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + call b%set_nzeros(nza) + call b%set_sort_status(psb_unsorted_) + nhacks = a%nhacks + hacksize = a%hacksize + j = 0 + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + call psi_z_xtr_coo_from_dia(nr,nc,& + & b%ia(j+1:), b%ja(j+1:), b%val(j+1:), nzd, & + & hacksize,(hacknext-hackfirst),& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) +!!$ write(*,*) 'diaoffsets',ib,' : ',ib - abs(a%diaOffsets(hackfirst+1:hacknext)) +!!$ write(*,*) 'sum',ib,j,' : ',sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + j = j + nzd + end do + if (nza /= j) then + write(*,*) 'Wrong counts in hdia_to_coo',j,nza + info = -8 + return + end if + call b%set_host() + call b%fix(info) + +end subroutine psb_z_cp_hdia_to_coo diff --git a/ext/impl/psb_z_cp_hll_from_coo.f90 b/ext/impl/psb_z_cp_hll_from_coo.f90 new file mode 100644 index 00000000..15a8d1c2 --- /dev/null +++ b/ext/impl/psb_z_cp_hll_from_coo.f90 @@ -0,0 +1,74 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_cp_hll_from_coo + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: debug_level, debug_unit, hksz + character(len=20) :: name='hll_from_coo' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + if (b%is_dev()) call b%sync() + hksz = psi_get_hksz() + if (b%is_by_rows()) then + call psi_convert_hll_from_coo(a,hksz,b,info) + else + ! This is to guarantee tmp%is_by_rows() + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + + if (info /= psb_success_) return + call psi_convert_hll_from_coo(a,hksz,tmp,info) + + call tmp%free() + end if + if (info /= 0) goto 9999 + call a%set_host() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_cp_hll_from_coo diff --git a/ext/impl/psb_z_cp_hll_from_fmt.f90 b/ext/impl/psb_z_cp_hll_from_fmt.f90 new file mode 100644 index 00000000..3bdb2271 --- /dev/null +++ b/ext/impl/psb_z_cp_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_cp_hll_from_fmt + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + class is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b,info) + + class is (psb_z_hll_sparse_mat) + ! write(0,*) 'From type_hll' + if (b%is_dev()) call b%sync() + + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + if (info == 0) call psb_safe_cpy( b%irn, a%irn , info) + if (info == 0) call psb_safe_cpy( b%hkoffs, a%hkoffs, info) + if (info == 0) call psb_safe_cpy( b%idiag, a%idiag, info) + if (info == 0) call psb_safe_cpy( b%ja , a%ja , info) + if (info == 0) call psb_safe_cpy( b%val, a%val , info) + if (info == 0) a%hksz = b%hksz + if (info == 0) a%nzt = b%nzt + call a%set_host() + + class default + call b%cp_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select +end subroutine psb_z_cp_hll_from_fmt diff --git a/ext/impl/psb_z_cp_hll_to_coo.f90 b/ext/impl/psb_z_cp_hll_to_coo.f90 new file mode 100644 index 00000000..409fe7b5 --- /dev/null +++ b/ext/impl/psb_z_cp_hll_to_coo.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_cp_hll_to_coo + implicit none + + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j, jj,k,ir, isz,err_act, hksz, hk, mxrwl,& + & irs, nzblk, kc + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + + j = 1 + kc = 1 + k = 1 + hksz = a%hksz + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + nzblk = sum(a%irn(i:i+ir-1)) + call inner_copy(i,ir,b%ia(kc:kc+nzblk-1),& + & b%ja(kc:kc+nzblk-1),b%val(kc:kc+nzblk-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & hksz) + k = k + isz + kc = kc + nzblk + + enddo + + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + +contains + + subroutine inner_copy(i,ir,iac,& + & jac,valc,ja,val,irn,ld) + integer(psb_ipk_) :: i,ir,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*) + complex(psb_dpk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc + kc = 1 + do ii = 1, ir + nc = irn(ii) + do jj=1,nc + iac(kc) = i+ii-1 + jac(kc) = ja(ii,jj) + valc(kc) = val(ii,jj) + kc = kc + 1 + end do + end do + + end subroutine inner_copy + +end subroutine psb_z_cp_hll_to_coo diff --git a/ext/impl/psb_z_cp_hll_to_fmt.f90 b/ext/impl/psb_z_cp_hll_to_fmt.f90 new file mode 100644 index 00000000..b0417c92 --- /dev/null +++ b/ext/impl/psb_z_cp_hll_to_fmt.f90 @@ -0,0 +1,68 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_cp_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_cp_hll_to_fmt + implicit none + + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%cp_to_coo(b,info) + + type is (psb_z_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + if (info == 0) call psb_safe_cpy( a%hkoffs, b%hkoffs , info) + if (info == 0) call psb_safe_cpy( a%idiag, b%idiag , info) + if (info == 0) call psb_safe_cpy( a%irn, b%irn , info) + if (info == 0) call psb_safe_cpy( a%ja , b%ja , info) + if (info == 0) call psb_safe_cpy( a%val, b%val , info) + if (info == 0) b%hksz = a%hksz + call b%set_host() + + class default + call a%cp_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_cp_hll_to_fmt diff --git a/ext/impl/psb_z_dia_aclsum.f90 b/ext/impl/psb_z_dia_aclsum.f90 new file mode 100644 index 00000000..5aed7ff0 --- /dev/null +++ b/ext/impl/psb_z_dia_aclsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_aclsum(d,a) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_aclsum + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_aclsum diff --git a/ext/impl/psb_z_dia_allocate_mnnz.f90 b/ext/impl/psb_z_dia_allocate_mnnz.f90 new file mode 100644 index 00000000..e9c614f6 --- /dev/null +++ b/ext/impl/psb_z_dia_allocate_mnnz.f90 @@ -0,0 +1,88 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -ione )/m + else + nz_ = ((max(7*m,7*n,ione)+m-ione)/m) + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,nz_,a%data,info) + if (info == psb_success_) call psb_realloc(m+n,a%offset,info) + if (info == psb_success_) then + a%data = 0 + a%offset = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_allocate_mnnz diff --git a/ext/impl/psb_z_dia_arwsum.f90 b/ext/impl/psb_z_dia_arwsum.f90 new file mode 100644 index 00000000..42805349 --- /dev/null +++ b/ext/impl/psb_z_dia_arwsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_arwsum(d,a) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_arwsum + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + abs(a%data(i,j)) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_arwsum diff --git a/ext/impl/psb_z_dia_colsum.f90 b/ext/impl/psb_z_dia_colsum.f90 new file mode 100644 index 00000000..69919736 --- /dev/null +++ b/ext/impl/psb_z_dia_colsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_colsum(d,a) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_colsum + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = zone + else + d = zzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i+jc) = d(i+jc) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_colsum diff --git a/ext/impl/psb_z_dia_csgetptn.f90 b/ext/impl/psb_z_dia_csgetptn.f90 new file mode 100644 index 00000000..d63304f8 --- /dev/null +++ b/ext/impl/psb_z_dia_csgetptn.f90 @@ -0,0 +1,188 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_csgetptn + implicit none + + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + + + end subroutine dia_getptn + +end subroutine psb_z_dia_csgetptn diff --git a/ext/impl/psb_z_dia_csgetrow.f90 b/ext/impl/psb_z_dia_csgetrow.f90 new file mode 100644 index 00000000..6571264e --- /dev/null +++ b/ext/impl/psb_z_dia_csgetrow.f90 @@ -0,0 +1,199 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_csgetrow + implicit none + + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='dia_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + ir1 = max(irw,ir1) + ir1 = max(ir1,jmin-jc) + ir2 = min(lrw,ir2) + ir2 = min(ir2,jmax-jc) + nzc = ir2-ir1+1 + if (nzc>0) then + if (chksz) then + call psb_ensure_size(nzin_+nzc,ia,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,ja,info) + if (info == 0) call psb_ensure_size(nzin_+nzc,val,info) + end if + do i=ir1, ir2 + nzin_ = nzin_ + 1 + nz = nz + 1 + val(nzin_) = a%data(i,j) + ia(nzin_) = i + ja(nzin_) = i+jc + enddo + end if + enddo + end subroutine dia_getrow +end subroutine psb_z_dia_csgetrow diff --git a/ext/impl/psb_z_dia_csmm.f90 b/ext/impl/psb_z_dia_csmm.f90 new file mode 100644 index 00000000..cbebd10e --- /dev/null +++ b/ext/impl/psb_z_dia_csmm.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_csmm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_csmm + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_dia_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i,1:nxy) = y(i,1:nxy) + alpha*data(i,j)*x(i+off(j),1:nxy) + enddo + enddo + + end subroutine psb_z_dia_csmm_inner + +end subroutine psb_z_dia_csmm diff --git a/ext/impl/psb_z_dia_csmv.f90 b/ext/impl/psb_z_dia_csmv.f90 new file mode 100644 index 00000000..9d1f5a2a --- /dev/null +++ b/ext/impl/psb_z_dia_csmv.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_csmv + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_dia_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) 0) then + ir1 = 1 + ir2 = nr - off(j) + else + ir1 = 1 - off(j) + ir2 = nr + end if + do i=ir1, ir2 + y(i) = y(i) + alpha*data(i,j)*x(i+off(j)) + enddo + enddo + + end subroutine psb_z_dia_csmv_inner + +end subroutine psb_z_dia_csmv diff --git a/ext/impl/psb_z_dia_get_diag.f90 b/ext/impl/psb_z_dia_get_diag.f90 new file mode 100644 index 00000000..9b403923 --- /dev/null +++ b/ext/impl/psb_z_dia_get_diag.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_get_diag(a,d,info) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_get_diag + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = zone + else + do i=1, size(a%offset) + if (a%offset(i) == 0) then + d(1:mnm) = a%data(1:mnm,i) + exit + end if + end do + end if + do i=mnm+1,size(d) + d(i) = zzero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_get_diag diff --git a/ext/impl/psb_z_dia_maxval.f90 b/ext/impl/psb_z_dia_maxval.f90 new file mode 100644 index 00000000..d3518c17 --- /dev/null +++ b/ext/impl/psb_z_dia_maxval.f90 @@ -0,0 +1,54 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +function psb_z_dia_maxval(a) result(res) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_maxval + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_maxval' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = done + else + res = dzero + end if + + res = max(res,maxval(abs(a%data))) + +end function psb_z_dia_maxval diff --git a/ext/impl/psb_z_dia_mold.f90 b/ext/impl/psb_z_dia_mold.f90 new file mode 100644 index 00000000..421af284 --- /dev/null +++ b/ext/impl/psb_z_dia_mold.f90 @@ -0,0 +1,61 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_mold(a,b,info) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_mold + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='dia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_dia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_mold diff --git a/ext/impl/psb_z_dia_print.f90 b/ext/impl/psb_z_dia_print.f90 new file mode 100644 index 00000000..1f7853ef --- /dev/null +++ b/ext/impl/psb_z_dia_print.f90 @@ -0,0 +1,148 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_dia_print' + logical, parameter :: debug=.false. + + class(psb_z_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, jc, ir1, ir2 + + write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_z_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + write(iout,*) nr, nc, nz + + nc=size(a%data,2) + + + + if(present(iv)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) iv(i),iv(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),(i+jc),a%data(i,j) + enddo + enddo + + else if (present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) ivr(i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),ivc(i+jc),a%data(i,j) + enddo + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + write(iout,frmt) (i),(i+jc),a%data(i,j) + enddo + enddo + + endif + +end subroutine psb_z_dia_print diff --git a/ext/impl/psb_z_dia_reallocate_nz.f90 b/ext/impl/psb_z_dia_reallocate_nz.f90 new file mode 100644 index 00000000..2d204a64 --- /dev/null +++ b/ext/impl/psb_z_dia_reallocate_nz.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_reallocate_nz(nz,a) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='z_dia_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! Ans: NOTHING. + ! + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_reallocate_nz diff --git a/ext/impl/psb_z_dia_reinit.f90 b/ext/impl/psb_z_dia_reinit.f90 new file mode 100644 index 00000000..0f58a9ed --- /dev/null +++ b/ext/impl/psb_z_dia_reinit.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_reinit(a,clear) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_reinit + implicit none + + class(psb_z_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%data(:,:) = zzero + call a%set_upd() + call a%set_host() + + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_reinit diff --git a/ext/impl/psb_z_dia_rowsum.f90 b/ext/impl/psb_z_dia_rowsum.f90 new file mode 100644 index 00000000..6918ada1 --- /dev/null +++ b/ext/impl/psb_z_dia_rowsum.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_rowsum(d,a) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_rowsum + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, ir1,ir2, nr + logical :: tra + integer(psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + nr = size(a%data,1) + nc = size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + d(i) = d(i) + a%data(i,j) + enddo + enddo + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_rowsum diff --git a/ext/impl/psb_z_dia_scal.f90 b/ext/impl/psb_z_dia_scal.f90 new file mode 100644 index 00000000..65957e60 --- /dev/null +++ b/ext/impl/psb_z_dia_scal.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_dia_scal(d,a,info,side) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_scal + implicit none + class(psb_z_dia_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), nc, jc, nr, ir1, ir2 + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%data(i,:) = a%data(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + nr=size(a%data,1) + nc=size(a%data,2) + do j=1,nc + jc = a%offset(j) + if (jc > 0) then + ir1 = 1 + ir2 = nr - jc + else + ir1 = 1 - jc + ir2 = nr + end if + do i=ir1, ir2 + a%data(i,j) = a%data(i,j) * d(i+jc) + enddo + enddo + + end if + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_scal diff --git a/ext/impl/psb_z_dia_scals.f90 b/ext/impl/psb_z_dia_scals.f90 new file mode 100644 index 00000000..895763d9 --- /dev/null +++ b/ext/impl/psb_z_dia_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_dia_scals(d,a,info) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_dia_scals + implicit none + class(psb_z_dia_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%data(:,:) = a%data(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dia_scals diff --git a/ext/impl/psb_z_dns_mat_impl.f90 b/ext/impl/psb_z_dns_mat_impl.f90 new file mode 100644 index 00000000..b249a3f2 --- /dev/null +++ b/ext/impl/psb_z_dns_mat_impl.f90 @@ -0,0 +1,724 @@ + +!> Function csmv: +!! \memberof psb_z_dns_sparse_mat +!! \brief Product by a dense rank 1 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_z_dns_csmv(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_csmv + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act, m, n, lda + character(len=20) :: name='z_dns_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = psb_toupper(trans) + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (trans_ == 'N') then + m=a%get_nrows() + n=a%get_ncols() + else + n=a%get_nrows() + m=a%get_ncols() + end if + lda = size(a%val,1) + + + call zgemv(trans_,a%get_nrows(),a%get_ncols(),alpha,& + & a%val,size(a%val,1),x,1,beta,y,1) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dns_csmv + + +!> Function csmm: +!! \memberof psb_z_dns_sparse_mat +!! \brief Product by a dense rank 2 array. +!! +!! Compute +!! Y = alpha*op(A)*X + beta*Y +!! +!! \param alpha Scaling factor for Ax +!! \param A the input sparse matrix +!! \param x(:,:) the input dense X +!! \param beta Scaling factor for y +!! \param y(:,:) the input/output dense Y +!! \param info return code +!! \param trans [N] Whether to use A (N), its transpose (T) +!! or its conjugate transpose (C) +!! +! +subroutine psb_z_dns_csmm(alpha,a,x,beta,y,info,trans) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_csmm + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + ! + character :: trans_ + integer(psb_ipk_) :: err_act,m,n,k, lda, ldx, ldy + character(len=20) :: name='z_dns_csmm' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + if (psb_toupper(trans_)=='N') then + m = a%get_nrows() + k = a%get_ncols() + n = min(size(y,2),size(x,2)) + else + k = a%get_nrows() + m = a%get_ncols() + n = min(size(y,2),size(x,2)) + end if + lda = size(a%val,1) + ldx = size(x,1) + ldy = size(y,1) + call zgemm(trans_,'N',m,n,k,alpha,a%val,lda,x,ldx,beta,y,ldy) + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dns_csmm + + + +! +! +!> Function csnmi: +!! \memberof psb_z_dns_sparse_mat +!! \brief Operator infinity norm +!! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) +!! +! +function psb_z_dns_csnmi(a) result(res) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_csnmi + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + ! + integer(psb_ipk_) :: i + real(psb_dpk_) :: acc + + res = dzero + if (a%is_dev()) call a%sync() + + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_z_dns_csnmi + + +! +!> Function get_diag: +!! \memberof psb_z_dns_sparse_mat +!! \brief Extract the diagonal of A. +!! +!! D(i) = A(i:i), i=1:min(nrows,ncols) +!! +!! \param d(:) The output diagonal +!! \param info return code. +! +subroutine psb_z_dns_get_diag(a,d,info) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_get_diag + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act, mnm, i + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + do i=1, mnm + d(i) = a%val(i,i) + end do + do i=mnm+1,size(d) + d(i) = zzero + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dns_get_diag + + +! +! +!> Function reallocate_nz +!! \memberof psb_z_dns_sparse_mat +!! \brief One--parameters version of (re)allocate +!! +!! \param nz number of nonzeros to allocate for +!! i.e. makes sure that the internal storage +!! allows for NZ coefficients and their indices. +! +subroutine psb_z_dns_reallocate_nz(nz,a) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_dns_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! This is a no-op, allocation is fixed. + ! + if (a%is_dev()) call a%sync() + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dns_reallocate_nz + +! +!> Function mold: +!! \memberof psb_z_dns_sparse_mat +!! \brief Allocate a class(psb_z_dns_sparse_mat) with the +!! same dynamic type as the input. +!! This is equivalent to allocate( mold= ) and is provided +!! for those compilers not yet supporting mold. +!! \param b The output variable +!! \param info return code +! +subroutine psb_z_dns_mold(a,b,info) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_mold + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='dns_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + allocate(psb_z_dns_sparse_mat :: b, stat=info) + + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dns_mold + +! +! +!> Function allocate_mnnz +!! \memberof psb_z_dns_sparse_mat +!! \brief Three-parameters version of allocate +!! +!! \param m number of rows +!! \param n number of cols +!! \param nz [estimated internally] number of nonzeros to allocate for +! +subroutine psb_z_dns_allocate_mnnz(m,n,a,nz) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + ! + integer(psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/1_psb_ipk_/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2_psb_ipk_/)) + goto 9999 + endif + + + ! Basic stuff common to all formats + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_bld() + call a%set_host() + + ! We ignore NZ in this case. + + call psb_realloc(m,n,a%val,info) + if (info == psb_success_) then + a%val = zzero + a%nnz = 0 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dns_allocate_mnnz + + +! +! +! +!> Function csgetrow: +!! \memberof psb_z_dns_sparse_mat +!! \brief Get a (subset of) row(s) +!! +!! getrow is the basic method by which the other (getblk, clip) can +!! be implemented. +!! +!! Returns the set +!! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) +!! each identifying the position of a nonzero in A +!! i.e. +!! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) +!! with IMIN<=IA(:)<=IMAX +!! with JMIN<=JA(:)<=JMAX +!! IA,JA are reallocated as necessary. +!! +!! \param imin the minimum row index we are interested in +!! \param imax the minimum row index we are interested in +!! \param nz the number of output coefficients +!! \param ia(:) the output row indices +!! \param ja(:) the output col indices +!! \param val(:) the output coefficients +!! \param info return code +!! \param jmin [1] minimum col index +!! \param jmax [a\%get_ncols()] maximum col index +!! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) +!! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] +!! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] +!! ( iren cannot be specified with rscale/cscale) +!! \param append [false] append to ia,ja +!! \param nzin [none] if append, then first new entry should go in entry nzin+1 +!! +! +subroutine psb_z_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_csgetrow + implicit none + + class(psb_z_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + ! + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i,j,k + character(len=20) :: name='csget' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (a%is_dev()) call a%sync() + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax Function trim +!! \memberof psb_z_dns_sparse_mat +!! \brief Memory trim +!! Make sure the memory allocation of the sparse matrix is as tight as +!! possible given the actual number of nonzeros it contains. +! +subroutine psb_z_dns_trim(a) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_dns_trim + implicit none + class(psb_z_dns_sparse_mat), intent(inout) :: a + ! + integer(psb_ipk_) :: err_act + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + ! Do nothing, we are already at minimum memory. + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_dns_trim + +! +!> Function cp_from_coo: +!! \memberof psb_z_dns_sparse_mat +!! \brief Copy and convert from psb_z_coo_sparse_mat +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! + +subroutine psb_z_cp_dns_from_coo(a,b,info) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_cp_dns_from_coo + implicit none + + class(psb_z_dns_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + ! + type(psb_z_coo_sparse_mat) :: tmp + integer(psb_ipk_) :: nza, nr, i,err_act, nc + integer(psb_ipk_), parameter :: maxtry=8 + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ + + if (.not.b%is_by_rows()) then + ! This is to have fix_coo called behind the scenes + call b%cp_to_coo(tmp,info) + call tmp%fix(info) + if (info /= psb_success_) return + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = tmp%psb_z_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = zzero + do i=1, nza + a%val(tmp%ia(i),tmp%ja(i)) = tmp%val(i) + end do + a%nnz = nza + call tmp%free() + else + if (b%is_dev()) call b%sync() + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + + call psb_realloc(nr,nc,a%val,info) + if (info /= 0) goto 9999 + a%val = zzero + do i=1, nza + a%val(b%ia(i),b%ja(i)) = b%val(i) + end do + a%nnz = nza + end if + call a%set_host() + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_cp_dns_from_coo + + + +! +!> Function cp_to_coo: +!! \memberof psb_z_dns_sparse_mat +!! \brief Copy and convert to psb_z_coo_sparse_mat +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! + +subroutine psb_z_cp_dns_to_coo(a,b,info) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_cp_dns_to_coo + implicit none + + class(psb_z_dns_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_Ipk_) :: nza, nr, nc,i,j,k,err_act + + info = psb_success_ + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + + k = 0 + do i=1,a%get_nrows() + do j=1,a%get_ncols() + if (a%val(i,j) /= zzero) then + k = k + 1 + b%ia(k) = i + b%ja(k) = j + b%val(k) = a%val(i,j) + end if + end do + end do + + call b%set_nzeros(nza) + call b%set_sort_status(psb_row_major_) + call b%set_asb() + call b%set_host() + +end subroutine psb_z_cp_dns_to_coo + + + +! +!> Function mv_to_coo: +!! \memberof psb_z_dns_sparse_mat +!! \brief Convert to psb_z_coo_sparse_mat, freeing the source. +!! Invoked from the source object. +!! \param b The output variable +!! \param info return code +! +subroutine psb_z_mv_dns_to_coo(a,b,info) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_mv_dns_to_coo + implicit none + + class(psb_z_dns_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_to_coo(b,info) + call a%free() + return + +end subroutine psb_z_mv_dns_to_coo + + +! +!> Function mv_from_coo: +!! \memberof psb_z_dns_sparse_mat +!! \brief Convert from psb_z_coo_sparse_mat, freeing the source. +!! Invoked from the target object. +!! \param b The input variable +!! \param info return code +! +! +subroutine psb_z_mv_dns_from_coo(a,b,info) + use psb_base_mod + use psb_z_dns_mat_mod, psb_protect_name => psb_z_mv_dns_from_coo + implicit none + + class(psb_z_dns_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_z_mv_dns_from_coo + diff --git a/ext/impl/psb_z_ell_aclsum.f90 b/ext/impl/psb_z_ell_aclsum.f90 new file mode 100644 index 00000000..b03121fd --- /dev/null +++ b/ext/impl/psb_z_ell_aclsum.f90 @@ -0,0 +1,82 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_aclsum(d,a) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_aclsum + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + abs(a%val(i,j)) + end do + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_aclsum diff --git a/ext/impl/psb_z_ell_allocate_mnnz.f90 b/ext/impl/psb_z_ell_allocate_mnnz.f90 new file mode 100644 index 00000000..f7f7f67e --- /dev/null +++ b/ext/impl/psb_z_ell_allocate_mnnz.f90 @@ -0,0 +1,91 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + a%nzt = -1 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_allocate_mnnz diff --git a/ext/impl/psb_z_ell_arwsum.f90 b/ext/impl/psb_z_ell_arwsum.f90 new file mode 100644 index 00000000..9d4b4949 --- /dev/null +++ b/ext/impl/psb_z_ell_arwsum.f90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_arwsum(d,a) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_arwsum + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + is_unit = a%is_unit() + + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = done + else + d(i) = dzero + end if + do j=1,a%irn(i) + d(i) = d(i) + abs(a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_arwsum diff --git a/ext/impl/psb_z_ell_colsum.f90 b/ext/impl/psb_z_ell_colsum.f90 new file mode 100644 index 00000000..e9c2bc0b --- /dev/null +++ b/ext/impl/psb_z_ell_colsum.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_colsum(d,a) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_colsum + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = zone + else + d = zzero + end if + + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + d(k) = d(k) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_colsum diff --git a/ext/impl/psb_z_ell_csgetblk.f90 b/ext/impl/psb_z_ell_csgetblk.f90 new file mode 100644 index 00000000..d2e56e1d --- /dev/null +++ b/ext/impl/psb_z_ell_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_csgetblk + implicit none + + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='ell_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_csgetblk diff --git a/ext/impl/psb_z_ell_csgetptn.f90 b/ext/impl/psb_z_ell_csgetptn.f90 new file mode 100644 index 00000000..97ed7d90 --- /dev/null +++ b/ext/impl/psb_z_ell_csgetptn.f90 @@ -0,0 +1,189 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_csgetptn + implicit none + + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_z_ell_csgetrow + implicit none + + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='ell_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_z_ell_csmm + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_dpk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_ell_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_ell_csmv + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='d_ell_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) psb_z_ell_csnm1 + + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info + real(psb_dpk_), allocatable :: vt(:) + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_ell_csnm1' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = dzero + nnz = a%get_nzeros() + m = a%get_nrows() + n = a%get_ncols() + allocate(vt(n),stat=info) + if (info /= 0) return + if (a%is_unit()) then + vt(:) = done + else + vt(:) = dzero + end if + do i=1, m + do j=1,a%irn(i) + k = a%ja(i,j) + vt(k) = vt(k) + abs(a%val(i,j)) + end do + end do + res = maxval(vt(1:n)) + deallocate(vt,stat=info) + + return + +end function psb_z_ell_csnm1 diff --git a/ext/impl/psb_z_ell_csnmi.f90 b/ext/impl/psb_z_ell_csnmi.f90 new file mode 100644 index 00000000..ecbfb1e1 --- /dev/null +++ b/ext/impl/psb_z_ell_csnmi.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_z_ell_csnmi(a) result(res) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_csnmi + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + logical :: tra, is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_csnmi' + logical, parameter :: debug=.false. + + + if (a%is_dev()) call a%sync() + res = dzero + is_unit = a%is_unit() + do i = 1, a%get_nrows() + acc = sum(abs(a%val(i,:))) + if (is_unit) acc = acc + done + res = max(res,acc) + end do + +end function psb_z_ell_csnmi diff --git a/ext/impl/psb_z_ell_csput.f90 b/ext/impl/psb_z_ell_csput.f90 new file mode 100644 index 00000000..cf45070f --- /dev/null +++ b/ext/impl/psb_z_ell_csput.f90 @@ -0,0 +1,208 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_csput_a + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_ell_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5), debug_level, debug_unit + + + call psb_erractionsave(err_act) + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + call psb_z_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info < 0) then + info = psb_err_internal_error_ + else if (info > 0) then + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarded entries not belonging to us.' + info = psb_success_ + end if + call a%set_host() + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + +contains + + subroutine psb_z_ell_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ilr, ilc, ip, & + & i1,i2,nr,nc,nnz,dupl + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='z_ell_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + ilr = -1 + ilc = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + + end do + + case(psb_dupl_add_) + ! Add + ilr = -1 + ilc = -1 + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + nc = a%irn(ir) + ip = psb_bsrch(ic,nc,a%ja(ir,1:nc)) + if (ip>0) then + a%val(ir,ip) = a%val(ir,ip) + val(i) + else + info = max(info,3) + end if + else + info = max(info,2) + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_z_ell_srch_upd +end subroutine psb_z_ell_csput_a diff --git a/ext/impl/psb_z_ell_cssm.f90 b/ext/impl/psb_z_ell_cssm.f90 new file mode 100644 index 00000000..2e26c656 --- /dev/null +++ b/ext/impl/psb_z_ell_cssm.f90 @@ -0,0 +1,375 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_cssm + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy + complex(psb_dpk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_ell_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_z_ell_cssv + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc + complex(psb_dpk_) :: acc + complex(psb_dpk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_ell_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x,1) psb_z_ell_get_diag + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + mnm = min(a%get_nrows(),a%get_ncols()) + if (size(d) < mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + + if (a%is_unit()) then + d(1:mnm) = zone + else + do i=1, mnm + if (1<=a%idiag(i).and.(a%idiag(i)<=size(a%ja,2))) then + d(i) = a%val(i,a%idiag(i)) + else + d(i) = zzero + end if + end do + end if + do i=mnm+1,size(d) + d(i) = zzero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_get_diag diff --git a/ext/impl/psb_z_ell_maxval.f90 b/ext/impl/psb_z_ell_maxval.f90 new file mode 100644 index 00000000..9596f124 --- /dev/null +++ b/ext/impl/psb_z_ell_maxval.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_z_ell_maxval(a) result(res) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_maxval + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc + real(psb_dpk_) :: acc + logical :: tra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_csnmi' + logical, parameter :: debug=.false. + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + res = done + else + res = dzero + end if + + do i = 1, a%get_nrows() + acc = maxval(abs(a%val(i,:))) + res = max(res,acc) + end do + +end function psb_z_ell_maxval diff --git a/ext/impl/psb_z_ell_mold.f90 b/ext/impl/psb_z_ell_mold.f90 new file mode 100644 index 00000000..3e1db6cc --- /dev/null +++ b/ext/impl/psb_z_ell_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_mold(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_mold + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='ell_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_mold diff --git a/ext/impl/psb_z_ell_print.f90 b/ext/impl/psb_z_ell_print.f90 new file mode 100644 index 00000000..502abb94 --- /dev/null +++ b/ext/impl/psb_z_ell_print.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_ell_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% ELL' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_z_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(i,j)),a%val(i,j) + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(i,j)),a%val(i,j) + end do + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do i=1, nr + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(i,j)),a%val(i,j) + end do + enddo + endif + endif + +end subroutine psb_z_ell_print diff --git a/ext/impl/psb_z_ell_reallocate_nz.f90 b/ext/impl/psb_z_ell_reallocate_nz.f90 new file mode 100644 index 00000000..58237508 --- /dev/null +++ b/ext/impl/psb_z_ell_reallocate_nz.f90 @@ -0,0 +1,66 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_reallocate_nz(nz,a) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm, ld + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='z_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + m = a%get_nrows() + nzrm = (max(nz,ione)+m-1)/m + ld = size(a%ja,1) + call psb_realloc(ld,nzrm,a%ja,info) + if (info == psb_success_) call psb_realloc(ld,nzrm,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_reallocate_nz diff --git a/ext/impl/psb_z_ell_reinit.f90 b/ext/impl/psb_z_ell_reinit.f90 new file mode 100644 index 00000000..d73620d8 --- /dev/null +++ b/ext/impl/psb_z_ell_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_reinit(a,clear) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_reinit + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:,:) = zzero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_reinit diff --git a/ext/impl/psb_z_ell_rowsum.f90 b/ext/impl/psb_z_ell_rowsum.f90 new file mode 100644 index 00000000..60eb70af --- /dev/null +++ b/ext/impl/psb_z_ell_rowsum.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_rowsum(d,a) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_rowsum + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical :: is_unit + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + is_unit = a%is_unit() + do i = 1, a%get_nrows() + if (is_unit) then + d(i) = zone + else + d(i) = zzero + end if + do j=1,a%irn(i) + d(i) = d(i) + (a%val(i,j)) + end do + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_rowsum diff --git a/ext/impl/psb_z_ell_scal.f90 b/ext/impl/psb_z_ell_scal.f90 new file mode 100644 index 00000000..7f2f8944 --- /dev/null +++ b/ext/impl/psb_z_ell_scal.f90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_scal(d,a,info,side) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_scal + implicit none + class(psb_z_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5) + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + if (a%is_unit()) then + call a%make_nonunit() + end if + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + if (left) then + m = a%get_nrows() + if (size(d) < m) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,size(d,kind=psb_ipk_)/)) + goto 9999 + end if + + do i=1, m + a%val(i,:) = a%val(i,:) * d(i) + enddo + else + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = size(d); + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + + do i=1, m + do j=1, a%irn(i) + a%val(i,j) = a%val(i,j) * d(a%ja(i,j)) + end do + enddo + + end if + + call a%set_host() + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_scal diff --git a/ext/impl/psb_z_ell_scals.f90 b/ext/impl/psb_z_ell_scals.f90 new file mode 100644 index 00000000..4086d8cc --- /dev/null +++ b/ext/impl/psb_z_ell_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_scals(d,a,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_scals + implicit none + class(psb_z_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:,:) = a%val(:,:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_scals diff --git a/ext/impl/psb_z_ell_trim.f90 b/ext/impl/psb_z_ell_trim.f90 new file mode 100644 index 00000000..7cc2ed65 --- /dev/null +++ b/ext/impl/psb_z_ell_trim.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_ell_trim(a) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_ell_trim + implicit none + class(psb_z_ell_sparse_mat), intent(inout) :: a + Integer(psb_ipk_) :: err_act, info, nz, m, nzm + character(len=20) :: name='trim' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + m = max(1_psb_ipk_,a%get_nrows()) + nzm = max(1_psb_ipk_,maxval(a%irn(1:m))) + + call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(m,nzm,a%val,info) + + if (info /= psb_success_) goto 9999 + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_ell_trim diff --git a/ext/impl/psb_z_hdia_allocate_mnnz.f90 b/ext/impl/psb_z_hdia_allocate_mnnz.f90 new file mode 100644 index 00000000..abed0c58 --- /dev/null +++ b/ext/impl/psb_z_hdia_allocate_mnnz.f90 @@ -0,0 +1,75 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_hdia_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_hdia_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hdia_allocate_mnnz diff --git a/ext/impl/psb_z_hdia_csmv.f90 b/ext/impl/psb_z_hdia_csmv.f90 new file mode 100644 index 00000000..73d11da6 --- /dev/null +++ b/ext/impl/psb_z_hdia_csmv.f90 @@ -0,0 +1,162 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psb_z_hdia_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_hdia_csmv + implicit none + class(psb_z_hdia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc,nr,nc + integer(psb_ipk_) :: irs,ics, nmx, ni + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, ncd,ib, nzhack, & + & hackfirst, hacknext + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_hdia_csmv' + logical, parameter :: debug=.false. + real :: start, finish + call psb_erractionsave(err_act) + info = psb_success_ + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + if (tra.or.ctra) then + m = a%get_ncols() + n = a%get_nrows() + info = psb_err_transpose_not_n_unsupported_ + call psb_errpush(info,name) + goto 9999 + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1)=0) then + ir1 = 1 + ! min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + jc = ir1 + rdisp + offsets(j) + do i=ir1,ir2 + y(rdisp+i) = y(rdisp+i) + alpha*data(i,j)*x(jc) + jc = jc + 1 + enddo + end do + end subroutine psi_z_inner_dia_csmv + +end subroutine psb_z_hdia_csmv diff --git a/ext/impl/psb_z_hdia_mold.f90 b/ext/impl/psb_z_hdia_mold.f90 new file mode 100644 index 00000000..d91bdb35 --- /dev/null +++ b/ext/impl/psb_z_hdia_mold.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hdia_mold(a,b,info) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_hdia_mold + implicit none + class(psb_z_hdia_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hdia_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_hdia_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hdia_mold diff --git a/ext/impl/psb_z_hdia_print.f90 b/ext/impl/psb_z_hdia_print.f90 new file mode 100644 index 00000000..46f7769d --- /dev/null +++ b/ext/impl/psb_z_hdia_print.f90 @@ -0,0 +1,121 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_hdia_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_hdia_print + use psi_ext_util_mod + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_z_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='hdia_print' + logical, parameter :: debug=.false. + + class(psb_z_coo_sparse_mat),allocatable :: acoo + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz + integer(psb_ipk_) :: nhacks, hacksize,maxnzhack, k, ncd,ib, nzhack, info,& + & hackfirst, hacknext + integer(psb_ipk_), allocatable :: ia(:), ja(:) + complex(psb_dpk_), allocatable :: val(:) + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% HDIA' + + if (a%is_dev()) call a%sync() + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_z_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + + nhacks = a%nhacks + hacksize = a%hacksize + maxnzhack = 0 + do k=1, nhacks + maxnzhack = max(maxnzhack,(a%hackoffsets(k+1)-a%hackoffsets(k))) + end do + maxnzhack = hacksize*maxnzhack + allocate(ia(maxnzhack),ja(maxnzhack),val(maxnzhack),stat=info) + if (info /= 0) return + + write(iout,*) nr, nc, nz + do k=1, nhacks + i = (k-1)*hacksize + 1 + ib = min(hacksize,nr-i+1) + hackfirst = a%hackoffsets(k) + hacknext = a%hackoffsets(k+1) + ncd = hacknext-hackfirst + + call psi_z_xtr_coo_from_dia(nr,nc,& + & ia, ja, val, nzhack,& + & hacksize,ncd,& + & a%val((hacksize*hackfirst)+1:hacksize*hacknext),& + & a%diaOffsets(hackfirst+1:hacknext),info,rdisp=(i-1)) + !nzhack = sum(ib - abs(a%diaOffsets(hackfirst+1:hacknext))) + + if(present(iv)) then + do j=1,nzhack + write(iout,frmt) iv(ia(j)),iv(ja(j)),val(j) + enddo + else + if (present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ja(j),val(j) + enddo + else if (present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ivr(ia(j)),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ivc(ja(j)),val(j) + enddo + else if (.not.present(ivr).and..not.present(ivc)) then + do j=1,nzhack + write(iout,frmt) ia(j),ja(j),val(j) + enddo + endif + end if + + end do + +end subroutine psb_z_hdia_print diff --git a/ext/impl/psb_z_hll_aclsum.f90 b/ext/impl/psb_z_hll_aclsum.f90 new file mode 100644 index 00000000..e4add299 --- /dev/null +++ b/ext/impl/psb_z_hll_aclsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_aclsum(d,a) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_aclsum + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='aclsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call z_hll_aclsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine z_hll_aclsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine z_hll_aclsum + +end subroutine psb_z_hll_aclsum diff --git a/ext/impl/psb_z_hll_allocate_mnnz.f90 b/ext/impl/psb_z_hll_allocate_mnnz.f90 new file mode 100644 index 00000000..6ba9d7f1 --- /dev/null +++ b/ext/impl/psb_z_hll_allocate_mnnz.f90 @@ -0,0 +1,93 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_allocate_mnnz(m,n,a,nz) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + Integer(Psb_ipk_) :: err_act, info, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + if (m < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/ione/)) + goto 9999 + endif + if (n < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/2*ione/)) + goto 9999 + endif + if (present(nz)) then + nz_ = (max(nz,ione) + m -1 )/m + else + nz_ = (max(7*m,7*n,ione)+m-1)/m + end if + if (nz_ < 0) then + info = psb_err_iarg_neg_ + call psb_errpush(info,name,i_err=(/3*ione/)) + goto 9999 + endif + + if (info == psb_success_) call psb_realloc(m,a%irn,info) + if (info == psb_success_) call psb_realloc(m,a%idiag,info) + if (info == psb_success_) call psb_realloc(m+1,a%hkoffs,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(m*nz_,a%val,info) + if (info == psb_success_) then + a%irn = 0 + a%idiag = 0 + call a%set_nrows(m) + call a%set_ncols(n) + call a%set_bld() + call a%set_triangle(.false.) + call a%set_unit(.false.) + call a%set_dupl(psb_dupl_def_) + call a%set_hksz(psb_hksz_def_) + call a%set_host() + end if + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hll_allocate_mnnz diff --git a/ext/impl/psb_z_hll_arwsum.f90 b/ext/impl/psb_z_hll_arwsum.f90 new file mode 100644 index 00000000..a6e020fd --- /dev/null +++ b/ext/impl/psb_z_hll_arwsum.f90 @@ -0,0 +1,108 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_arwsum(d,a) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_arwsum + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='arwsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = done + else + d = dzero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call z_hll_arwsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine z_hll_arwsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + abs(val(i,j)) + end do + end do + + end subroutine z_hll_arwsum + +end subroutine psb_z_hll_arwsum diff --git a/ext/impl/psb_z_hll_colsum.f90 b/ext/impl/psb_z_hll_colsum.f90 new file mode 100644 index 00000000..196a694e --- /dev/null +++ b/ext/impl/psb_z_hll_colsum.f90 @@ -0,0 +1,109 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_colsum(d,a) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_colsum + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='colsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < n) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = n + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (a%is_unit()) then + d = zone + else + d = zzero + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call z_hll_colsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine z_hll_colsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: val(ldv,*) + complex(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + d(jc) = d(jc) + abs(val(i,j)) + end do + end do + + end subroutine z_hll_colsum + +end subroutine psb_z_hll_colsum diff --git a/ext/impl/psb_z_hll_csgetblk.f90 b/ext/impl/psb_z_hll_csgetblk.f90 new file mode 100644 index 00000000..0cdf1fef --- /dev/null +++ b/ext/impl/psb_z_hll_csgetblk.f90 @@ -0,0 +1,83 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_csgetblk + implicit none + + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + Integer(Psb_ipk_) :: err_act, nzin, nzout + character(len=20) :: name='hll_getblk' + logical :: append_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(append)) then + append_ = append + else + append_ = .false. + endif + if (append_) then + nzin = a%get_nzeros() + else + nzin = 0 + endif + + call a%csget(imin,imax,nzout,b%ia,b%ja,b%val,info,& + & jmin=jmin, jmax=jmax, iren=iren, append=append_, & + & nzin=nzin, rscale=rscale, cscale=cscale) + + if (info /= psb_success_) goto 9999 + + call b%set_nzeros(nzin+nzout) + call b%set_host() + call b%fix(info) + if (info /= psb_success_) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hll_csgetblk diff --git a/ext/impl/psb_z_hll_csgetptn.f90 b/ext/impl/psb_z_hll_csgetptn.f90 new file mode 100644 index 00000000..9d4c6714 --- /dev/null +++ b/ext/impl/psb_z_hll_csgetptn.f90 @@ -0,0 +1,209 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_csgetptn + implicit none + + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + + logical :: append_, rscale_, cscale_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getptn' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_z_hll_csgetrow + implicit none + + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + + logical :: append_, rscale_, cscale_, chksz_ + integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i + character(len=20) :: name='hll_getrow' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(jmin)) then + jmin_ = jmin + else + jmin_ = 1 + endif + if (present(jmax)) then + jmax_ = jmax + else + jmax_ = a%get_ncols() + endif + + if ((imax psb_z_hll_csmm + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nxy,ldx,ldy,hksz,mxrwl + complex(psb_dpk_), allocatable :: acc(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hll_csmm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + nxy = min(size(x,2) , size(y,2) ) + + + ldx = size(x,1) + ldy = size(y,1) + if (a%is_dev()) call a%sync() + + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (ldx psb_z_hll_csmv + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hkpnt, mxrwl, mmhk + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_hll_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + + if (tra.or.ctra) then + + m = a%get_ncols() + n = a%get_nrows() + if (size(x,1) 0) then + select case(hksz) + case(4) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_notra_4(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(8) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + &call psb_z_hll_csmv_notra_8(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(16) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_notra_16(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(24) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_notra_24(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case(32) + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_notra_32(i,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + case default + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,mmhk,hksz + j = ((i-1)/hksz)+1 + ir = hksz + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + end if + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + end select + end if + if (mmhk < m) then + i = mmhk+1 + ir = m-mmhk + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + if (mxrwl>0) then + hkpnt = a%hkoffs(j) + 1 + call psb_z_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + if (info /= psb_success_) goto 9999 + end if + j = j + 1 + end if + + else + + j=1 + !$omp parallel do private(i, j,ir,mxrwl, hkpnt) + do i=1,m,hksz + j = ((i-1)/hksz)+1 + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + hkpnt = a%hkoffs(j) + 1 + if (info == psb_success_) & + & call psb_z_hll_csmv_inner(i,ir,mxrwl,a%irn(i),& + & alpha,a%ja(hkpnt),hksz,a%val(hkpnt),hksz,& + & a%is_triangle(),a%is_unit(),& + & x,beta,y,tra,ctra,info) + j = j + 1 + end do + if (info /= psb_success_) goto 9999 + + end if + end if + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_z_hll_csmv_inner(ir,m,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,tra,ctra,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit,tra,ctra + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + if (tra) then + + if (beta == zone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*val(i,j)*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (ctra) then + + if (beta == zone) then + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + y(jc) = y(jc) + alpha*conjg(val(i,j))*x(ir+i-1) + end do + end do + else + info = -10 + + end if + + else if (.not.(tra.or.ctra)) then + + if (alpha == zzero) then + if (beta == zzero) then + do i=1,m + y(ir+i-1) = zzero + end do + else + do i=1,m + y(ir+i-1) = beta*y(ir+i-1) + end do + end if + + else + if (beta == zzero) then + do i=1,m + tmp = zzero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + end do + else + do i=1,m + tmp = zzero + do j=1, irn(i) + tmp = tmp + val(i,j)*x(ja(i,j)) + end do + y(ir+i-1) = alpha*tmp + beta*y(ir+i-1) + end do + endif + end if + end if + + if (is_unit) then + do i=1, min(m,n) + y(i) = y(i) + alpha*x(i) + end do + end if + + end subroutine psb_z_hll_csmv_inner + + subroutine psb_z_hll_csmv_notra_8(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, zzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=8 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = zzero + if (alpha /= zzero) then + do j=1, maxval(irn(1:8)) + tmp(1:8) = tmp(1:8) + val(1:8,j)*x(ja(1:8,j)) + end do + end if + if (beta == zzero) then + y(ir:ir+8-1) = alpha*tmp(1:8) + else + y(ir:ir+8-1) = alpha*tmp(1:8) + beta*y(ir:ir+8-1) + end if + + + if (is_unit) then + do i=1, min(8,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_z_hll_csmv_notra_8 + + subroutine psb_z_hll_csmv_notra_24(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, zzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=24 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = zzero + if (alpha /= zzero) then + do j=1, maxval(irn(1:24)) + tmp(1:24) = tmp(1:24) + val(1:24,j)*x(ja(1:24,j)) + end do + end if + if (beta == zzero) then + y(ir:ir+24-1) = alpha*tmp(1:24) + else + y(ir:ir+24-1) = alpha*tmp(1:24) + beta*y(ir:ir+24-1) + end if + + + if (is_unit) then + do i=1, min(24,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_z_hll_csmv_notra_24 + + subroutine psb_z_hll_csmv_notra_16(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, zzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=16 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = zzero + if (alpha /= zzero) then + do j=1, maxval(irn(1:16)) + tmp(1:16) = tmp(1:16) + val(1:16,j)*x(ja(1:16,j)) + end do + end if + if (beta == zzero) then + y(ir:ir+16-1) = alpha*tmp(1:16) + else + y(ir:ir+16-1) = alpha*tmp(1:16) + beta*y(ir:ir+16-1) + end if + + + if (is_unit) then + do i=1, min(16,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_z_hll_csmv_notra_16 + + subroutine psb_z_hll_csmv_notra_32(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, zzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=32 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = zzero + if (alpha /= zzero) then + do j=1, maxval(irn(1:32)) + tmp(1:32) = tmp(1:32) + val(1:32,j)*x(ja(1:32,j)) + end do + end if + if (beta == zzero) then + y(ir:ir+32-1) = alpha*tmp(1:32) + else + y(ir:ir+32-1) = alpha*tmp(1:32) + beta*y(ir:ir+32-1) + end if + + + if (is_unit) then + do i=1, min(32,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_z_hll_csmv_notra_32 + + subroutine psb_z_hll_csmv_notra_4(ir,n,irn,alpha,ja,ldj,val,ldv,& + & is_triangle,is_unit, x,beta,y,info) + use psb_base_mod, only : psb_ipk_, psb_dpk_, zzero, psb_success_ + implicit none + integer(psb_ipk_), intent(in) :: ir,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: alpha, beta, x(*),val(ldv,*) + complex(psb_dpk_), intent(inout) :: y(*) + logical, intent(in) :: is_triangle,is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_), parameter :: m=4 + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp(m) + + info = psb_success_ + + + tmp(:) = zzero + if (alpha /= zzero) then + do j=1, maxval(irn(1:4)) + tmp(1:4) = tmp(1:4) + val(1:4,j)*x(ja(1:4,j)) + end do + end if + if (beta == zzero) then + y(ir:ir+4-1) = alpha*tmp(1:4) + else + y(ir:ir+4-1) = alpha*tmp(1:4) + beta*y(ir:ir+4-1) + end if + + + if (is_unit) then + do i=1, min(4,n) + y(ir+i-1) = y(ir+i-1) + alpha*x(ir+i-1) + end do + end if + + end subroutine psb_z_hll_csmv_notra_4 + +end subroutine psb_z_hll_csmv diff --git a/ext/impl/psb_z_hll_csnm1.f90 b/ext/impl/psb_z_hll_csnm1.f90 new file mode 100644 index 00000000..eb5c5b6b --- /dev/null +++ b/ext/impl/psb_z_hll_csnm1.f90 @@ -0,0 +1,111 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_z_hll_csnm1(a) result(res) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_csnm1 + + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, info, hksz, mxrwl + real(psb_dpk_), allocatable :: vt(:) + logical :: is_unit + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hll_csnm1' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + res = dzero + if (a%is_dev()) call a%sync() + n = a%get_ncols() + m = a%get_nrows() + allocate(vt(n),stat=info) + if (Info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + end if + if (a%is_unit()) then + vt = done + else + vt = dzero + end if + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_z_hll_csnm1_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & vt,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + res = maxval(vt) + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_z_hll_csnm1_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & vt,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: vt(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + vt(jc) = vt(jc) + abs(val(i,j)) + end do + end do + end subroutine psb_z_hll_csnm1_inner + +end function psb_z_hll_csnm1 diff --git a/ext/impl/psb_z_hll_csnmi.f90 b/ext/impl/psb_z_hll_csnmi.f90 new file mode 100644 index 00000000..6243e5cf --- /dev/null +++ b/ext/impl/psb_z_hll_csnmi.f90 @@ -0,0 +1,104 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_z_hll_csnmi(a) result(res) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_csnmi + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + integer(psb_ipk_) :: i,j,k,m,n, nr, ir, jc, nc, hksz, mxrwl, info + Integer(Psb_ipk_) :: err_act + logical :: is_unit + character(len=20) :: name='z_csnmi' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + info = 0 + res = dzero + if (a%is_dev()) call a%sync() + + n = a%get_ncols() + m = a%get_nrows() + is_unit = a%is_unit() + hksz = a%get_hksz() + j=1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_z_hll_csnmi_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & res,is_unit,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + call psb_erractionrestore(err_act) + return + + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_z_hll_csnmi_inner(ir,m,n,irn,ja,ldj,val,ldv,& + & res,is_unit,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: val(ldv,*) + real(psb_dpk_), intent(inout) :: res + logical :: is_unit + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + real(psb_dpk_) :: tmp, acc + + info = psb_success_ + if (is_unit) then + tmp = done + else + tmp = dzero + end if + do i=1,m + acc = tmp + do j=1, irn(i) + acc = acc + abs(val(i,j)) + end do + res = max(acc,res) + end do + end subroutine psb_z_hll_csnmi_inner + +end function psb_z_hll_csnmi diff --git a/ext/impl/psb_z_hll_csput.f90 b/ext/impl/psb_z_hll_csput.f90 new file mode 100644 index 00000000..e47664c7 --- /dev/null +++ b/ext/impl/psb_z_hll_csput.f90 @@ -0,0 +1,233 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_csput_a + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz, ia(:), ja(:), imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + + + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hll_csput_a' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: nza, i,j,k, nzl, isza, int_err(5) + + + call psb_erractionsave(err_act) + info = psb_success_ + + if (nz <= 0) then + info = psb_err_iarg_neg_ + int_err(1)=1 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(ia) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=2 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (size(ja) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=3 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + if (size(val) < nz) then + info = psb_err_input_asize_invalid_i_ + int_err(1)=4 + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + if (nz == 0) return + + nza = a%get_nzeros() + + if (a%is_bld()) then + ! Build phase should only ever be in COO + info = psb_err_invalid_mat_state_ + + else if (a%is_upd()) then + if (a%is_dev()) call a%sync() + + call psb_z_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + if (info /= psb_success_) then + + info = psb_err_invalid_mat_state_ + end if + call a%set_host() + + else + ! State is wrong. + info = psb_err_invalid_mat_state_ + end if + if (info /= psb_success_) then + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_z_hll_srch_upd(nz,ia,ja,val,a,& + & imin,imax,jmin,jmax,info) + + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax + integer(psb_ipk_), intent(in) :: ia(:),ja(:) + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i,ir,ic, ip, i1,i2,nr,nc,nnz,dupl,ng,& + & hksz, hk, hkzpnt, ihkr, mxrwl, lastrow + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name='z_hll_srch_upd' + + info = psb_success_ + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + dupl = a%get_dupl() + + if (.not.a%is_sorted()) then + info = -4 + return + end if + + lastrow = -1 + nnz = a%get_nzeros() + nr = a%get_nrows() + nc = a%get_ncols() + hksz = a%get_hksz() + + select case(dupl) + case(psb_dupl_ovwrt_,psb_dupl_err_) + ! Overwrite. + ! Cannot test for error, should have been caught earlier. + + do i=1, nz + ir = ia(i) + ic = ja(i) + + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + + end do + + case(psb_dupl_add_) + ! Add + do i=1, nz + ir = ia(i) + ic = ja(i) + if ((ir > 0).and.(ir <= nr)) then + if (ir /= lastrow) then + hk = ((ir-1)/hksz) + lastrow = ir + ihkr = ir - hk*hksz + hk = hk + 1 + hkzpnt = a%hkoffs(hk) + mxrwl = (a%hkoffs(hk+1) - a%hkoffs(hk))/hksz + nc = a%irn(ir) + end if + + ip = psb_bsrch(ic,nc,a%ja(hkzpnt+ihkr:hkzpnt+ihkr+(nc-1)*hksz:hksz)) + if (ip>0) then + a%val(hkzpnt+ihkr+(ip-1)*hksz) = val(i) + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Was searching ',ic,' in: ',nc,& + & ' : ',a%ja(hkzpnt+ir:hkzpnt+ir+(nc-1)*hksz:hksz) + info = i + return + end if + + else + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Discarding row that does not belong to us.' + end if + end do + + case default + info = -3 + if (debug_level >= psb_debug_serial_) & + & write(debug_unit,*) trim(name),& + & ': Duplicate handling: ',dupl + end select + + end subroutine psb_z_hll_srch_upd + +end subroutine psb_z_hll_csput_a diff --git a/ext/impl/psb_z_hll_cssm.f90 b/ext/impl/psb_z_hll_cssm.f90 new file mode 100644 index 00000000..ba1aa150 --- /dev/null +++ b/ext/impl/psb_z_hll_cssm.f90 @@ -0,0 +1,506 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_cssm(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_cssm + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ldx, ldy, hksz, nxy, mk, mxrwl + complex(psb_dpk_), allocatable :: tmp(:,:), acc(:) + logical :: tra, ctra + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_hll_cssm' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + info = psb_err_missing_override_method_ + call psb_errpush(info,name) + goto 9999 + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + hksz = a%get_hksz() + + if (.not. (a%is_triangle())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + ldx = size(x,1) + ldy = size(y,1) + if (ldx psb_z_hll_cssv + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, ic, hksz, hk, mxrwl, noffs, kc, mk + complex(psb_dpk_) :: acc + complex(psb_dpk_), allocatable :: tmp(:) + logical :: tra, ctra + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='z_hll_cssv' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + if (a%is_dev()) call a%sync() + tra = (psb_toupper(trans_) == 'T') + ctra = (psb_toupper(trans_) == 'C') + m = a%get_nrows() + + if (.not. (a%is_triangle().and.a%is_sorted())) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + if (size(x) psb_z_hll_get_diag + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act, mnm, i, j, k, ke, hksz, ld,ir, mxrwl + character(len=20) :: name='get_diag' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + mnm = min(a%get_nrows(),a%get_ncols()) + ld = size(d) + if (ld< mnm) then + info=psb_err_input_asize_invalid_i_ + call psb_errpush(info,name,i_err=(/2*ione,ld/)) + goto 9999 + end if + + if (a%is_triangle().and.a%is_unit()) then + d(1:mnm) = zone + else + + hksz = a%get_hksz() + j=1 + do i=1,mnm,hksz + ir = min(hksz,mnm-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + ke = a%hkoffs(j+1) + call psb_z_hll_get_diag_inner(ir,a%irn(i:i+ir-1),& + & a%ja(k:ke),hksz,a%val(k:ke),hksz,& + & a%idiag(i:i+ir-1),d(i:i+ir-1),info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + end if + + do i=mnm+1,size(d) + d(i) = zzero + end do + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_z_hll_get_diag_inner(m,irn,ja,ldj,val,ldv,& + & idiag,d,info) + integer(psb_ipk_), intent(in) :: m,ldj,ldv,ja(ldj,*),irn(*), idiag(*) + complex(psb_dpk_), intent(in) :: val(ldv,*) + complex(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + do i=1,m + if (idiag(i) /= 0) then + d(i) = val(i,idiag(i)) + else + d(i) = zzero + end if + end do + + end subroutine psb_z_hll_get_diag_inner + +end subroutine psb_z_hll_get_diag diff --git a/ext/impl/psb_z_hll_maxval.f90 b/ext/impl/psb_z_hll_maxval.f90 new file mode 100644 index 00000000..22258c3a --- /dev/null +++ b/ext/impl/psb_z_hll_maxval.f90 @@ -0,0 +1,45 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +function psb_z_hll_maxval(a) result(res) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_maxval + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + + if (a%is_dev()) call a%sync() + res = maxval(abs(a%val(:))) + if (a%is_unit()) res = max(res,done) + +end function psb_z_hll_maxval diff --git a/ext/impl/psb_z_hll_mold.f90 b/ext/impl/psb_z_hll_mold.f90 new file mode 100644 index 00000000..e108e9ce --- /dev/null +++ b/ext/impl/psb_z_hll_mold.f90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_mold(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_mold + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + Integer(Psb_ipk_) :: err_act + character(len=20) :: name='hll_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b,stat=info) + end if + if (info == 0) allocate(psb_z_hll_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hll_mold diff --git a/ext/impl/psb_z_hll_print.f90 b/ext/impl/psb_z_hll_print.f90 new file mode 100644 index 00000000..43882264 --- /dev/null +++ b/ext/impl/psb_z_hll_print.f90 @@ -0,0 +1,134 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_print(iout,a,iv,head,ivr,ivc) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_print + implicit none + + integer(psb_ipk_), intent(in) :: iout + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_hll_print' + logical, parameter :: debug=.false. + + character(len=80) :: frmt + integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nr, nc, nz, k, hksz, hk, mxrwl,ir, ix + + + write(iout,'(a)') '%%MatrixMarket matrix coordinate real general' + if (present(head)) write(iout,'(a,a)') '% ',head + write(iout,'(a)') '%' + write(iout,'(a,a)') '% COO' + + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nz = a%get_nzeros() + frmt = psb_z_get_print_frmt(nr,nc,nz,iv,ivr,ivc) + + hksz = a%get_hksz() + + write(iout,*) nr, nc, nz + if(present(iv)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) iv(i),iv(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else + if (present(ivr).and..not.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) ivr(i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + else if (.not.present(ivr).and.present(ivc)) then + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),ivc(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + + else if (.not.present(ivr).and..not.present(ivc)) then + + do i=1, nr + irs = (i-1)/hksz + hk = irs + 1 + mxrwl = (a%hkoffs(hk+1)-a%hkoffs(hk))/hksz + k = a%hkoffs(hk) + k = k + (i-(irs*hksz)) + do j=1,a%irn(i) + write(iout,frmt) (i),(a%ja(k)),a%val(k) + k = k + hksz + end do + enddo + endif + endif + +end subroutine psb_z_hll_print diff --git a/ext/impl/psb_z_hll_reallocate_nz.f90 b/ext/impl/psb_z_hll_reallocate_nz.f90 new file mode 100644 index 00000000..23432f9f --- /dev/null +++ b/ext/impl/psb_z_hll_reallocate_nz.f90 @@ -0,0 +1,64 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_reallocate_nz(nz,a) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: m, nzrm,nz_ + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='z_hll_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + + ! + ! What should this really do??? + ! + nz_ = max(nz,ione) + call psb_realloc(nz_,a%ja,info) + if (info == psb_success_) call psb_realloc(nz_,a%val,info) + if (info /= psb_success_) then + call psb_errpush(psb_err_alloc_dealloc_,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hll_reallocate_nz diff --git a/ext/impl/psb_z_hll_reinit.f90 b/ext/impl/psb_z_hll_reinit.f90 new file mode 100644 index 00000000..b6851c61 --- /dev/null +++ b/ext/impl/psb_z_hll_reinit.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_reinit(a,clear) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_reinit + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + + Integer(Psb_ipk_) :: err_act, info + character(len=20) :: name='reinit' + logical :: clear_ + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + + if (present(clear)) then + clear_ = clear + else + clear_ = .true. + end if + + if (a%is_bld() .or. a%is_upd()) then + ! do nothing + return + else if (a%is_asb()) then + if (a%is_dev()) call a%sync() + if (clear_) a%val(:) = zzero + call a%set_upd() + call a%set_host() + else + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hll_reinit diff --git a/ext/impl/psb_z_hll_rowsum.f90 b/ext/impl/psb_z_hll_rowsum.f90 new file mode 100644 index 00000000..027c5b22 --- /dev/null +++ b/ext/impl/psb_z_hll_rowsum.f90 @@ -0,0 +1,110 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_rowsum(d,a) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_rowsum + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + + integer(psb_ipk_) :: i,j,k,m,n, nnz, ir, jc, nc, hksz, mxrwl + logical :: tra + Integer(Psb_ipk_) :: err_act, info, int_err(5) + character(len=20) :: name='rowsum' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = 0 + if (a%is_dev()) call a%sync() + + m = a%get_nrows() + n = a%get_ncols() + if (size(d) < m) then + info=psb_err_input_asize_small_i_ + int_err(1) = 1 + int_err(2) = size(d) + int_err(3) = m + call psb_errpush(info,name,i_err=int_err) + goto 9999 + end if + + + if (a%is_unit()) then + d = zone + else + d = zzero + end if + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call z_hll_rowsum(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz, & + & d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine z_hll_rowsum(ir,m,n,irn,ja,ldj,val,ldv,& + & d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: val(ldv,*) + complex(psb_dpk_), intent(inout) :: d(*) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + complex(psb_dpk_) :: acc(4), tmp + + info = psb_success_ + do i=1,m + do j=1, irn(i) + d(ir+i-1) = d(ir+i-1) + (val(i,j)) + end do + end do + + end subroutine z_hll_rowsum + +end subroutine psb_z_hll_rowsum diff --git a/ext/impl/psb_z_hll_scal.f90 b/ext/impl/psb_z_hll_scal.f90 new file mode 100644 index 00000000..a11d0da8 --- /dev/null +++ b/ext/impl/psb_z_hll_scal.f90 @@ -0,0 +1,135 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_scal(d,a,info,side) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_scal + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m, n, ierr(5), ld, k, mxrwl, hksz, ir + character(len=20) :: name='scal' + character :: side_ + logical :: left + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_dev()) call a%sync() + + info = psb_err_missing_override_method_ + call psb_errpush(info,name,i_err=ierr) + goto 9999 + + side_ = 'L' + if (present(side)) then + side_ = psb_toupper(side) + end if + + left = (side_ == 'L') + + ld = size(d) + if (left) then + m = a%get_nrows() + if (ld < m) then + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + else + n = a%get_ncols() + if (ld < n) then + info=psb_err_input_asize_invalid_i_ + ierr(1) = 2; ierr(2) = ld; + call psb_errpush(info,name,i_err=ierr) + goto 9999 + end if + end if + + hksz = a%get_hksz() + j = 1 + do i=1,m,hksz + ir = min(hksz,m-i+1) + mxrwl = (a%hkoffs(j+1) - a%hkoffs(j))/hksz + k = a%hkoffs(j) + 1 + call psb_z_hll_scal_inner(i,ir,mxrwl,a%irn(i),& + & a%ja(k),hksz,a%val(k),hksz,& + & left,d,info) + if (info /= psb_success_) goto 9999 + j = j + 1 + end do + + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + + subroutine psb_z_hll_scal_inner(ir,m,n,irn,ja,ldj,val,ldv,left,d,info) + integer(psb_ipk_), intent(in) :: ir,m,n,ldj,ldv,ja(ldj,*),irn(*) + complex(psb_dpk_), intent(in) :: d(*) + complex(psb_dpk_), intent(inout) :: val(ldv,*) + logical, intent(in) :: left + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i,j,k, m4, jc + + info = psb_success_ + + if (left) then + do i=1,m + do j=1, irn(i) + val(i,j) = val(i,j)*d(ir+i-1) + end do + end do + else + do i=1,m + do j=1, irn(i) + jc = ja(i,j) + val(i,j) = val(i,j)*d(jc) + end do + end do + + end if + + end subroutine psb_z_hll_scal_inner + + +end subroutine psb_z_hll_scal diff --git a/ext/impl/psb_z_hll_scals.f90 b/ext/impl/psb_z_hll_scals.f90 new file mode 100644 index 00000000..432f11e6 --- /dev/null +++ b/ext/impl/psb_z_hll_scals.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_hll_scals(d,a,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_hll_scals + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + Integer(Psb_ipk_) :: err_act,mnm, i, j, m + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + + info = psb_success_ + call psb_erractionsave(err_act) + if (a%is_dev()) call a%sync() + + if (a%is_unit()) then + call a%make_nonunit() + end if + + a%val(:) = a%val(:) * d + call a%set_host() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_z_hll_scals diff --git a/ext/impl/psb_z_mv_dia_from_coo.f90 b/ext/impl/psb_z_mv_dia_from_coo.f90 new file mode 100644 index 00000000..29e27dfc --- /dev/null +++ b/ext/impl/psb_z_mv_dia_from_coo.f90 @@ -0,0 +1,62 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_dia_from_coo(a,b,info) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_mv_dia_from_coo + implicit none + + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_mv_dia_from_coo diff --git a/ext/impl/psb_z_mv_dia_to_coo.f90 b/ext/impl/psb_z_mv_dia_to_coo.f90 new file mode 100644 index 00000000..1679c9e0 --- /dev/null +++ b/ext/impl/psb_z_mv_dia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_mv_dia_to_coo(a,b,info) + + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psb_z_mv_dia_to_coo + implicit none + + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_z_mv_dia_to_coo diff --git a/ext/impl/psb_z_mv_ell_from_coo.f90 b/ext/impl/psb_z_mv_ell_from_coo.f90 new file mode 100644 index 00000000..de39604e --- /dev/null +++ b/ext/impl/psb_z_mv_ell_from_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_ell_from_coo(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_mv_ell_from_coo + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, ir, ic + + info = psb_success_ + + if (.not.b%is_by_rows()) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + call b%free() + + return + +end subroutine psb_z_mv_ell_from_coo diff --git a/ext/impl/psb_z_mv_ell_from_fmt.f90 b/ext/impl/psb_z_mv_ell_from_fmt.f90 new file mode 100644 index 00000000..a2c7c190 --- /dev/null +++ b/ext/impl/psb_z_mv_ell_from_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_ell_from_fmt(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_mv_ell_from_fmt + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_z_ell_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_mv_ell_from_fmt diff --git a/ext/impl/psb_z_mv_ell_to_coo.f90 b/ext/impl/psb_z_mv_ell_to_coo.f90 new file mode 100644 index 00000000..3f8afb0a --- /dev/null +++ b/ext/impl/psb_z_mv_ell_to_coo.f90 @@ -0,0 +1,89 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_ell_to_coo(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_mv_ell_to_coo + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + if (a%is_dev()) call a%sync() + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + ! Taking a path slightly slower but with less memory footprint + deallocate(a%idiag) + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + + call psb_realloc(nza,b%ia,info) + if (info == 0) call psb_realloc(nza,b%ja,info) + if (info /= 0) goto 9999 + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%ia(k) = i + b%ja(k) = a%ja(i,j) + end do + end do + deallocate(a%ja, stat=info) + + if (info == 0) call psb_realloc(nza,b%val,info) + if (info /= 0) goto 9999 + + k=0 + do i=1, nr + do j=1,a%irn(i) + k = k + 1 + b%val(k) = a%val(i,j) + end do + end do + call a%free() + call b%set_nzeros(nza) + call b%set_host() + call b%fix(info) + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_z_mv_ell_to_coo diff --git a/ext/impl/psb_z_mv_ell_to_fmt.f90 b/ext/impl/psb_z_mv_ell_to_fmt.f90 new file mode 100644 index 00000000..d34ae80e --- /dev/null +++ b/ext/impl/psb_z_mv_ell_to_fmt.f90 @@ -0,0 +1,67 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_ell_to_fmt(a,b,info) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psb_z_mv_ell_to_fmt + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_z_ell_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_mv_ell_to_fmt diff --git a/ext/impl/psb_z_mv_hdia_from_coo.f90 b/ext/impl/psb_z_mv_hdia_from_coo.f90 new file mode 100644 index 00000000..b9593f34 --- /dev/null +++ b/ext/impl/psb_z_mv_hdia_from_coo.f90 @@ -0,0 +1,60 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_mv_hdia_from_coo(a,b,info) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_mv_hdia_from_coo + implicit none + + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: err_act + + info = psb_success_ + + if (.not.(b%is_by_rows())) call b%fix(info) + if (info /= psb_success_) return + + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_mv_hdia_from_coo diff --git a/ext/impl/psb_z_mv_hdia_to_coo.f90 b/ext/impl/psb_z_mv_hdia_to_coo.f90 new file mode 100644 index 00000000..f4c8df55 --- /dev/null +++ b/ext/impl/psb_z_mv_hdia_to_coo.f90 @@ -0,0 +1,55 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psb_z_mv_hdia_to_coo(a,b,info) + + use psb_base_mod + use psb_z_hdia_mat_mod, psb_protect_name => psb_z_mv_hdia_to_coo + implicit none + + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, idl,err_act + + info = psb_success_ + + call a%cp_to_coo(b,info) + if (info /= 0) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_z_mv_hdia_to_coo diff --git a/ext/impl/psb_z_mv_hll_from_coo.f90 b/ext/impl/psb_z_mv_hll_from_coo.f90 new file mode 100644 index 00000000..abe988b3 --- /dev/null +++ b/ext/impl/psb_z_mv_hll_from_coo.f90 @@ -0,0 +1,58 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_hll_from_coo(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_mv_hll_from_coo + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: hksz + info = psb_success_ + if (.not.b%is_by_rows()) call b%fix(info) + hksz = psi_get_hksz() + call psi_convert_hll_from_coo(a,hksz,b,info) + if (info /= 0) goto 9999 + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_z_mv_hll_from_coo diff --git a/ext/impl/psb_z_mv_hll_from_fmt.f90 b/ext/impl/psb_z_mv_hll_from_fmt.f90 new file mode 100644 index 00000000..81626aba --- /dev/null +++ b/ext/impl/psb_z_mv_hll_from_fmt.f90 @@ -0,0 +1,70 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_hll_from_fmt(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_mv_hll_from_fmt + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b,info) + + type is (psb_z_hll_sparse_mat) + if (b%is_dev()) call b%sync() + a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat + call move_alloc(b%irn, a%irn) + call move_alloc(b%idiag, a%idiag) + call move_alloc(b%hkoffs, a%hkoffs) + call move_alloc(b%ja, a%ja) + call move_alloc(b%val, a%val) + a%hksz = b%hksz + a%nzt = b%nzt + call b%free() + call a%set_host() + + class default + call b%mv_to_coo(tmp,info) + if (info == psb_success_) call a%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_mv_hll_from_fmt diff --git a/ext/impl/psb_z_mv_hll_to_coo.f90 b/ext/impl/psb_z_mv_hll_to_coo.f90 new file mode 100644 index 00000000..af033004 --- /dev/null +++ b/ext/impl/psb_z_mv_hll_to_coo.f90 @@ -0,0 +1,56 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_hll_to_coo(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_mv_hll_to_coo + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + info = psb_success_ + + call a%cp_to_coo(b,info) + + if (info /= psb_success_) goto 9999 + call a%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return +end subroutine psb_z_mv_hll_to_coo diff --git a/ext/impl/psb_z_mv_hll_to_fmt.f90 b/ext/impl/psb_z_mv_hll_to_fmt.f90 new file mode 100644 index 00000000..a2fd7027 --- /dev/null +++ b/ext/impl/psb_z_mv_hll_to_fmt.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_z_mv_hll_to_fmt(a,b,info) + + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psb_z_mv_hll_to_fmt + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_z_coo_sparse_mat) :: tmp + + info = psb_success_ + + select type (b) + type is (psb_z_coo_sparse_mat) + call a%mv_to_coo(b,info) + ! Need to fix trivial copies! + type is (psb_z_hll_sparse_mat) + if (a%is_dev()) call a%sync() + b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat + call move_alloc(a%irn, b%irn) + call move_alloc(a%hkoffs, b%hkoffs) + call move_alloc(a%idiag, b%idiag) + call move_alloc(a%ja, b%ja) + call move_alloc(a%val, b%val) + b%hksz = a%hksz + call a%free() + call b%set_host() + + class default + call a%mv_to_coo(tmp,info) + if (info == psb_success_) call b%mv_from_coo(tmp,info) + end select + +end subroutine psb_z_mv_hll_to_fmt diff --git a/ext/impl/psi_c_convert_dia_from_coo.f90 b/ext/impl/psi_c_convert_dia_from_coo.f90 new file mode 100644 index 00000000..29565748 --- /dev/null +++ b/ext/impl/psi_c_convert_dia_from_coo.f90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_c_convert_dia_from_coo(a,tmp,info) + use psb_base_mod + use psb_c_dia_mat_mod, psb_protect_name => psi_c_convert_dia_from_coo + use psi_ext_util_mod + implicit none + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,nd + integer(psb_ipk_),allocatable :: d(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza,ir,ic + + info = psb_success_ + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = tmp%psb_c_base_sparse_mat + + ndiag = nr+nc-1 + allocate(d(ndiag),stat=info) + if (info /= 0) return + call psb_realloc(ndiag,a%offset,info) + if (info /= 0) return + + call psi_dia_offset_from_coo(nr,nc,nza,tmp%ia,tmp%ja, & + & nd,d,a%offset,info,initd=.true.,cleard=.false.) + + call psb_realloc(nd,a%offset,info) + if (info /= 0) return + call psb_realloc(nr,nd,a%data,info) + if (info /= 0) return + a%nzeros = nza + + call psi_xtr_dia_from_coo(nr,nc,nza,tmp%ia,tmp%ja,tmp%val,& + & d,nr,nd,a%data,info,initdata=.true.) + + deallocate(d,stat=info) + if (info /= 0) return + +end subroutine psi_c_convert_dia_from_coo diff --git a/ext/impl/psi_c_convert_ell_from_coo.f90 b/ext/impl/psi_c_convert_ell_from_coo.f90 new file mode 100644 index 00000000..b4e0c7e4 --- /dev/null +++ b/ext/impl/psi_c_convert_ell_from_coo.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_c_convert_ell_from_coo(a,tmp,info,hacksize) + + use psb_base_mod + use psb_c_ell_mat_mod, psb_protect_name => psi_c_convert_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, hsz_, ldv + + info = psb_success_ + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = tmp%psb_c_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= psb_success_) return + a%irn = 0 + do i=1, nza + ir = tmp%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + end do + ! Allocate and extract. + call psb_realloc(nr,a%idiag,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%val,info) + if (info /= psb_success_) return + + call psi_c_xtr_ell_from_coo(ione,nr,nzm,tmp%ia,tmp%ja,tmp%val,& + & a%ja,a%val,a%irn,a%idiag,ldv) + +end subroutine psi_c_convert_ell_from_coo + diff --git a/ext/impl/psi_c_convert_hll_from_coo.f90 b/ext/impl/psi_c_convert_hll_from_coo.f90 new file mode 100644 index 00000000..2ebb86a6 --- /dev/null +++ b/ext/impl/psi_c_convert_hll_from_coo.f90 @@ -0,0 +1,122 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_c_convert_hll_from_coo(a,hksz,tmp,info) + use psb_base_mod + use psb_c_hll_mat_mod, psb_protect_name => psi_c_convert_hll_from_coo + use psi_ext_util_mod + implicit none + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + + + if (.not.tmp%is_by_rows()) then + info = -98765 + return + end if + + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_c_base_sparse_mat = tmp%psb_c_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= 0) return + a%irn = 0 + do i=1, nza + a%irn(tmp%ia(i)) = a%irn(tmp%ia(i)) + 1 + end do + + a%nzt = nza + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + do k=1,ir-1 + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) + call psb_realloc(nr,a%idiag,info) + if (info == 0) call psb_realloc(isz,a%ja,info) + if (info == 0) call psb_realloc(isz,a%val,info) + if (info /= 0) return + ! Init last chunk of data + nzm = a%hkoffs(noffs+1)-a%hkoffs(noffs) + a%val(isz-(nzm-1):isz) = czero + a%ja(isz-(nzm-1):isz) = nr + ! + ! Now copy everything, noting the position of the diagonal. + ! + kc = 1 + k = 1 + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + mxrwl = isz/hksz + nza = sum(a%irn(i:i+ir-1)) + call psi_c_xtr_ell_from_coo(i,ir,mxrwl,tmp%ia(kc:kc+nza-1),& + & tmp%ja(kc:kc+nza-1),tmp%val(kc:kc+nza-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & a%idiag(i:i+ir-1),hksz) + k = k + isz + kc = kc + nza + + enddo + + ! Third copy the other stuff + if (info /= 0) return + call a%set_sorted(.true.) + +end subroutine psi_c_convert_hll_from_coo diff --git a/ext/impl/psi_c_xtr_coo_from_dia.f90 b/ext/impl/psi_c_xtr_coo_from_dia.f90 new file mode 100644 index 00000000..eab82a11 --- /dev/null +++ b/ext/impl/psi_c_xtr_coo_from_dia.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_c_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,info,rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_, czero + + implicit none + + integer(psb_ipk_), intent(in) :: nr,nc, nrd,ncd, offsets(:) + integer(psb_ipk_), intent(inout) :: ia(:), ja(:),nz + complex(psb_spk_), intent(inout) :: val(:) + complex(psb_spk_), intent(in) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + integer(psb_ipk_) :: rdisp_, nrcmdisp, rdisp1 + integer(psb_ipk_) :: i,j,ir1, ir2, ir, ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_coo_from_dia',nr,nc,nrd,ncd, rdisp_ + nrcmdisp = min(nr-rdisp_,nc-rdisp_) + rdisp1 = 1-rdisp_ + nz = 0 + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + if (debug) write(0,*) ' Loop J',j,ir1,ir2, offsets(j) + do i=ir1,ir2 + ir = i + rdisp_ + ic = i + rdisp_ + offsets(j) + if (debug) write(0,*) ' Loop I',i,ir,ic + nz = nz + 1 + ia(nz) = ir + ja(nz) = ic + val(nz) = data(i,j) + enddo + end do + +end subroutine psi_c_xtr_coo_from_dia + diff --git a/ext/impl/psi_c_xtr_dia_from_coo.f90 b/ext/impl/psi_c_xtr_dia_from_coo.f90 new file mode 100644 index 00000000..f72a03df --- /dev/null +++ b/ext/impl/psi_c_xtr_dia_from_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_c_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,& + & initdata, rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_, czero + + implicit none + integer(psb_ipk_), intent(in) :: nr, nc, nz, nrd,ncd,ia(:), ja(:), d(:) + complex(psb_spk_), intent(in) :: val(:) + complex(psb_spk_), intent(out) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initdata + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + logical :: initdata_ + integer(psb_ipk_) :: rdisp_ + integer(psb_ipk_) :: i,ir,ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + initdata_ = .true. + if (present(initdata)) initdata_ = initdata + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_dia_from_coo',nr,nc,nz,nrd,ncd,initdata_, rdisp_ + + if (initdata_) data(1:nrd,1:ncd) = czero + + do i=1,nz + ir = ia(i) + k = ja(i) - ir + ic = d(nr+k) + if (debug) write(0,*) 'loop xtr_dia_from_coo :',ia(i),ja(i),k,ir-rdisp_,ic + data(ir-rdisp_,ic) = val(i) + enddo + + +end subroutine psi_c_xtr_dia_from_coo diff --git a/ext/impl/psi_c_xtr_ell_from_coo.f90 b/ext/impl/psi_c_xtr_ell_from_coo.f90 new file mode 100644 index 00000000..706e6c1f --- /dev/null +++ b/ext/impl/psi_c_xtr_ell_from_coo.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_c_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,valc, & + & ja,val,irn,diag,ld) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_, czero + + implicit none + integer(psb_ipk_) :: i,nr,mxrwl,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*) + complex(psb_spk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc, ir, ic + kc = 1 + do ii = 1, nr + nc = irn(ii) + do jj=1,nc + !if (iac(kc) /= i+ii-1) write(0,*) 'Copy mismatch',iac(kc),i,ii,i+ii-1 + ir = iac(kc) + ic = jac(kc) + if (ir == ic) diag(ii) = jj + ja(ii,jj) = ic + val(ii,jj) = valc(kc) + kc = kc + 1 + end do + ! We are assuming that jac contains at least one valid entry + ! If the previous loop did not have any entries, pick one valid + ! value. + if (nc == 0) ic = jac(1) + do jj = nc+1,mxrwl + ja(ii,jj) = ic + val(ii,jj) = czero + end do + end do +end subroutine psi_c_xtr_ell_from_coo + diff --git a/ext/impl/psi_d_convert_dia_from_coo.f90 b/ext/impl/psi_d_convert_dia_from_coo.f90 new file mode 100644 index 00000000..5f821967 --- /dev/null +++ b/ext/impl/psi_d_convert_dia_from_coo.f90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_d_convert_dia_from_coo(a,tmp,info) + use psb_base_mod + use psb_d_dia_mat_mod, psb_protect_name => psi_d_convert_dia_from_coo + use psi_ext_util_mod + implicit none + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,nd + integer(psb_ipk_),allocatable :: d(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza,ir,ic + + info = psb_success_ + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat + + ndiag = nr+nc-1 + allocate(d(ndiag),stat=info) + if (info /= 0) return + call psb_realloc(ndiag,a%offset,info) + if (info /= 0) return + + call psi_dia_offset_from_coo(nr,nc,nza,tmp%ia,tmp%ja, & + & nd,d,a%offset,info,initd=.true.,cleard=.false.) + + call psb_realloc(nd,a%offset,info) + if (info /= 0) return + call psb_realloc(nr,nd,a%data,info) + if (info /= 0) return + a%nzeros = nza + + call psi_xtr_dia_from_coo(nr,nc,nza,tmp%ia,tmp%ja,tmp%val,& + & d,nr,nd,a%data,info,initdata=.true.) + + deallocate(d,stat=info) + if (info /= 0) return + +end subroutine psi_d_convert_dia_from_coo diff --git a/ext/impl/psi_d_convert_ell_from_coo.f90 b/ext/impl/psi_d_convert_ell_from_coo.f90 new file mode 100644 index 00000000..51471c19 --- /dev/null +++ b/ext/impl/psi_d_convert_ell_from_coo.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_d_convert_ell_from_coo(a,tmp,info,hacksize) + + use psb_base_mod + use psb_d_ell_mat_mod, psb_protect_name => psi_d_convert_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, hsz_, ldv + + info = psb_success_ + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= psb_success_) return + a%irn = 0 + do i=1, nza + ir = tmp%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + end do + ! Allocate and extract. + call psb_realloc(nr,a%idiag,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%val,info) + if (info /= psb_success_) return + + call psi_d_xtr_ell_from_coo(ione,nr,nzm,tmp%ia,tmp%ja,tmp%val,& + & a%ja,a%val,a%irn,a%idiag,ldv) + +end subroutine psi_d_convert_ell_from_coo + diff --git a/ext/impl/psi_d_convert_hll_from_coo.f90 b/ext/impl/psi_d_convert_hll_from_coo.f90 new file mode 100644 index 00000000..cb07e52c --- /dev/null +++ b/ext/impl/psi_d_convert_hll_from_coo.f90 @@ -0,0 +1,122 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_d_convert_hll_from_coo(a,hksz,tmp,info) + use psb_base_mod + use psb_d_hll_mat_mod, psb_protect_name => psi_d_convert_hll_from_coo + use psi_ext_util_mod + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + + + if (.not.tmp%is_by_rows()) then + info = -98765 + return + end if + + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= 0) return + a%irn = 0 + do i=1, nza + a%irn(tmp%ia(i)) = a%irn(tmp%ia(i)) + 1 + end do + + a%nzt = nza + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + do k=1,ir-1 + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) + call psb_realloc(nr,a%idiag,info) + if (info == 0) call psb_realloc(isz,a%ja,info) + if (info == 0) call psb_realloc(isz,a%val,info) + if (info /= 0) return + ! Init last chunk of data + nzm = a%hkoffs(noffs+1)-a%hkoffs(noffs) + a%val(isz-(nzm-1):isz) = dzero + a%ja(isz-(nzm-1):isz) = nr + ! + ! Now copy everything, noting the position of the diagonal. + ! + kc = 1 + k = 1 + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + mxrwl = isz/hksz + nza = sum(a%irn(i:i+ir-1)) + call psi_d_xtr_ell_from_coo(i,ir,mxrwl,tmp%ia(kc:kc+nza-1),& + & tmp%ja(kc:kc+nza-1),tmp%val(kc:kc+nza-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & a%idiag(i:i+ir-1),hksz) + k = k + isz + kc = kc + nza + + enddo + + ! Third copy the other stuff + if (info /= 0) return + call a%set_sorted(.true.) + +end subroutine psi_d_convert_hll_from_coo diff --git a/ext/impl/psi_d_xtr_coo_from_dia.f90 b/ext/impl/psi_d_xtr_coo_from_dia.f90 new file mode 100644 index 00000000..5fc98b82 --- /dev/null +++ b/ext/impl/psi_d_xtr_coo_from_dia.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_d_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,info,rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, dzero + + implicit none + + integer(psb_ipk_), intent(in) :: nr,nc, nrd,ncd, offsets(:) + integer(psb_ipk_), intent(inout) :: ia(:), ja(:),nz + real(psb_dpk_), intent(inout) :: val(:) + real(psb_dpk_), intent(in) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + integer(psb_ipk_) :: rdisp_, nrcmdisp, rdisp1 + integer(psb_ipk_) :: i,j,ir1, ir2, ir, ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_coo_from_dia',nr,nc,nrd,ncd, rdisp_ + nrcmdisp = min(nr-rdisp_,nc-rdisp_) + rdisp1 = 1-rdisp_ + nz = 0 + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + if (debug) write(0,*) ' Loop J',j,ir1,ir2, offsets(j) + do i=ir1,ir2 + ir = i + rdisp_ + ic = i + rdisp_ + offsets(j) + if (debug) write(0,*) ' Loop I',i,ir,ic + nz = nz + 1 + ia(nz) = ir + ja(nz) = ic + val(nz) = data(i,j) + enddo + end do + +end subroutine psi_d_xtr_coo_from_dia + diff --git a/ext/impl/psi_d_xtr_dia_from_coo.f90 b/ext/impl/psi_d_xtr_dia_from_coo.f90 new file mode 100644 index 00000000..cd95b64e --- /dev/null +++ b/ext/impl/psi_d_xtr_dia_from_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_d_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,& + & initdata, rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, dzero + + implicit none + integer(psb_ipk_), intent(in) :: nr, nc, nz, nrd,ncd,ia(:), ja(:), d(:) + real(psb_dpk_), intent(in) :: val(:) + real(psb_dpk_), intent(out) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initdata + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + logical :: initdata_ + integer(psb_ipk_) :: rdisp_ + integer(psb_ipk_) :: i,ir,ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + initdata_ = .true. + if (present(initdata)) initdata_ = initdata + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_dia_from_coo',nr,nc,nz,nrd,ncd,initdata_, rdisp_ + + if (initdata_) data(1:nrd,1:ncd) = dzero + + do i=1,nz + ir = ia(i) + k = ja(i) - ir + ic = d(nr+k) + if (debug) write(0,*) 'loop xtr_dia_from_coo :',ia(i),ja(i),k,ir-rdisp_,ic + data(ir-rdisp_,ic) = val(i) + enddo + + +end subroutine psi_d_xtr_dia_from_coo diff --git a/ext/impl/psi_d_xtr_ell_from_coo.f90 b/ext/impl/psi_d_xtr_ell_from_coo.f90 new file mode 100644 index 00000000..ec520797 --- /dev/null +++ b/ext/impl/psi_d_xtr_ell_from_coo.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_d_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,valc, & + & ja,val,irn,diag,ld) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, dzero + + implicit none + integer(psb_ipk_) :: i,nr,mxrwl,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*) + real(psb_dpk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc, ir, ic + kc = 1 + do ii = 1, nr + nc = irn(ii) + do jj=1,nc + !if (iac(kc) /= i+ii-1) write(0,*) 'Copy mismatch',iac(kc),i,ii,i+ii-1 + ir = iac(kc) + ic = jac(kc) + if (ir == ic) diag(ii) = jj + ja(ii,jj) = ic + val(ii,jj) = valc(kc) + kc = kc + 1 + end do + ! We are assuming that jac contains at least one valid entry + ! If the previous loop did not have any entries, pick one valid + ! value. + if (nc == 0) ic = jac(1) + do jj = nc+1,mxrwl + ja(ii,jj) = ic + val(ii,jj) = dzero + end do + end do +end subroutine psi_d_xtr_ell_from_coo + diff --git a/ext/impl/psi_s_convert_dia_from_coo.f90 b/ext/impl/psi_s_convert_dia_from_coo.f90 new file mode 100644 index 00000000..d2f58778 --- /dev/null +++ b/ext/impl/psi_s_convert_dia_from_coo.f90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_s_convert_dia_from_coo(a,tmp,info) + use psb_base_mod + use psb_s_dia_mat_mod, psb_protect_name => psi_s_convert_dia_from_coo + use psi_ext_util_mod + implicit none + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,nd + integer(psb_ipk_),allocatable :: d(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza,ir,ic + + info = psb_success_ + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = tmp%psb_s_base_sparse_mat + + ndiag = nr+nc-1 + allocate(d(ndiag),stat=info) + if (info /= 0) return + call psb_realloc(ndiag,a%offset,info) + if (info /= 0) return + + call psi_dia_offset_from_coo(nr,nc,nza,tmp%ia,tmp%ja, & + & nd,d,a%offset,info,initd=.true.,cleard=.false.) + + call psb_realloc(nd,a%offset,info) + if (info /= 0) return + call psb_realloc(nr,nd,a%data,info) + if (info /= 0) return + a%nzeros = nza + + call psi_xtr_dia_from_coo(nr,nc,nza,tmp%ia,tmp%ja,tmp%val,& + & d,nr,nd,a%data,info,initdata=.true.) + + deallocate(d,stat=info) + if (info /= 0) return + +end subroutine psi_s_convert_dia_from_coo diff --git a/ext/impl/psi_s_convert_ell_from_coo.f90 b/ext/impl/psi_s_convert_ell_from_coo.f90 new file mode 100644 index 00000000..ecdd9b1e --- /dev/null +++ b/ext/impl/psi_s_convert_ell_from_coo.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_s_convert_ell_from_coo(a,tmp,info,hacksize) + + use psb_base_mod + use psb_s_ell_mat_mod, psb_protect_name => psi_s_convert_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, hsz_, ldv + + info = psb_success_ + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = tmp%psb_s_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= psb_success_) return + a%irn = 0 + do i=1, nza + ir = tmp%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + end do + ! Allocate and extract. + call psb_realloc(nr,a%idiag,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%val,info) + if (info /= psb_success_) return + + call psi_s_xtr_ell_from_coo(ione,nr,nzm,tmp%ia,tmp%ja,tmp%val,& + & a%ja,a%val,a%irn,a%idiag,ldv) + +end subroutine psi_s_convert_ell_from_coo + diff --git a/ext/impl/psi_s_convert_hll_from_coo.f90 b/ext/impl/psi_s_convert_hll_from_coo.f90 new file mode 100644 index 00000000..dcf6c4e2 --- /dev/null +++ b/ext/impl/psi_s_convert_hll_from_coo.f90 @@ -0,0 +1,122 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_s_convert_hll_from_coo(a,hksz,tmp,info) + use psb_base_mod + use psb_s_hll_mat_mod, psb_protect_name => psi_s_convert_hll_from_coo + use psi_ext_util_mod + implicit none + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + + + if (.not.tmp%is_by_rows()) then + info = -98765 + return + end if + + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_s_base_sparse_mat = tmp%psb_s_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= 0) return + a%irn = 0 + do i=1, nza + a%irn(tmp%ia(i)) = a%irn(tmp%ia(i)) + 1 + end do + + a%nzt = nza + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + do k=1,ir-1 + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) + call psb_realloc(nr,a%idiag,info) + if (info == 0) call psb_realloc(isz,a%ja,info) + if (info == 0) call psb_realloc(isz,a%val,info) + if (info /= 0) return + ! Init last chunk of data + nzm = a%hkoffs(noffs+1)-a%hkoffs(noffs) + a%val(isz-(nzm-1):isz) = szero + a%ja(isz-(nzm-1):isz) = nr + ! + ! Now copy everything, noting the position of the diagonal. + ! + kc = 1 + k = 1 + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + mxrwl = isz/hksz + nza = sum(a%irn(i:i+ir-1)) + call psi_s_xtr_ell_from_coo(i,ir,mxrwl,tmp%ia(kc:kc+nza-1),& + & tmp%ja(kc:kc+nza-1),tmp%val(kc:kc+nza-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & a%idiag(i:i+ir-1),hksz) + k = k + isz + kc = kc + nza + + enddo + + ! Third copy the other stuff + if (info /= 0) return + call a%set_sorted(.true.) + +end subroutine psi_s_convert_hll_from_coo diff --git a/ext/impl/psi_s_xtr_coo_from_dia.f90 b/ext/impl/psi_s_xtr_coo_from_dia.f90 new file mode 100644 index 00000000..3745365b --- /dev/null +++ b/ext/impl/psi_s_xtr_coo_from_dia.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_s_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,info,rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_, szero + + implicit none + + integer(psb_ipk_), intent(in) :: nr,nc, nrd,ncd, offsets(:) + integer(psb_ipk_), intent(inout) :: ia(:), ja(:),nz + real(psb_spk_), intent(inout) :: val(:) + real(psb_spk_), intent(in) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + integer(psb_ipk_) :: rdisp_, nrcmdisp, rdisp1 + integer(psb_ipk_) :: i,j,ir1, ir2, ir, ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_coo_from_dia',nr,nc,nrd,ncd, rdisp_ + nrcmdisp = min(nr-rdisp_,nc-rdisp_) + rdisp1 = 1-rdisp_ + nz = 0 + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + if (debug) write(0,*) ' Loop J',j,ir1,ir2, offsets(j) + do i=ir1,ir2 + ir = i + rdisp_ + ic = i + rdisp_ + offsets(j) + if (debug) write(0,*) ' Loop I',i,ir,ic + nz = nz + 1 + ia(nz) = ir + ja(nz) = ic + val(nz) = data(i,j) + enddo + end do + +end subroutine psi_s_xtr_coo_from_dia + diff --git a/ext/impl/psi_s_xtr_dia_from_coo.f90 b/ext/impl/psi_s_xtr_dia_from_coo.f90 new file mode 100644 index 00000000..a8ee7c4b --- /dev/null +++ b/ext/impl/psi_s_xtr_dia_from_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_s_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,& + & initdata, rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_, szero + + implicit none + integer(psb_ipk_), intent(in) :: nr, nc, nz, nrd,ncd,ia(:), ja(:), d(:) + real(psb_spk_), intent(in) :: val(:) + real(psb_spk_), intent(out) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initdata + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + logical :: initdata_ + integer(psb_ipk_) :: rdisp_ + integer(psb_ipk_) :: i,ir,ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + initdata_ = .true. + if (present(initdata)) initdata_ = initdata + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_dia_from_coo',nr,nc,nz,nrd,ncd,initdata_, rdisp_ + + if (initdata_) data(1:nrd,1:ncd) = szero + + do i=1,nz + ir = ia(i) + k = ja(i) - ir + ic = d(nr+k) + if (debug) write(0,*) 'loop xtr_dia_from_coo :',ia(i),ja(i),k,ir-rdisp_,ic + data(ir-rdisp_,ic) = val(i) + enddo + + +end subroutine psi_s_xtr_dia_from_coo diff --git a/ext/impl/psi_s_xtr_ell_from_coo.f90 b/ext/impl/psi_s_xtr_ell_from_coo.f90 new file mode 100644 index 00000000..0bac2ec0 --- /dev/null +++ b/ext/impl/psi_s_xtr_ell_from_coo.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_s_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,valc, & + & ja,val,irn,diag,ld) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_spk_, szero + + implicit none + integer(psb_ipk_) :: i,nr,mxrwl,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*) + real(psb_spk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc, ir, ic + kc = 1 + do ii = 1, nr + nc = irn(ii) + do jj=1,nc + !if (iac(kc) /= i+ii-1) write(0,*) 'Copy mismatch',iac(kc),i,ii,i+ii-1 + ir = iac(kc) + ic = jac(kc) + if (ir == ic) diag(ii) = jj + ja(ii,jj) = ic + val(ii,jj) = valc(kc) + kc = kc + 1 + end do + ! We are assuming that jac contains at least one valid entry + ! If the previous loop did not have any entries, pick one valid + ! value. + if (nc == 0) ic = jac(1) + do jj = nc+1,mxrwl + ja(ii,jj) = ic + val(ii,jj) = szero + end do + end do +end subroutine psi_s_xtr_ell_from_coo + diff --git a/ext/impl/psi_z_convert_dia_from_coo.f90 b/ext/impl/psi_z_convert_dia_from_coo.f90 new file mode 100644 index 00000000..ddc9d2fd --- /dev/null +++ b/ext/impl/psi_z_convert_dia_from_coo.f90 @@ -0,0 +1,73 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_z_convert_dia_from_coo(a,tmp,info) + use psb_base_mod + use psb_z_dia_mat_mod, psb_protect_name => psi_z_convert_dia_from_coo + use psi_ext_util_mod + implicit none + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + + !locals + integer(psb_ipk_) :: ndiag,nd + integer(psb_ipk_),allocatable :: d(:) + integer(psb_ipk_) :: k,i,j,nc,nr,nza,ir,ic + + info = psb_success_ + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = tmp%psb_z_base_sparse_mat + + ndiag = nr+nc-1 + allocate(d(ndiag),stat=info) + if (info /= 0) return + call psb_realloc(ndiag,a%offset,info) + if (info /= 0) return + + call psi_dia_offset_from_coo(nr,nc,nza,tmp%ia,tmp%ja, & + & nd,d,a%offset,info,initd=.true.,cleard=.false.) + + call psb_realloc(nd,a%offset,info) + if (info /= 0) return + call psb_realloc(nr,nd,a%data,info) + if (info /= 0) return + a%nzeros = nza + + call psi_xtr_dia_from_coo(nr,nc,nza,tmp%ia,tmp%ja,tmp%val,& + & d,nr,nd,a%data,info,initdata=.true.) + + deallocate(d,stat=info) + if (info /= 0) return + +end subroutine psi_z_convert_dia_from_coo diff --git a/ext/impl/psi_z_convert_ell_from_coo.f90 b/ext/impl/psi_z_convert_ell_from_coo.f90 new file mode 100644 index 00000000..3d37c11f --- /dev/null +++ b/ext/impl/psi_z_convert_ell_from_coo.f90 @@ -0,0 +1,87 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_z_convert_ell_from_coo(a,tmp,info,hacksize) + + use psb_base_mod + use psb_z_ell_mat_mod, psb_protect_name => psi_z_convert_ell_from_coo + use psi_ext_util_mod + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, & + & ir, ic, hsz_, ldv + + info = psb_success_ + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + + hsz_ = 1 + if (present(hacksize)) then + if (hacksize> 1) hsz_ = hacksize + end if + ! Make ldv a multiple of hacksize + ldv = ((nr+hsz_-1)/hsz_)*hsz_ + + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = tmp%psb_z_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= psb_success_) return + a%irn = 0 + do i=1, nza + ir = tmp%ia(i) + a%irn(ir) = a%irn(ir) + 1 + end do + nzm = 0 + a%nzt = 0 + do i=1,nr + nzm = max(nzm,a%irn(i)) + a%nzt = a%nzt + a%irn(i) + end do + ! Allocate and extract. + call psb_realloc(nr,a%idiag,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%ja,info) + if (info == psb_success_) call psb_realloc(ldv,nzm,a%val,info) + if (info /= psb_success_) return + + call psi_z_xtr_ell_from_coo(ione,nr,nzm,tmp%ia,tmp%ja,tmp%val,& + & a%ja,a%val,a%irn,a%idiag,ldv) + +end subroutine psi_z_convert_ell_from_coo + diff --git a/ext/impl/psi_z_convert_hll_from_coo.f90 b/ext/impl/psi_z_convert_hll_from_coo.f90 new file mode 100644 index 00000000..bc9fdde1 --- /dev/null +++ b/ext/impl/psi_z_convert_hll_from_coo.f90 @@ -0,0 +1,122 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_z_convert_hll_from_coo(a,hksz,tmp,info) + use psb_base_mod + use psb_z_hll_mat_mod, psb_protect_name => psi_z_convert_hll_from_coo + use psi_ext_util_mod + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(in) :: hksz + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc, isz,irs + integer(psb_ipk_) :: nzm, ir, ic, k, hk, mxrwl, noffs, kc + + + if (.not.tmp%is_by_rows()) then + info = -98765 + return + end if + + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_z_base_sparse_mat = tmp%psb_z_base_sparse_mat + + ! First compute the number of nonzeros in each row. + call psb_realloc(nr,a%irn,info) + if (info /= 0) return + a%irn = 0 + do i=1, nza + a%irn(tmp%ia(i)) = a%irn(tmp%ia(i)) + 1 + end do + + a%nzt = nza + ! Second. Figure out the block offsets. + call a%set_hksz(hksz) + noffs = (nr+hksz-1)/hksz + call psb_realloc(noffs+1,a%hkoffs,info) + if (info /= 0) return + a%hkoffs(1) = 0 + j=1 + do i=1,nr,hksz + ir = min(hksz,nr-i+1) + mxrwl = a%irn(i) + do k=1,ir-1 + mxrwl = max(mxrwl,a%irn(i+k)) + end do + a%hkoffs(j+1) = a%hkoffs(j) + mxrwl*hksz + j = j + 1 + end do + + ! + ! At this point a%hkoffs(noffs+1) contains the allocation + ! size a%ja a%val. + ! + isz = a%hkoffs(noffs+1) + call psb_realloc(nr,a%idiag,info) + if (info == 0) call psb_realloc(isz,a%ja,info) + if (info == 0) call psb_realloc(isz,a%val,info) + if (info /= 0) return + ! Init last chunk of data + nzm = a%hkoffs(noffs+1)-a%hkoffs(noffs) + a%val(isz-(nzm-1):isz) = zzero + a%ja(isz-(nzm-1):isz) = nr + ! + ! Now copy everything, noting the position of the diagonal. + ! + kc = 1 + k = 1 + do i=1, nr,hksz + ir = min(hksz,nr-i+1) + irs = (i-1)/hksz + hk = irs + 1 + isz = (a%hkoffs(hk+1)-a%hkoffs(hk)) + mxrwl = isz/hksz + nza = sum(a%irn(i:i+ir-1)) + call psi_z_xtr_ell_from_coo(i,ir,mxrwl,tmp%ia(kc:kc+nza-1),& + & tmp%ja(kc:kc+nza-1),tmp%val(kc:kc+nza-1),& + & a%ja(k:k+isz-1),a%val(k:k+isz-1),a%irn(i:i+ir-1),& + & a%idiag(i:i+ir-1),hksz) + k = k + isz + kc = kc + nza + + enddo + + ! Third copy the other stuff + if (info /= 0) return + call a%set_sorted(.true.) + +end subroutine psi_z_convert_hll_from_coo diff --git a/ext/impl/psi_z_xtr_coo_from_dia.f90 b/ext/impl/psi_z_xtr_coo_from_dia.f90 new file mode 100644 index 00000000..70d0938f --- /dev/null +++ b/ext/impl/psi_z_xtr_coo_from_dia.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_z_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,info,rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, zzero + + implicit none + + integer(psb_ipk_), intent(in) :: nr,nc, nrd,ncd, offsets(:) + integer(psb_ipk_), intent(inout) :: ia(:), ja(:),nz + complex(psb_dpk_), intent(inout) :: val(:) + complex(psb_dpk_), intent(in) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + integer(psb_ipk_) :: rdisp_, nrcmdisp, rdisp1 + integer(psb_ipk_) :: i,j,ir1, ir2, ir, ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_coo_from_dia',nr,nc,nrd,ncd, rdisp_ + nrcmdisp = min(nr-rdisp_,nc-rdisp_) + rdisp1 = 1-rdisp_ + nz = 0 + do j=1, ncd + if (offsets(j)>=0) then + ir1 = 1 + ! ir2 = min(nrd,nr - offsets(j) - rdisp_,nc-offsets(j)-rdisp_) + ir2 = min(nrd, nrcmdisp - offsets(j)) + else + ! ir1 = max(1,1-offsets(j)-rdisp_) + ir1 = max(1, rdisp1 - offsets(j)) + ir2 = min(nrd, nrcmdisp) + end if + if (debug) write(0,*) ' Loop J',j,ir1,ir2, offsets(j) + do i=ir1,ir2 + ir = i + rdisp_ + ic = i + rdisp_ + offsets(j) + if (debug) write(0,*) ' Loop I',i,ir,ic + nz = nz + 1 + ia(nz) = ir + ja(nz) = ic + val(nz) = data(i,j) + enddo + end do + +end subroutine psi_z_xtr_coo_from_dia + diff --git a/ext/impl/psi_z_xtr_dia_from_coo.f90 b/ext/impl/psi_z_xtr_dia_from_coo.f90 new file mode 100644 index 00000000..6b2542c6 --- /dev/null +++ b/ext/impl/psi_z_xtr_dia_from_coo.f90 @@ -0,0 +1,69 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +subroutine psi_z_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,& + & initdata, rdisp) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, zzero + + implicit none + integer(psb_ipk_), intent(in) :: nr, nc, nz, nrd,ncd,ia(:), ja(:), d(:) + complex(psb_dpk_), intent(in) :: val(:) + complex(psb_dpk_), intent(out) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initdata + integer(psb_ipk_), intent(in), optional :: rdisp + + !locals + logical :: initdata_ + integer(psb_ipk_) :: rdisp_ + integer(psb_ipk_) :: i,ir,ic,k + logical, parameter :: debug=.false. + + info = psb_success_ + initdata_ = .true. + if (present(initdata)) initdata_ = initdata + rdisp_ = 0 + if (present(rdisp)) rdisp_ = rdisp + + if (debug) write(0,*) 'Start xtr_dia_from_coo',nr,nc,nz,nrd,ncd,initdata_, rdisp_ + + if (initdata_) data(1:nrd,1:ncd) = zzero + + do i=1,nz + ir = ia(i) + k = ja(i) - ir + ic = d(nr+k) + if (debug) write(0,*) 'loop xtr_dia_from_coo :',ia(i),ja(i),k,ir-rdisp_,ic + data(ir-rdisp_,ic) = val(i) + enddo + + +end subroutine psi_z_xtr_dia_from_coo diff --git a/ext/impl/psi_z_xtr_ell_from_coo.f90 b/ext/impl/psi_z_xtr_ell_from_coo.f90 new file mode 100644 index 00000000..7133f2ae --- /dev/null +++ b/ext/impl/psi_z_xtr_ell_from_coo.f90 @@ -0,0 +1,63 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +subroutine psi_z_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,valc, & + & ja,val,irn,diag,ld) + use psb_base_mod, only : psb_ipk_, psb_success_, psb_dpk_, zzero + + implicit none + integer(psb_ipk_) :: i,nr,mxrwl,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*) + complex(psb_dpk_) :: valc(*), val(ld,*) + + integer(psb_ipk_) :: ii,jj,kk, kc,nc, ir, ic + kc = 1 + do ii = 1, nr + nc = irn(ii) + do jj=1,nc + !if (iac(kc) /= i+ii-1) write(0,*) 'Copy mismatch',iac(kc),i,ii,i+ii-1 + ir = iac(kc) + ic = jac(kc) + if (ir == ic) diag(ii) = jj + ja(ii,jj) = ic + val(ii,jj) = valc(kc) + kc = kc + 1 + end do + ! We are assuming that jac contains at least one valid entry + ! If the previous loop did not have any entries, pick one valid + ! value. + if (nc == 0) ic = jac(1) + do jj = nc+1,mxrwl + ja(ii,jj) = ic + val(ii,jj) = zzero + end do + end do +end subroutine psi_z_xtr_ell_from_coo + diff --git a/ext/psb_c_dia_mat_mod.f90 b/ext/psb_c_dia_mat_mod.f90 new file mode 100644 index 00000000..8311487b --- /dev/null +++ b/ext/psb_c_dia_mat_mod.f90 @@ -0,0 +1,513 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_dia_mat_mod + + use psb_c_base_mat_mod + + type, extends(psb_c_base_sparse_mat) :: psb_c_dia_sparse_mat + ! + ! DIA format, extended. + ! + + integer(psb_ipk_), allocatable :: offset(:) + integer(psb_ipk_) :: nzeros + complex(psb_spk_), allocatable :: data(:,:) + + contains + ! procedure, pass(a) :: get_size => c_dia_get_size + procedure, pass(a) :: get_nzeros => c_dia_get_nzeros + procedure, nopass :: get_fmt => c_dia_get_fmt + procedure, pass(a) :: sizeof => c_dia_sizeof + procedure, pass(a) :: csmm => psb_c_dia_csmm + procedure, pass(a) :: csmv => psb_c_dia_csmv + ! procedure, pass(a) :: inner_cssm => psb_c_dia_cssm + ! procedure, pass(a) :: inner_cssv => psb_c_dia_cssv + procedure, pass(a) :: scals => psb_c_dia_scals + procedure, pass(a) :: scalv => psb_c_dia_scal + procedure, pass(a) :: maxval => psb_c_dia_maxval + procedure, pass(a) :: rowsum => psb_c_dia_rowsum + procedure, pass(a) :: arwsum => psb_c_dia_arwsum + procedure, pass(a) :: colsum => psb_c_dia_colsum + procedure, pass(a) :: aclsum => psb_c_dia_aclsum + procedure, pass(a) :: reallocate_nz => psb_c_dia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_dia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_c_cp_dia_to_coo + procedure, pass(a) :: cp_from_coo => psb_c_cp_dia_from_coo + ! procedure, pass(a) :: mv_to_coo => psb_c_mv_dia_to_coo + procedure, pass(a) :: mv_from_coo => psb_c_mv_dia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_c_mv_dia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_c_mv_dia_from_fmt + ! procedure, pass(a) :: csput_a => psb_c_dia_csput_a + procedure, pass(a) :: get_diag => psb_c_dia_get_diag + procedure, pass(a) :: csgetptn => psb_c_dia_csgetptn + procedure, pass(a) :: csgetrow => psb_c_dia_csgetrow + ! procedure, pass(a) :: get_nz_row => c_dia_get_nz_row + procedure, pass(a) :: reinit => psb_c_dia_reinit + ! procedure, pass(a) :: trim => psb_c_dia_trim + procedure, pass(a) :: print => psb_c_dia_print + procedure, pass(a) :: free => c_dia_free + procedure, pass(a) :: mold => psb_c_dia_mold + + end type psb_c_dia_sparse_mat + + private :: c_dia_get_nzeros, c_dia_free, c_dia_get_fmt, & + & c_dia_sizeof !, c_dia_get_size, c_dia_get_nz_row + + interface + subroutine psb_c_dia_reallocate_nz(nz,a) + import :: psb_c_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_dia_sparse_mat), intent(inout) :: a + end subroutine psb_c_dia_reallocate_nz + end interface + + interface + subroutine psb_c_dia_reinit(a,clear) + import :: psb_c_dia_sparse_mat + class(psb_c_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_c_dia_reinit + end interface + + interface + subroutine psb_c_dia_trim(a) + import :: psb_c_dia_sparse_mat + class(psb_c_dia_sparse_mat), intent(inout) :: a + end subroutine psb_c_dia_trim + end interface + + interface + subroutine psb_c_dia_mold(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dia_mold + end interface + + interface + subroutine psb_c_dia_allocate_mnnz(m,n,a,nz) + import :: psb_c_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_dia_allocate_mnnz + end interface + + interface + subroutine psb_c_dia_print(iout,a,iv,head,ivr,ivc) + import :: psb_c_dia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_c_dia_print + end interface + + interface + subroutine psb_c_cp_dia_to_coo(a,b,info) + import :: psb_c_coo_sparse_mat, psb_c_dia_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_dia_to_coo + end interface + + interface + subroutine psb_c_cp_dia_from_coo(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_dia_from_coo + end interface + + interface + subroutine psb_c_cp_dia_to_fmt(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_dia_to_fmt + end interface + + interface + subroutine psb_c_cp_dia_from_fmt(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_dia_from_fmt + end interface + + interface + subroutine psb_c_mv_dia_to_coo(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_dia_to_coo + end interface + + interface + subroutine psb_c_mv_dia_from_coo(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_dia_from_coo + end interface + + interface + subroutine psb_c_mv_dia_to_fmt(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_dia_to_fmt + end interface + + interface + subroutine psb_c_mv_dia_from_fmt(a,b,info) + import :: psb_c_dia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_dia_from_fmt + end interface + + interface + subroutine psb_c_dia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dia_csput_a + end interface + + interface + subroutine psb_c_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_c_dia_csgetptn + end interface + + interface + subroutine psb_c_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_c_dia_csgetrow + end interface + + interface + subroutine psb_c_dia_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_c_dia_csgetblk + end interface + + interface + subroutine psb_c_dia_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_dia_cssv + subroutine psb_c_dia_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_dia_cssm + end interface + + interface + subroutine psb_c_dia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_dia_csmv + subroutine psb_c_dia_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_dia_csmm + end interface + + + interface + function psb_c_dia_maxval(a) result(res) + import :: psb_c_dia_sparse_mat, psb_spk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_dia_maxval + end interface + + interface + function psb_c_dia_csnmi(a) result(res) + import :: psb_c_dia_sparse_mat, psb_spk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_dia_csnmi + end interface + + interface + function psb_c_dia_csnm1(a) result(res) + import :: psb_c_dia_sparse_mat, psb_spk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_dia_csnm1 + end interface + + interface + subroutine psb_c_dia_rowsum(d,a) + import :: psb_c_dia_sparse_mat, psb_spk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_dia_rowsum + end interface + + interface + subroutine psb_c_dia_arwsum(d,a) + import :: psb_c_dia_sparse_mat, psb_spk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_dia_arwsum + end interface + + interface + subroutine psb_c_dia_colsum(d,a) + import :: psb_c_dia_sparse_mat, psb_spk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_dia_colsum + end interface + + interface + subroutine psb_c_dia_aclsum(d,a) + import :: psb_c_dia_sparse_mat, psb_spk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_dia_aclsum + end interface + + interface + subroutine psb_c_dia_get_diag(a,d,info) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dia_get_diag + end interface + + interface + subroutine psb_c_dia_scal(d,a,info,side) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_dia_scal + end interface + + interface + subroutine psb_c_dia_scals(d,a,info) + import :: psb_c_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dia_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dia_scals + end interface + + interface psi_convert_dia_from_coo + subroutine psi_c_convert_dia_from_coo(a,tmp,info) + import :: psb_c_dia_sparse_mat, psb_ipk_, psb_c_coo_sparse_mat + implicit none + class(psb_c_dia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_c_convert_dia_from_coo + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_dia_sizeof(a) result(res) + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + end function c_dia_sizeof + + function c_dia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIA' + end function c_dia_get_fmt + + function c_dia_get_nzeros(a) result(res) + implicit none + class(psb_c_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function c_dia_get_nzeros + + ! function c_dia_get_size(a) result(res) + ! implicit none + ! class(psb_c_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function c_dia_get_size + + + ! function c_dia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_c_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function c_dia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine c_dia_free(a) + implicit none + + class(psb_c_dia_sparse_mat), intent(inout) :: a + + if (allocated(a%data)) deallocate(a%data) + if (allocated(a%offset)) deallocate(a%offset) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine c_dia_free + + +end module psb_c_dia_mat_mod diff --git a/ext/psb_c_dns_mat_mod.f90 b/ext/psb_c_dns_mat_mod.f90 new file mode 100644 index 00000000..5e5a191d --- /dev/null +++ b/ext/psb_c_dns_mat_mod.f90 @@ -0,0 +1,467 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +module psb_c_dns_mat_mod + + use psb_c_base_mat_mod + + type, extends(psb_c_base_sparse_mat) :: psb_c_dns_sparse_mat + ! + ! DNS format: a very simple dense matrix storage + ! psb_spk_ : kind for double precision reals + ! psb_ipk_: kind for normal integers. + ! psb_sizeof_dp: variable holding size in bytes of + ! a double + ! psb_sizeof_ip: size in bytes of an integer + ! + ! psb_realloc(n,v,info) Reallocate: does what it says + ! psb_realloc(m,n,a,info) on rank 1 and 2 arrays, may start + ! from unallocated + ! + ! + integer(psb_ipk_) :: nnz + complex(psb_spk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: get_size => c_dns_get_size + procedure, pass(a) :: get_nzeros => c_dns_get_nzeros + procedure, nopass :: get_fmt => c_dns_get_fmt + procedure, pass(a) :: sizeof => c_dns_sizeof + procedure, pass(a) :: csmv => psb_c_dns_csmv + procedure, pass(a) :: csmm => psb_c_dns_csmm + procedure, pass(a) :: csnmi => psb_c_dns_csnmi + procedure, pass(a) :: reallocate_nz => psb_c_dns_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_dns_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_c_cp_dns_to_coo + procedure, pass(a) :: cp_from_coo => psb_c_cp_dns_from_coo + procedure, pass(a) :: mv_to_coo => psb_c_mv_dns_to_coo + procedure, pass(a) :: mv_from_coo => psb_c_mv_dns_from_coo + procedure, pass(a) :: get_diag => psb_c_dns_get_diag + procedure, pass(a) :: csgetrow => psb_c_dns_csgetrow + procedure, pass(a) :: get_nz_row => c_dns_get_nz_row + procedure, pass(a) :: trim => psb_c_dns_trim + procedure, pass(a) :: free => c_dns_free + procedure, pass(a) :: mold => psb_c_dns_mold + + end type psb_c_dns_sparse_mat + + private :: c_dns_get_nzeros, c_dns_free, c_dns_get_fmt, & + & c_dns_get_size, c_dns_sizeof, c_dns_get_nz_row + + ! + ! + !> Function reallocate_nz + !! \memberof psb_c_dns_sparse_mat + !! \brief One--parameters version of (re)allocate + !! + !! \param nz number of nonzeros to allocate for + !! i.e. makes sure that the internal storage + !! allows for NZ coefficients and their indices. + ! + interface + subroutine psb_c_dns_reallocate_nz(nz,a) + import :: psb_c_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_dns_sparse_mat), intent(inout) :: a + end subroutine psb_c_dns_reallocate_nz + end interface + + !> Function trim + !! \memberof psb_c_dns_sparse_mat + !! \brief Memory trim + !! Make sure the memory allocation of the sparse matrix is as tight as + !! possible given the actual number of nonzeros it contains. + ! + interface + subroutine psb_c_dns_trim(a) + import :: psb_c_dns_sparse_mat + class(psb_c_dns_sparse_mat), intent(inout) :: a + end subroutine psb_c_dns_trim + end interface + + ! + !> Function mold: + !! \memberof psb_c_dns_sparse_mat + !! \brief Allocate a class(psb_c_dns_sparse_mat) with the + !! same dynamic type as the input. + !! This is equivalent to allocate( mold= ) and is provided + !! for those compilers not yet supporting mold. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_c_dns_mold(a,b,info) + import :: psb_c_dns_sparse_mat, psb_c_base_sparse_mat, psb_epk_, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dns_mold + end interface + + ! + ! + !> Function allocate_mnnz + !! \memberof psb_c_dns_sparse_mat + !! \brief Three-parameters version of allocate + !! + !! \param m number of rows + !! \param n number of cols + !! \param nz [estimated internally] number of nonzeros to allocate for + ! + interface + subroutine psb_c_dns_allocate_mnnz(m,n,a,nz) + import :: psb_c_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_dns_allocate_mnnz + end interface + + ! + !> Function cp_to_coo: + !! \memberof psb_c_dns_sparse_mat + !! \brief Copy and convert to psb_c_coo_sparse_mat + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_c_cp_dns_to_coo(a,b,info) + import :: psb_c_coo_sparse_mat, psb_c_dns_sparse_mat, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_dns_to_coo + end interface + + ! + !> Function cp_from_coo: + !! \memberof psb_c_dns_sparse_mat + !! \brief Copy and convert from psb_c_coo_sparse_mat + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_c_cp_dns_from_coo(a,b,info) + import :: psb_c_dns_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_dns_from_coo + end interface + + ! + !> Function mv_to_coo: + !! \memberof psb_c_dns_sparse_mat + !! \brief Convert to psb_c_coo_sparse_mat, freeing the source. + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_c_mv_dns_to_coo(a,b,info) + import :: psb_c_dns_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_dns_to_coo + end interface + + ! + !> Function mv_from_coo: + !! \memberof psb_c_dns_sparse_mat + !! \brief Convert from psb_c_coo_sparse_mat, freeing the source. + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_c_mv_dns_from_coo(a,b,info) + import :: psb_c_dns_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_dns_from_coo + end interface + + ! + ! + !> Function csgetrow: + !! \memberof psb_c_dns_sparse_mat + !! \brief Get a (subset of) row(s) + !! + !! getrow is the basic method by which the other (getblk, clip) can + !! be implemented. + !! + !! Returns the set + !! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) + !! each identifying the position of a nonzero in A + !! i.e. + !! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) + !! with IMIN<=IA(:)<=IMAX + !! with JMIN<=JA(:)<=JMAX + !! IA,JA are reallocated as necessary. + !! + !! \param imin the minimum row index we are interested in + !! \param imax the minimum row index we are interested in + !! \param nz the number of output coefficients + !! \param ia(:) the output row indices + !! \param ja(:) the output col indices + !! \param val(:) the output coefficients + !! \param info return code + !! \param jmin [1] minimum col index + !! \param jmax [a\%get_ncols()] maximum col index + !! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) + !! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] + !! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] + !! ( iren cannot be specified with rscale/cscale) + !! \param append [false] append to ia,ja + !! \param nzin [none] if append, then first new entry should go in entry nzin+1 + !! + ! + interface + subroutine psb_c_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_c_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_c_dns_csgetrow + end interface + + + + !> Function csmv: + !! \memberof psb_c_dns_sparse_mat + !! \brief Product by a dense rank 1 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_c_dns_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_dns_csmv + end interface + + !> Function csmm: + !! \memberof psb_c_dns_sparse_mat + !! \brief Product by a dense rank 2 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:,:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:,:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_c_dns_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_dns_csmm + end interface + + ! + ! + !> Function csnmi: + !! \memberof psb_c_dns_sparse_mat + !! \brief Operator infinity norm + !! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) + !! + ! + interface + function psb_c_dns_csnmi(a) result(res) + import :: psb_c_dns_sparse_mat, psb_spk_ + class(psb_c_dns_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_dns_csnmi + end interface + + ! + !> Function get_diag: + !! \memberof psb_c_dns_sparse_mat + !! \brief Extract the diagonal of A. + !! + !! D(i) = A(i:i), i=1:min(nrows,ncols) + !! + !! \param d(:) The output diagonal + !! \param info return code. + ! + interface + subroutine psb_c_dns_get_diag(a,d,info) + import :: psb_c_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_dns_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_dns_get_diag + end interface + + +contains + + ! + !> Function sizeof + !! \memberof psb_c_dns_sparse_mat + !! \brief Memory occupation in bytes + ! + function c_dns_sizeof(a) result(res) + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip + + end function c_dns_sizeof + + ! + !> Function get_fmt + !! \memberof psb_c_dns_sparse_mat + !! \brief return a short descriptive name (e.g. COO CSR etc.) + ! + function c_dns_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNS' + end function c_dns_get_fmt + + ! + !> Function get_nzeros + !! \memberof psb_c_dns_sparse_mat + !! \brief Current number of nonzero entries + ! + function c_dns_get_nzeros(a) result(res) + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nnz + end function c_dns_get_nzeros + + ! + !> Function get_size + !! \memberof psb_c_dns_sparse_mat + !! \brief Maximum number of nonzeros the current structure can hold + ! this is fixed once you initialize the matrix, with dense storage + ! you can hold up to MxN entries + function c_dns_get_size(a) result(res) + implicit none + class(psb_c_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = size(a%val) + + end function c_dns_get_size + + + ! + !> Function get_nz_row. + !! \memberof psb_c_coo_sparse_mat + !! \brief How many nonzeros in a row? + !! + !! \param idx The row to search. + !! + ! + function c_dns_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_c_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = count(a%val(idx,:) /= dzero) + end if + + end function c_dns_get_nz_row + + ! + !> Function free + !! \memberof psb_c_dns_sparse_mat + !! Name says all + + subroutine c_dns_free(a) + implicit none + + class(psb_c_dns_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) deallocate(a%val) + a%nnz = 0 + + + ! + ! Mark the object as empty just in case + ! + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine c_dns_free + + +end module psb_c_dns_mat_mod diff --git a/ext/psb_c_ell_mat_mod.f90 b/ext/psb_c_ell_mat_mod.f90 new file mode 100644 index 00000000..6954946f --- /dev/null +++ b/ext/psb_c_ell_mat_mod.f90 @@ -0,0 +1,552 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_ell_mat_mod + + use psb_c_base_mat_mod + + type, extends(psb_c_base_sparse_mat) :: psb_c_ell_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! Based on M. Heroux "A proposal for a sparse BLAS toolkit". + ! IRN is our addition, should help in transferring to/from + ! other formats (should come in handy for GPUs). + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! + + integer(psb_ipk_) :: nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:) + complex(psb_spk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: is_by_rows => c_ell_is_by_rows + procedure, pass(a) :: get_size => c_ell_get_size + procedure, pass(a) :: get_nzeros => c_ell_get_nzeros + procedure, nopass :: get_fmt => c_ell_get_fmt + procedure, pass(a) :: sizeof => c_ell_sizeof + procedure, pass(a) :: csmm => psb_c_ell_csmm + procedure, pass(a) :: csmv => psb_c_ell_csmv + procedure, pass(a) :: inner_cssm => psb_c_ell_cssm + procedure, pass(a) :: inner_cssv => psb_c_ell_cssv + procedure, pass(a) :: scals => psb_c_ell_scals + procedure, pass(a) :: scalv => psb_c_ell_scal + procedure, pass(a) :: maxval => psb_c_ell_maxval + procedure, pass(a) :: csnmi => psb_c_ell_csnmi + procedure, pass(a) :: csnm1 => psb_c_ell_csnm1 + procedure, pass(a) :: rowsum => psb_c_ell_rowsum + procedure, pass(a) :: arwsum => psb_c_ell_arwsum + procedure, pass(a) :: colsum => psb_c_ell_colsum + procedure, pass(a) :: aclsum => psb_c_ell_aclsum + procedure, pass(a) :: reallocate_nz => psb_c_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_ell_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_c_cp_ell_to_coo + procedure, pass(a) :: cp_from_coo => psb_c_cp_ell_from_coo + procedure, pass(a) :: cp_to_fmt => psb_c_cp_ell_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_c_cp_ell_from_fmt + procedure, pass(a) :: mv_to_coo => psb_c_mv_ell_to_coo + procedure, pass(a) :: mv_from_coo => psb_c_mv_ell_from_coo + procedure, pass(a) :: mv_to_fmt => psb_c_mv_ell_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_c_mv_ell_from_fmt + procedure, pass(a) :: csput_a => psb_c_ell_csput_a + procedure, pass(a) :: get_diag => psb_c_ell_get_diag + procedure, pass(a) :: csgetptn => psb_c_ell_csgetptn + procedure, pass(a) :: csgetrow => psb_c_ell_csgetrow + procedure, pass(a) :: get_nz_row => c_ell_get_nz_row + procedure, pass(a) :: reinit => psb_c_ell_reinit + procedure, pass(a) :: trim => psb_c_ell_trim + procedure, pass(a) :: print => psb_c_ell_print + procedure, pass(a) :: free => c_ell_free + procedure, pass(a) :: mold => psb_c_ell_mold + procedure, pass(a) :: get_nrm => c_ell_get_nrm + + end type psb_c_ell_sparse_mat + + private :: c_ell_get_nzeros, c_ell_free, c_ell_get_fmt, & + & c_ell_get_size, c_ell_sizeof, c_ell_get_nz_row, & + & c_ell_is_by_rows + + interface + subroutine psb_c_ell_reallocate_nz(nz,a) + import :: psb_c_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_ell_sparse_mat), intent(inout) :: a + end subroutine psb_c_ell_reallocate_nz + end interface + + interface + subroutine psb_c_ell_reinit(a,clear) + import :: psb_c_ell_sparse_mat + class(psb_c_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_c_ell_reinit + end interface + + interface + subroutine psb_c_ell_trim(a) + import :: psb_c_ell_sparse_mat + class(psb_c_ell_sparse_mat), intent(inout) :: a + end subroutine psb_c_ell_trim + end interface + + interface + subroutine psb_c_ell_mold(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ell_mold + end interface + + interface + subroutine psb_c_ell_allocate_mnnz(m,n,a,nz) + import :: psb_c_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_ell_allocate_mnnz + end interface + + interface + subroutine psb_c_ell_print(iout,a,iv,head,ivr,ivc) + import :: psb_c_ell_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_c_ell_print + end interface + + interface + subroutine psb_c_cp_ell_to_coo(a,b,info) + import :: psb_c_coo_sparse_mat, psb_c_ell_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ell_to_coo + end interface + + interface + subroutine psb_c_cp_ell_from_coo(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ell_from_coo + end interface + + interface + subroutine psb_c_cp_ell_to_fmt(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ell_to_fmt + end interface + + interface + subroutine psb_c_cp_ell_from_fmt(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_ell_from_fmt + end interface + + interface + subroutine psb_c_mv_ell_to_coo(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ell_to_coo + end interface + + interface + subroutine psb_c_mv_ell_from_coo(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ell_from_coo + end interface + + interface + subroutine psb_c_mv_ell_to_fmt(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ell_to_fmt + end interface + + interface + subroutine psb_c_mv_ell_from_fmt(a,b,info) + import :: psb_c_ell_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_ell_from_fmt + end interface + + interface + subroutine psb_c_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ell_csput_a + end interface + + interface + subroutine psb_c_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_c_ell_csgetptn + end interface + + interface + subroutine psb_c_ell_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_c_ell_csgetrow + end interface + + interface + subroutine psb_c_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_c_ell_csgetblk + end interface + + interface + subroutine psb_c_ell_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_ell_cssv + subroutine psb_c_ell_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_ell_cssm + end interface + + interface + subroutine psb_c_ell_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_ell_csmv + subroutine psb_c_ell_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_ell_csmm + end interface + + + interface + function psb_c_ell_maxval(a) result(res) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_ell_maxval + end interface + + interface + function psb_c_ell_csnmi(a) result(res) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_ell_csnmi + end interface + + interface + function psb_c_ell_csnm1(a) result(res) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_ell_csnm1 + end interface + + interface + subroutine psb_c_ell_rowsum(d,a) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_ell_rowsum + end interface + + interface + subroutine psb_c_ell_arwsum(d,a) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_ell_arwsum + end interface + + interface + subroutine psb_c_ell_colsum(d,a) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_ell_colsum + end interface + + interface + subroutine psb_c_ell_aclsum(d,a) + import :: psb_c_ell_sparse_mat, psb_spk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_ell_aclsum + end interface + + interface + subroutine psb_c_ell_get_diag(a,d,info) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ell_get_diag + end interface + + interface + subroutine psb_c_ell_scal(d,a,info,side) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_ell_scal + end interface + + interface + subroutine psb_c_ell_scals(d,a,info) + import :: psb_c_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_ell_scals + end interface + + interface + subroutine psi_c_convert_ell_from_coo(a,tmp,info,hacksize) + import :: psb_c_ell_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + implicit none + class(psb_c_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + end subroutine psi_c_convert_ell_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_ell_is_by_rows(a) result(res) + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + logical :: res + res = .true. + end function c_ell_is_by_rows + + function c_ell_sizeof(a) result(res) + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + + end function c_ell_sizeof + + function c_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELL' + end function c_ell_get_fmt + + function c_ell_get_nrm(a) result(res) + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = size(a%val,2) + end function c_ell_get_nrm + + function c_ell_get_nzeros(a) result(res) + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function c_ell_get_nzeros + + function c_ell_get_size(a) result(res) + implicit none + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = -1 + if (a%is_dev()) call a%sync() + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function c_ell_get_size + + + function c_ell_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_c_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + if (a%is_dev()) call a%sync() + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function c_ell_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine c_ell_free(a) + implicit none + + class(psb_c_ell_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine c_ell_free + + +end module psb_c_ell_mat_mod diff --git a/ext/psb_c_hdia_mat_mod.f90 b/ext/psb_c_hdia_mat_mod.f90 new file mode 100644 index 00000000..fbac05de --- /dev/null +++ b/ext/psb_c_hdia_mat_mod.f90 @@ -0,0 +1,534 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +module psb_c_hdia_mat_mod + + use psb_c_base_mat_mod + + + type, extends(psb_c_base_sparse_mat) :: psb_c_hdia_sparse_mat + ! + ! HDIA format + ! + integer(psb_ipk_), allocatable :: hackOffsets(:), diaOffsets(:) + complex(psb_spk_), allocatable :: val(:) + + + integer(psb_ipk_) :: nhacks, nzeros + integer(psb_ipk_) :: hacksize = 32 + integer(psb_epk_) :: dim=0 + + contains + ! procedure, pass(a) :: get_size => c_hdia_get_size + procedure, pass(a) :: get_nzeros => c_hdia_get_nzeros + procedure, pass(a) :: set_nzeros => c_hdia_set_nzeros + procedure, nopass :: get_fmt => c_hdia_get_fmt + procedure, pass(a) :: sizeof => c_hdia_sizeof + ! procedure, pass(a) :: csmm => psb_c_hdia_csmm + procedure, pass(a) :: csmv => psb_c_hdia_csmv + ! procedure, pass(a) :: inner_cssm => psb_c_hdia_cssm + ! procedure, pass(a) :: inner_cssv => psb_c_hdia_cssv + ! procedure, pass(a) :: scals => psb_c_hdia_scals + ! procedure, pass(a) :: scalv => psb_c_hdia_scal + ! procedure, pass(a) :: maxval => psb_c_hdia_maxval + ! procedure, pass(a) :: csnmi => psb_c_hdia_csnmi + ! procedure, pass(a) :: csnm1 => psb_c_hdia_csnm1 + ! procedure, pass(a) :: rowsum => psb_c_hdia_rowsum + ! procedure, pass(a) :: arwsum => psb_c_hdia_arwsum + ! procedure, pass(a) :: colsum => psb_c_hdia_colsum + ! procedure, pass(a) :: aclsum => psb_c_hdia_aclsum + ! procedure, pass(a) :: reallocate_nz => psb_c_hdia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_hdia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_c_cp_hdia_to_coo + procedure, pass(a) :: cp_from_coo => psb_c_cp_hdia_from_coo + ! procedure, pass(a) :: cp_to_fmt => psb_c_cp_hdia_to_fmt + ! procedure, pass(a) :: cp_from_fmt => psb_c_cp_hdia_from_fmt + procedure, pass(a) :: mv_to_coo => psb_c_mv_hdia_to_coo + procedure, pass(a) :: mv_from_coo => psb_c_mv_hdia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_c_mv_hdia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_c_mv_hdia_from_fmt + ! procedure, pass(a) :: csput_a => psb_c_hdia_csput_a + ! procedure, pass(a) :: get_diag => psb_c_hdia_get_diag + ! procedure, pass(a) :: csgetptn => psb_c_hdia_csgetptn + ! procedure, pass(a) :: csgetrow => psb_c_hdia_csgetrow + ! procedure, pass(a) :: get_nz_row => c_hdia_get_nz_row + ! procedure, pass(a) :: reinit => psb_c_hdia_reinit + ! procedure, pass(a) :: trim => psb_c_hdia_trim + procedure, pass(a) :: print => psb_c_hdia_print + procedure, pass(a) :: free => c_hdia_free + procedure, pass(a) :: mold => psb_c_hdia_mold + + end type psb_c_hdia_sparse_mat + + private :: c_hdia_get_nzeros, c_hdia_set_nzeros, c_hdia_free, & + & c_hdia_get_fmt, c_hdia_sizeof +!!$ & +!!$ & c_hdia_get_nz_row c_hdia_get_size, + +!!$ interface +!!$ subroutine psb_c_hdia_reallocate_nz(nz,a) +!!$ import :: psb_c_hdia_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_hdia_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_reinit(a,clear) +!!$ import :: psb_c_hdia_sparse_mat +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ logical, intent(in), optional :: clear +!!$ end subroutine psb_c_hdia_reinit +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_trim(a) +!!$ import :: psb_c_hdia_sparse_mat +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_c_hdia_trim +!!$ end interface + + interface + subroutine psb_c_hdia_mold(a,b,info) + import :: psb_c_hdia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hdia_mold + end interface + + interface + subroutine psb_c_hdia_allocate_mnnz(m,n,a,nz) + import :: psb_c_hdia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_hdia_allocate_mnnz + end interface + + interface + subroutine psb_c_hdia_print(iout,a,iv,head,ivr,ivc) + import :: psb_c_hdia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_c_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_c_hdia_print + end interface + + interface + subroutine psb_c_cp_hdia_to_coo(a,b,info) + import :: psb_c_coo_sparse_mat, psb_c_hdia_sparse_mat, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hdia_to_coo + end interface + + interface + subroutine psb_c_cp_hdia_from_coo(a,b,info) + import :: psb_c_hdia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_c_cp_hdia_to_fmt(a,b,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_c_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_cp_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_cp_hdia_from_fmt(a,b,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_c_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_cp_hdia_from_fmt +!!$ end interface + + interface + subroutine psb_c_mv_hdia_to_coo(a,b,info) + import :: psb_c_hdia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hdia_to_coo + end interface + + interface + subroutine psb_c_mv_hdia_from_coo(a,b,info) + import :: psb_c_hdia_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_c_mv_hdia_to_fmt(a,b,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_c_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_mv_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_mv_hdia_from_fmt(a,b,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_c_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_mv_hdia_from_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: val(:) +!!$ integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& +!!$ & imin,imax,jmin,jmax +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_hdia_csput_a +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_csgetptn(imin,imax,a,nz,ia,ja,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_c_hdia_csgetptn +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ complex(psb_spk_), allocatable, intent(inout) :: val(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_c_hdia_csgetrow +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_csgetblk(imin,imax,a,b,info,& +!!$ & jmin,jmax,iren,append,rscale,cscale) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_c_coo_sparse_mat, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_c_coo_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_c_hdia_csgetblk +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_cssv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:) +!!$ complex(psb_spk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_hdia_cssv +!!$ subroutine psb_c_hdia_cssm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_hdia_cssm +!!$ end interface + + interface + subroutine psb_c_hdia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hdia_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hdia_csmv +!!$ subroutine psb_c_hdia_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_c_hdia_csmm + end interface + + +!!$ interface +!!$ function psb_c_hdia_maxval(a) result(res) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_) :: res +!!$ end function psb_c_hdia_maxval +!!$ end interface +!!$ +!!$ interface +!!$ function psb_c_hdia_csnmi(a) result(res) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_) :: res +!!$ end function psb_c_hdia_csnmi +!!$ end interface +!!$ +!!$ interface +!!$ function psb_c_hdia_csnm1(a) result(res) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_) :: res +!!$ end function psb_c_hdia_csnm1 +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_rowsum(d,a) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_c_hdia_rowsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_arwsum(d,a) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_c_hdia_arwsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_colsum(d,a) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_c_hdia_colsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_aclsum(d,a) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_c_hdia_aclsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_get_diag(a,d,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_spk_), intent(out) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_hdia_get_diag +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_c_hdia_scal(d,a,info,side) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_c_hdia_scal +!!$ end interface + +!!$ interface +!!$ subroutine psb_c_hdia_scals(d,a,info) +!!$ import :: psb_c_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_c_hdia_sparse_mat), intent(inout) :: a +!!$ complex(psb_spk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_c_hdia_scals +!!$ end interface +!!$ + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_hdia_sizeof(a) result(res) + use psb_realloc_mod, only : psb_size + implicit none + class(psb_c_hdia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + integer(psb_ipk_) :: i + + if (a%is_dev()) call a%sync() + res = 0 + + res = res + psb_size(a%hackOffsets)*psb_sizeof_ip + res = res + psb_size(a%diaOffsets)*psb_sizeof_ip + res = res + psb_size(a%val) * (2*psb_sizeof_sp) + + end function c_hdia_sizeof + + function c_hdia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIA' + end function c_hdia_get_fmt + + function c_hdia_get_nzeros(a) result(res) + implicit none + class(psb_c_hdia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function c_hdia_get_nzeros + + subroutine c_hdia_set_nzeros(a,nz) + implicit none + class(psb_c_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + a%nzeros = nz + end subroutine c_hdia_set_nzeros + + ! function c_hdia_get_size(a) result(res) + ! implicit none + ! class(psb_c_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function c_hdia_get_size + + + ! function c_hdia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_c_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function c_hdia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine c_hdia_free(a) + implicit none + + class(psb_c_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: i, info + + + if (allocated(a%hackOffsets))& + & deallocate(a%hackOffsets,stat=info) + if (allocated(a%diaOffsets))& + & deallocate(a%diaOffsets,stat=info) + if (allocated(a%val))& + & deallocate(a%val,stat=info) + a%nhacks=0 + + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine c_hdia_free + + +end module psb_c_hdia_mat_mod diff --git a/ext/psb_c_hll_mat_mod.f90 b/ext/psb_c_hll_mat_mod.f90 new file mode 100644 index 00000000..966b60f5 --- /dev/null +++ b/ext/psb_c_hll_mat_mod.f90 @@ -0,0 +1,564 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_c_hll_mat_mod + + use psb_c_base_mat_mod + use psi_ext_util_mod + + type, extends(psb_c_base_sparse_mat) :: psb_c_hll_sparse_mat + ! + ! HLL format. (Hacked ELL) + ! A modification of ELL. + ! Basic idea: pack and pad data in blocks of HCK rows; + ! this reduces the impact of a lone, very long row. + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! 6. hksz: hack size (multiple of 32) + ! 7. hkoffs(:): offsets of the starts of hacks inside ja/val + ! + ! + ! + integer(psb_ipk_) :: hksz, nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:) + complex(psb_spk_), allocatable :: val(:) + + contains + + procedure, pass(a) :: get_hksz => c_hll_get_hksz + procedure, pass(a) :: set_hksz => c_hll_set_hksz + procedure, pass(a) :: get_size => c_hll_get_size + procedure, pass(a) :: set_nzeros => c_hll_set_nzeros + procedure, pass(a) :: get_nzeros => c_hll_get_nzeros + procedure, nopass :: get_fmt => c_hll_get_fmt + procedure, pass(a) :: sizeof => c_hll_sizeof + procedure, pass(a) :: csmm => psb_c_hll_csmm + procedure, pass(a) :: csmv => psb_c_hll_csmv + procedure, pass(a) :: inner_cssm => psb_c_hll_cssm + procedure, pass(a) :: inner_cssv => psb_c_hll_cssv + procedure, pass(a) :: scals => psb_c_hll_scals + procedure, pass(a) :: scalv => psb_c_hll_scal + procedure, pass(a) :: maxval => psb_c_hll_maxval + procedure, pass(a) :: csnmi => psb_c_hll_csnmi + procedure, pass(a) :: csnm1 => psb_c_hll_csnm1 + procedure, pass(a) :: rowsum => psb_c_hll_rowsum + procedure, pass(a) :: arwsum => psb_c_hll_arwsum + procedure, pass(a) :: colsum => psb_c_hll_colsum + procedure, pass(a) :: aclsum => psb_c_hll_aclsum + procedure, pass(a) :: reallocate_nz => psb_c_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_hll_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_c_cp_hll_to_coo + procedure, pass(a) :: cp_from_coo => psb_c_cp_hll_from_coo + procedure, pass(a) :: cp_to_fmt => psb_c_cp_hll_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_c_cp_hll_from_fmt + procedure, pass(a) :: mv_to_coo => psb_c_mv_hll_to_coo + procedure, pass(a) :: mv_from_coo => psb_c_mv_hll_from_coo + procedure, pass(a) :: mv_to_fmt => psb_c_mv_hll_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_c_mv_hll_from_fmt + procedure, pass(a) :: csput_a => psb_c_hll_csput_a + procedure, pass(a) :: get_diag => psb_c_hll_get_diag + procedure, pass(a) :: csgetptn => psb_c_hll_csgetptn + procedure, pass(a) :: csgetrow => psb_c_hll_csgetrow + procedure, pass(a) :: get_nz_row => c_hll_get_nz_row + procedure, pass(a) :: reinit => psb_c_hll_reinit + procedure, pass(a) :: print => psb_c_hll_print + procedure, pass(a) :: free => c_hll_free + procedure, pass(a) :: mold => psb_c_hll_mold + + end type psb_c_hll_sparse_mat + + private :: c_hll_get_nzeros, c_hll_free, c_hll_get_fmt, & + & c_hll_get_size, c_hll_sizeof, c_hll_get_nz_row, & + & c_hll_set_nzeros, c_hll_get_hksz, c_hll_set_hksz + + interface + subroutine psb_c_hll_reallocate_nz(nz,a) + import :: psb_c_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_c_hll_sparse_mat), intent(inout) :: a + end subroutine psb_c_hll_reallocate_nz + end interface + + interface + subroutine psb_c_hll_reinit(a,clear) + import :: psb_c_hll_sparse_mat + class(psb_c_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_c_hll_reinit + end interface + + interface + subroutine psb_c_hll_mold(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hll_mold + end interface + + interface + subroutine psb_c_hll_allocate_mnnz(m,n,a,nz) + import :: psb_c_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_c_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_hll_allocate_mnnz + end interface + + interface + subroutine psb_c_hll_print(iout,a,iv,head,ivr,ivc) + import :: psb_c_hll_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_c_hll_print + end interface + + interface + subroutine psb_c_cp_hll_to_coo(a,b,info) + import :: psb_c_coo_sparse_mat, psb_c_hll_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hll_to_coo + end interface + + interface + subroutine psb_c_cp_hll_from_coo(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hll_from_coo + end interface + + interface + subroutine psb_c_cp_hll_to_fmt(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hll_to_fmt + end interface + + interface + subroutine psb_c_cp_hll_from_fmt(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_cp_hll_from_fmt + end interface + + interface + subroutine psb_c_mv_hll_to_coo(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hll_to_coo + end interface + + interface + subroutine psb_c_mv_hll_from_coo(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hll_from_coo + end interface + + interface + subroutine psb_c_mv_hll_to_fmt(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hll_to_fmt + end interface + + interface + subroutine psb_c_mv_hll_from_fmt(a,b,info) + import :: psb_c_hll_sparse_mat, psb_c_base_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_mv_hll_from_fmt + end interface + + interface + subroutine psb_c_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hll_csput_a + end interface + + interface + subroutine psb_c_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_c_hll_csgetptn + end interface + + interface + subroutine psb_c_hll_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_c_hll_csgetrow + end interface + + interface + subroutine psb_c_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_c_coo_sparse_mat, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_c_hll_csgetblk + end interface + + interface + subroutine psb_c_hll_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hll_cssv + subroutine psb_c_hll_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hll_cssm + end interface + + interface + subroutine psb_c_hll_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hll_csmv + subroutine psb_c_hll_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_hll_csmm + end interface + + + interface + function psb_c_hll_maxval(a) result(res) + import :: psb_c_hll_sparse_mat, psb_spk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_hll_maxval + end interface + + interface + function psb_c_hll_csnmi(a) result(res) + import :: psb_c_hll_sparse_mat, psb_spk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_hll_csnmi + end interface + + interface + function psb_c_hll_csnm1(a) result(res) + import :: psb_c_hll_sparse_mat, psb_spk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_c_hll_csnm1 + end interface + + interface + subroutine psb_c_hll_rowsum(d,a) + import :: psb_c_hll_sparse_mat, psb_spk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_hll_rowsum + end interface + + interface + subroutine psb_c_hll_arwsum(d,a) + import :: psb_c_hll_sparse_mat, psb_spk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_hll_arwsum + end interface + + interface + subroutine psb_c_hll_colsum(d,a) + import :: psb_c_hll_sparse_mat, psb_spk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_hll_colsum + end interface + + interface + subroutine psb_c_hll_aclsum(d,a) + import :: psb_c_hll_sparse_mat, psb_spk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_c_hll_aclsum + end interface + + interface + subroutine psb_c_hll_get_diag(a,d,info) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hll_get_diag + end interface + + interface + subroutine psb_c_hll_scal(d,a,info,side) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_c_hll_scal + end interface + + interface + subroutine psb_c_hll_scals(d,a,info) + import :: psb_c_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_c_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_hll_scals + end interface + + interface psi_convert_hll_from_coo + subroutine psi_c_convert_hll_from_coo(a,hksz,tmp,info) + import :: psb_c_hll_sparse_mat, psb_ipk_, psb_c_coo_sparse_mat + implicit none + class(psb_c_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: hksz + class(psb_c_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_c_convert_hll_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function c_hll_sizeof(a) result(res) + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%hkoffs) + + end function c_hll_sizeof + + function c_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLL' + end function c_hll_get_fmt + + subroutine c_hll_set_nzeros(a,n) + implicit none + class(psb_c_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%nzt = n + end subroutine c_hll_set_nzeros + + function c_hll_get_nzeros(a) result(res) + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function c_hll_get_nzeros + + function c_hll_get_size(a) result(res) + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + if (a%is_dev()) call a%sync() + + res = -1 + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function c_hll_get_size + + + + function c_hll_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function c_hll_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine c_hll_free(a) + implicit none + + class(psb_c_hll_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + if (allocated(a%val)) deallocate(a%hkoffs) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + call a%set_hksz(izero) + + return + + end subroutine c_hll_free + + subroutine c_hll_set_hksz(a,n) + implicit none + class(psb_c_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%hksz = n + end subroutine c_hll_set_hksz + + function c_hll_get_hksz(a) result(res) + implicit none + class(psb_c_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = a%hksz + + end function c_hll_get_hksz + +end module psb_c_hll_mat_mod diff --git a/ext/psb_d_dia_mat_mod.f90 b/ext/psb_d_dia_mat_mod.f90 new file mode 100644 index 00000000..7df615ac --- /dev/null +++ b/ext/psb_d_dia_mat_mod.f90 @@ -0,0 +1,513 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_dia_mat_mod + + use psb_d_base_mat_mod + + type, extends(psb_d_base_sparse_mat) :: psb_d_dia_sparse_mat + ! + ! DIA format, extended. + ! + + integer(psb_ipk_), allocatable :: offset(:) + integer(psb_ipk_) :: nzeros + real(psb_dpk_), allocatable :: data(:,:) + + contains + ! procedure, pass(a) :: get_size => d_dia_get_size + procedure, pass(a) :: get_nzeros => d_dia_get_nzeros + procedure, nopass :: get_fmt => d_dia_get_fmt + procedure, pass(a) :: sizeof => d_dia_sizeof + procedure, pass(a) :: csmm => psb_d_dia_csmm + procedure, pass(a) :: csmv => psb_d_dia_csmv + ! procedure, pass(a) :: inner_cssm => psb_d_dia_cssm + ! procedure, pass(a) :: inner_cssv => psb_d_dia_cssv + procedure, pass(a) :: scals => psb_d_dia_scals + procedure, pass(a) :: scalv => psb_d_dia_scal + procedure, pass(a) :: maxval => psb_d_dia_maxval + procedure, pass(a) :: rowsum => psb_d_dia_rowsum + procedure, pass(a) :: arwsum => psb_d_dia_arwsum + procedure, pass(a) :: colsum => psb_d_dia_colsum + procedure, pass(a) :: aclsum => psb_d_dia_aclsum + procedure, pass(a) :: reallocate_nz => psb_d_dia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_dia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_d_cp_dia_to_coo + procedure, pass(a) :: cp_from_coo => psb_d_cp_dia_from_coo + ! procedure, pass(a) :: mv_to_coo => psb_d_mv_dia_to_coo + procedure, pass(a) :: mv_from_coo => psb_d_mv_dia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_d_mv_dia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_d_mv_dia_from_fmt + ! procedure, pass(a) :: csput_a => psb_d_dia_csput_a + procedure, pass(a) :: get_diag => psb_d_dia_get_diag + procedure, pass(a) :: csgetptn => psb_d_dia_csgetptn + procedure, pass(a) :: csgetrow => psb_d_dia_csgetrow + ! procedure, pass(a) :: get_nz_row => d_dia_get_nz_row + procedure, pass(a) :: reinit => psb_d_dia_reinit + ! procedure, pass(a) :: trim => psb_d_dia_trim + procedure, pass(a) :: print => psb_d_dia_print + procedure, pass(a) :: free => d_dia_free + procedure, pass(a) :: mold => psb_d_dia_mold + + end type psb_d_dia_sparse_mat + + private :: d_dia_get_nzeros, d_dia_free, d_dia_get_fmt, & + & d_dia_sizeof !, d_dia_get_size, d_dia_get_nz_row + + interface + subroutine psb_d_dia_reallocate_nz(nz,a) + import :: psb_d_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_dia_sparse_mat), intent(inout) :: a + end subroutine psb_d_dia_reallocate_nz + end interface + + interface + subroutine psb_d_dia_reinit(a,clear) + import :: psb_d_dia_sparse_mat + class(psb_d_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_d_dia_reinit + end interface + + interface + subroutine psb_d_dia_trim(a) + import :: psb_d_dia_sparse_mat + class(psb_d_dia_sparse_mat), intent(inout) :: a + end subroutine psb_d_dia_trim + end interface + + interface + subroutine psb_d_dia_mold(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dia_mold + end interface + + interface + subroutine psb_d_dia_allocate_mnnz(m,n,a,nz) + import :: psb_d_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_dia_allocate_mnnz + end interface + + interface + subroutine psb_d_dia_print(iout,a,iv,head,ivr,ivc) + import :: psb_d_dia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_d_dia_print + end interface + + interface + subroutine psb_d_cp_dia_to_coo(a,b,info) + import :: psb_d_coo_sparse_mat, psb_d_dia_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dia_to_coo + end interface + + interface + subroutine psb_d_cp_dia_from_coo(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dia_from_coo + end interface + + interface + subroutine psb_d_cp_dia_to_fmt(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dia_to_fmt + end interface + + interface + subroutine psb_d_cp_dia_from_fmt(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dia_from_fmt + end interface + + interface + subroutine psb_d_mv_dia_to_coo(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dia_to_coo + end interface + + interface + subroutine psb_d_mv_dia_from_coo(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dia_from_coo + end interface + + interface + subroutine psb_d_mv_dia_to_fmt(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dia_to_fmt + end interface + + interface + subroutine psb_d_mv_dia_from_fmt(a,b,info) + import :: psb_d_dia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dia_from_fmt + end interface + + interface + subroutine psb_d_dia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dia_csput_a + end interface + + interface + subroutine psb_d_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_dia_csgetptn + end interface + + interface + subroutine psb_d_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_d_dia_csgetrow + end interface + + interface + subroutine psb_d_dia_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_dia_csgetblk + end interface + + interface + subroutine psb_d_dia_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dia_cssv + subroutine psb_d_dia_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dia_cssm + end interface + + interface + subroutine psb_d_dia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dia_csmv + subroutine psb_d_dia_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dia_csmm + end interface + + + interface + function psb_d_dia_maxval(a) result(res) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_dia_maxval + end interface + + interface + function psb_d_dia_csnmi(a) result(res) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_dia_csnmi + end interface + + interface + function psb_d_dia_csnm1(a) result(res) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_dia_csnm1 + end interface + + interface + subroutine psb_d_dia_rowsum(d,a) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_dia_rowsum + end interface + + interface + subroutine psb_d_dia_arwsum(d,a) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_dia_arwsum + end interface + + interface + subroutine psb_d_dia_colsum(d,a) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_dia_colsum + end interface + + interface + subroutine psb_d_dia_aclsum(d,a) + import :: psb_d_dia_sparse_mat, psb_dpk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_dia_aclsum + end interface + + interface + subroutine psb_d_dia_get_diag(a,d,info) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dia_get_diag + end interface + + interface + subroutine psb_d_dia_scal(d,a,info,side) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_dia_scal + end interface + + interface + subroutine psb_d_dia_scals(d,a,info) + import :: psb_d_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dia_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dia_scals + end interface + + interface psi_convert_dia_from_coo + subroutine psi_d_convert_dia_from_coo(a,tmp,info) + import :: psb_d_dia_sparse_mat, psb_ipk_, psb_d_coo_sparse_mat + implicit none + class(psb_d_dia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_d_convert_dia_from_coo + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_dia_sizeof(a) result(res) + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_dp * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + end function d_dia_sizeof + + function d_dia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIA' + end function d_dia_get_fmt + + function d_dia_get_nzeros(a) result(res) + implicit none + class(psb_d_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function d_dia_get_nzeros + + ! function d_dia_get_size(a) result(res) + ! implicit none + ! class(psb_d_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function d_dia_get_size + + + ! function d_dia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_d_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function d_dia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine d_dia_free(a) + implicit none + + class(psb_d_dia_sparse_mat), intent(inout) :: a + + if (allocated(a%data)) deallocate(a%data) + if (allocated(a%offset)) deallocate(a%offset) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine d_dia_free + + +end module psb_d_dia_mat_mod diff --git a/ext/psb_d_dns_mat_mod.f90 b/ext/psb_d_dns_mat_mod.f90 new file mode 100644 index 00000000..f8c977bc --- /dev/null +++ b/ext/psb_d_dns_mat_mod.f90 @@ -0,0 +1,467 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +module psb_d_dns_mat_mod + + use psb_d_base_mat_mod + + type, extends(psb_d_base_sparse_mat) :: psb_d_dns_sparse_mat + ! + ! DNS format: a very simple dense matrix storage + ! psb_dpk_ : kind for double precision reals + ! psb_ipk_: kind for normal integers. + ! psb_sizeof_dp: variable holding size in bytes of + ! a double + ! psb_sizeof_ip: size in bytes of an integer + ! + ! psb_realloc(n,v,info) Reallocate: does what it says + ! psb_realloc(m,n,a,info) on rank 1 and 2 arrays, may start + ! from unallocated + ! + ! + integer(psb_ipk_) :: nnz + real(psb_dpk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: get_size => d_dns_get_size + procedure, pass(a) :: get_nzeros => d_dns_get_nzeros + procedure, nopass :: get_fmt => d_dns_get_fmt + procedure, pass(a) :: sizeof => d_dns_sizeof + procedure, pass(a) :: csmv => psb_d_dns_csmv + procedure, pass(a) :: csmm => psb_d_dns_csmm + procedure, pass(a) :: csnmi => psb_d_dns_csnmi + procedure, pass(a) :: reallocate_nz => psb_d_dns_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_dns_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_d_cp_dns_to_coo + procedure, pass(a) :: cp_from_coo => psb_d_cp_dns_from_coo + procedure, pass(a) :: mv_to_coo => psb_d_mv_dns_to_coo + procedure, pass(a) :: mv_from_coo => psb_d_mv_dns_from_coo + procedure, pass(a) :: get_diag => psb_d_dns_get_diag + procedure, pass(a) :: csgetrow => psb_d_dns_csgetrow + procedure, pass(a) :: get_nz_row => d_dns_get_nz_row + procedure, pass(a) :: trim => psb_d_dns_trim + procedure, pass(a) :: free => d_dns_free + procedure, pass(a) :: mold => psb_d_dns_mold + + end type psb_d_dns_sparse_mat + + private :: d_dns_get_nzeros, d_dns_free, d_dns_get_fmt, & + & d_dns_get_size, d_dns_sizeof, d_dns_get_nz_row + + ! + ! + !> Function reallocate_nz + !! \memberof psb_d_dns_sparse_mat + !! \brief One--parameters version of (re)allocate + !! + !! \param nz number of nonzeros to allocate for + !! i.e. makes sure that the internal storage + !! allows for NZ coefficients and their indices. + ! + interface + subroutine psb_d_dns_reallocate_nz(nz,a) + import :: psb_d_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_dns_sparse_mat), intent(inout) :: a + end subroutine psb_d_dns_reallocate_nz + end interface + + !> Function trim + !! \memberof psb_d_dns_sparse_mat + !! \brief Memory trim + !! Make sure the memory allocation of the sparse matrix is as tight as + !! possible given the actual number of nonzeros it contains. + ! + interface + subroutine psb_d_dns_trim(a) + import :: psb_d_dns_sparse_mat + class(psb_d_dns_sparse_mat), intent(inout) :: a + end subroutine psb_d_dns_trim + end interface + + ! + !> Function mold: + !! \memberof psb_d_dns_sparse_mat + !! \brief Allocate a class(psb_d_dns_sparse_mat) with the + !! same dynamic type as the input. + !! This is equivalent to allocate( mold= ) and is provided + !! for those compilers not yet supporting mold. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_d_dns_mold(a,b,info) + import :: psb_d_dns_sparse_mat, psb_d_base_sparse_mat, psb_epk_, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dns_mold + end interface + + ! + ! + !> Function allocate_mnnz + !! \memberof psb_d_dns_sparse_mat + !! \brief Three-parameters version of allocate + !! + !! \param m number of rows + !! \param n number of cols + !! \param nz [estimated internally] number of nonzeros to allocate for + ! + interface + subroutine psb_d_dns_allocate_mnnz(m,n,a,nz) + import :: psb_d_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_dns_allocate_mnnz + end interface + + ! + !> Function cp_to_coo: + !! \memberof psb_d_dns_sparse_mat + !! \brief Copy and convert to psb_d_coo_sparse_mat + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_d_cp_dns_to_coo(a,b,info) + import :: psb_d_coo_sparse_mat, psb_d_dns_sparse_mat, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dns_to_coo + end interface + + ! + !> Function cp_from_coo: + !! \memberof psb_d_dns_sparse_mat + !! \brief Copy and convert from psb_d_coo_sparse_mat + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_d_cp_dns_from_coo(a,b,info) + import :: psb_d_dns_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_dns_from_coo + end interface + + ! + !> Function mv_to_coo: + !! \memberof psb_d_dns_sparse_mat + !! \brief Convert to psb_d_coo_sparse_mat, freeing the source. + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_d_mv_dns_to_coo(a,b,info) + import :: psb_d_dns_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dns_to_coo + end interface + + ! + !> Function mv_from_coo: + !! \memberof psb_d_dns_sparse_mat + !! \brief Convert from psb_d_coo_sparse_mat, freeing the source. + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_d_mv_dns_from_coo(a,b,info) + import :: psb_d_dns_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_dns_from_coo + end interface + + ! + ! + !> Function csgetrow: + !! \memberof psb_d_dns_sparse_mat + !! \brief Get a (subset of) row(s) + !! + !! getrow is the basic method by which the other (getblk, clip) can + !! be implemented. + !! + !! Returns the set + !! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) + !! each identifying the position of a nonzero in A + !! i.e. + !! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) + !! with IMIN<=IA(:)<=IMAX + !! with JMIN<=JA(:)<=JMAX + !! IA,JA are reallocated as necessary. + !! + !! \param imin the minimum row index we are interested in + !! \param imax the minimum row index we are interested in + !! \param nz the number of output coefficients + !! \param ia(:) the output row indices + !! \param ja(:) the output col indices + !! \param val(:) the output coefficients + !! \param info return code + !! \param jmin [1] minimum col index + !! \param jmax [a\%get_ncols()] maximum col index + !! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) + !! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] + !! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] + !! ( iren cannot be specified with rscale/cscale) + !! \param append [false] append to ia,ja + !! \param nzin [none] if append, then first new entry should go in entry nzin+1 + !! + ! + interface + subroutine psb_d_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_d_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_d_dns_csgetrow + end interface + + + + !> Function csmv: + !! \memberof psb_d_dns_sparse_mat + !! \brief Product by a dense rank 1 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_d_dns_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dns_csmv + end interface + + !> Function csmm: + !! \memberof psb_d_dns_sparse_mat + !! \brief Product by a dense rank 2 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:,:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:,:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_d_dns_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_dns_csmm + end interface + + ! + ! + !> Function csnmi: + !! \memberof psb_d_dns_sparse_mat + !! \brief Operator infinity norm + !! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) + !! + ! + interface + function psb_d_dns_csnmi(a) result(res) + import :: psb_d_dns_sparse_mat, psb_dpk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_dns_csnmi + end interface + + ! + !> Function get_diag: + !! \memberof psb_d_dns_sparse_mat + !! \brief Extract the diagonal of A. + !! + !! D(i) = A(i:i), i=1:min(nrows,ncols) + !! + !! \param d(:) The output diagonal + !! \param info return code. + ! + interface + subroutine psb_d_dns_get_diag(a,d,info) + import :: psb_d_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_dns_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_dns_get_diag + end interface + + +contains + + ! + !> Function sizeof + !! \memberof psb_d_dns_sparse_mat + !! \brief Memory occupation in bytes + ! + function d_dns_sizeof(a) result(res) + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip + + end function d_dns_sizeof + + ! + !> Function get_fmt + !! \memberof psb_d_dns_sparse_mat + !! \brief return a short descriptive name (e.g. COO CSR etc.) + ! + function d_dns_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNS' + end function d_dns_get_fmt + + ! + !> Function get_nzeros + !! \memberof psb_d_dns_sparse_mat + !! \brief Current number of nonzero entries + ! + function d_dns_get_nzeros(a) result(res) + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nnz + end function d_dns_get_nzeros + + ! + !> Function get_size + !! \memberof psb_d_dns_sparse_mat + !! \brief Maximum number of nonzeros the current structure can hold + ! this is fixed once you initialize the matrix, with dense storage + ! you can hold up to MxN entries + function d_dns_get_size(a) result(res) + implicit none + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = size(a%val) + + end function d_dns_get_size + + + ! + !> Function get_nz_row. + !! \memberof psb_d_coo_sparse_mat + !! \brief How many nonzeros in a row? + !! + !! \param idx The row to search. + !! + ! + function d_dns_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_d_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = count(a%val(idx,:) /= dzero) + end if + + end function d_dns_get_nz_row + + ! + !> Function free + !! \memberof psb_d_dns_sparse_mat + !! Name says all + + subroutine d_dns_free(a) + implicit none + + class(psb_d_dns_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) deallocate(a%val) + a%nnz = 0 + + + ! + ! Mark the object as empty just in case + ! + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine d_dns_free + + +end module psb_d_dns_mat_mod diff --git a/ext/psb_d_ell_mat_mod.f90 b/ext/psb_d_ell_mat_mod.f90 new file mode 100644 index 00000000..cc945baf --- /dev/null +++ b/ext/psb_d_ell_mat_mod.f90 @@ -0,0 +1,552 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_ell_mat_mod + + use psb_d_base_mat_mod + + type, extends(psb_d_base_sparse_mat) :: psb_d_ell_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! Based on M. Heroux "A proposal for a sparse BLAS toolkit". + ! IRN is our addition, should help in transferring to/from + ! other formats (should come in handy for GPUs). + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! + + integer(psb_ipk_) :: nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:) + real(psb_dpk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: is_by_rows => d_ell_is_by_rows + procedure, pass(a) :: get_size => d_ell_get_size + procedure, pass(a) :: get_nzeros => d_ell_get_nzeros + procedure, nopass :: get_fmt => d_ell_get_fmt + procedure, pass(a) :: sizeof => d_ell_sizeof + procedure, pass(a) :: csmm => psb_d_ell_csmm + procedure, pass(a) :: csmv => psb_d_ell_csmv + procedure, pass(a) :: inner_cssm => psb_d_ell_cssm + procedure, pass(a) :: inner_cssv => psb_d_ell_cssv + procedure, pass(a) :: scals => psb_d_ell_scals + procedure, pass(a) :: scalv => psb_d_ell_scal + procedure, pass(a) :: maxval => psb_d_ell_maxval + procedure, pass(a) :: csnmi => psb_d_ell_csnmi + procedure, pass(a) :: csnm1 => psb_d_ell_csnm1 + procedure, pass(a) :: rowsum => psb_d_ell_rowsum + procedure, pass(a) :: arwsum => psb_d_ell_arwsum + procedure, pass(a) :: colsum => psb_d_ell_colsum + procedure, pass(a) :: aclsum => psb_d_ell_aclsum + procedure, pass(a) :: reallocate_nz => psb_d_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_ell_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_d_cp_ell_to_coo + procedure, pass(a) :: cp_from_coo => psb_d_cp_ell_from_coo + procedure, pass(a) :: cp_to_fmt => psb_d_cp_ell_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_d_cp_ell_from_fmt + procedure, pass(a) :: mv_to_coo => psb_d_mv_ell_to_coo + procedure, pass(a) :: mv_from_coo => psb_d_mv_ell_from_coo + procedure, pass(a) :: mv_to_fmt => psb_d_mv_ell_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_d_mv_ell_from_fmt + procedure, pass(a) :: csput_a => psb_d_ell_csput_a + procedure, pass(a) :: get_diag => psb_d_ell_get_diag + procedure, pass(a) :: csgetptn => psb_d_ell_csgetptn + procedure, pass(a) :: csgetrow => psb_d_ell_csgetrow + procedure, pass(a) :: get_nz_row => d_ell_get_nz_row + procedure, pass(a) :: reinit => psb_d_ell_reinit + procedure, pass(a) :: trim => psb_d_ell_trim + procedure, pass(a) :: print => psb_d_ell_print + procedure, pass(a) :: free => d_ell_free + procedure, pass(a) :: mold => psb_d_ell_mold + procedure, pass(a) :: get_nrm => d_ell_get_nrm + + end type psb_d_ell_sparse_mat + + private :: d_ell_get_nzeros, d_ell_free, d_ell_get_fmt, & + & d_ell_get_size, d_ell_sizeof, d_ell_get_nz_row, & + & d_ell_is_by_rows + + interface + subroutine psb_d_ell_reallocate_nz(nz,a) + import :: psb_d_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_ell_sparse_mat), intent(inout) :: a + end subroutine psb_d_ell_reallocate_nz + end interface + + interface + subroutine psb_d_ell_reinit(a,clear) + import :: psb_d_ell_sparse_mat + class(psb_d_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_d_ell_reinit + end interface + + interface + subroutine psb_d_ell_trim(a) + import :: psb_d_ell_sparse_mat + class(psb_d_ell_sparse_mat), intent(inout) :: a + end subroutine psb_d_ell_trim + end interface + + interface + subroutine psb_d_ell_mold(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ell_mold + end interface + + interface + subroutine psb_d_ell_allocate_mnnz(m,n,a,nz) + import :: psb_d_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_ell_allocate_mnnz + end interface + + interface + subroutine psb_d_ell_print(iout,a,iv,head,ivr,ivc) + import :: psb_d_ell_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_d_ell_print + end interface + + interface + subroutine psb_d_cp_ell_to_coo(a,b,info) + import :: psb_d_coo_sparse_mat, psb_d_ell_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ell_to_coo + end interface + + interface + subroutine psb_d_cp_ell_from_coo(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ell_from_coo + end interface + + interface + subroutine psb_d_cp_ell_to_fmt(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ell_to_fmt + end interface + + interface + subroutine psb_d_cp_ell_from_fmt(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_ell_from_fmt + end interface + + interface + subroutine psb_d_mv_ell_to_coo(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ell_to_coo + end interface + + interface + subroutine psb_d_mv_ell_from_coo(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ell_from_coo + end interface + + interface + subroutine psb_d_mv_ell_to_fmt(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ell_to_fmt + end interface + + interface + subroutine psb_d_mv_ell_from_fmt(a,b,info) + import :: psb_d_ell_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_ell_from_fmt + end interface + + interface + subroutine psb_d_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ell_csput_a + end interface + + interface + subroutine psb_d_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_ell_csgetptn + end interface + + interface + subroutine psb_d_ell_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_d_ell_csgetrow + end interface + + interface + subroutine psb_d_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_ell_csgetblk + end interface + + interface + subroutine psb_d_ell_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_ell_cssv + subroutine psb_d_ell_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_ell_cssm + end interface + + interface + subroutine psb_d_ell_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_ell_csmv + subroutine psb_d_ell_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_ell_csmm + end interface + + + interface + function psb_d_ell_maxval(a) result(res) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_ell_maxval + end interface + + interface + function psb_d_ell_csnmi(a) result(res) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_ell_csnmi + end interface + + interface + function psb_d_ell_csnm1(a) result(res) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_ell_csnm1 + end interface + + interface + subroutine psb_d_ell_rowsum(d,a) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_ell_rowsum + end interface + + interface + subroutine psb_d_ell_arwsum(d,a) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_ell_arwsum + end interface + + interface + subroutine psb_d_ell_colsum(d,a) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_ell_colsum + end interface + + interface + subroutine psb_d_ell_aclsum(d,a) + import :: psb_d_ell_sparse_mat, psb_dpk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_ell_aclsum + end interface + + interface + subroutine psb_d_ell_get_diag(a,d,info) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ell_get_diag + end interface + + interface + subroutine psb_d_ell_scal(d,a,info,side) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_ell_scal + end interface + + interface + subroutine psb_d_ell_scals(d,a,info) + import :: psb_d_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_ell_scals + end interface + + interface + subroutine psi_d_convert_ell_from_coo(a,tmp,info,hacksize) + import :: psb_d_ell_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + implicit none + class(psb_d_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + end subroutine psi_d_convert_ell_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_ell_is_by_rows(a) result(res) + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + logical :: res + res = .true. + end function d_ell_is_by_rows + + function d_ell_sizeof(a) result(res) + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + + end function d_ell_sizeof + + function d_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELL' + end function d_ell_get_fmt + + function d_ell_get_nrm(a) result(res) + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = size(a%val,2) + end function d_ell_get_nrm + + function d_ell_get_nzeros(a) result(res) + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function d_ell_get_nzeros + + function d_ell_get_size(a) result(res) + implicit none + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = -1 + if (a%is_dev()) call a%sync() + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function d_ell_get_size + + + function d_ell_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_d_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + if (a%is_dev()) call a%sync() + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function d_ell_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_ell_free(a) + implicit none + + class(psb_d_ell_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine d_ell_free + + +end module psb_d_ell_mat_mod diff --git a/ext/psb_d_hdia_mat_mod.f90 b/ext/psb_d_hdia_mat_mod.f90 new file mode 100644 index 00000000..25bc6898 --- /dev/null +++ b/ext/psb_d_hdia_mat_mod.f90 @@ -0,0 +1,534 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +module psb_d_hdia_mat_mod + + use psb_d_base_mat_mod + + + type, extends(psb_d_base_sparse_mat) :: psb_d_hdia_sparse_mat + ! + ! HDIA format + ! + integer(psb_ipk_), allocatable :: hackOffsets(:), diaOffsets(:) + real(psb_dpk_), allocatable :: val(:) + + + integer(psb_ipk_) :: nhacks, nzeros + integer(psb_ipk_) :: hacksize = 32 + integer(psb_epk_) :: dim=0 + + contains + ! procedure, pass(a) :: get_size => d_hdia_get_size + procedure, pass(a) :: get_nzeros => d_hdia_get_nzeros + procedure, pass(a) :: set_nzeros => d_hdia_set_nzeros + procedure, nopass :: get_fmt => d_hdia_get_fmt + procedure, pass(a) :: sizeof => d_hdia_sizeof + ! procedure, pass(a) :: csmm => psb_d_hdia_csmm + procedure, pass(a) :: csmv => psb_d_hdia_csmv + ! procedure, pass(a) :: inner_cssm => psb_d_hdia_cssm + ! procedure, pass(a) :: inner_cssv => psb_d_hdia_cssv + ! procedure, pass(a) :: scals => psb_d_hdia_scals + ! procedure, pass(a) :: scalv => psb_d_hdia_scal + ! procedure, pass(a) :: maxval => psb_d_hdia_maxval + ! procedure, pass(a) :: csnmi => psb_d_hdia_csnmi + ! procedure, pass(a) :: csnm1 => psb_d_hdia_csnm1 + ! procedure, pass(a) :: rowsum => psb_d_hdia_rowsum + ! procedure, pass(a) :: arwsum => psb_d_hdia_arwsum + ! procedure, pass(a) :: colsum => psb_d_hdia_colsum + ! procedure, pass(a) :: aclsum => psb_d_hdia_aclsum + ! procedure, pass(a) :: reallocate_nz => psb_d_hdia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_hdia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_d_cp_hdia_to_coo + procedure, pass(a) :: cp_from_coo => psb_d_cp_hdia_from_coo + ! procedure, pass(a) :: cp_to_fmt => psb_d_cp_hdia_to_fmt + ! procedure, pass(a) :: cp_from_fmt => psb_d_cp_hdia_from_fmt + procedure, pass(a) :: mv_to_coo => psb_d_mv_hdia_to_coo + procedure, pass(a) :: mv_from_coo => psb_d_mv_hdia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_d_mv_hdia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_d_mv_hdia_from_fmt + ! procedure, pass(a) :: csput_a => psb_d_hdia_csput_a + ! procedure, pass(a) :: get_diag => psb_d_hdia_get_diag + ! procedure, pass(a) :: csgetptn => psb_d_hdia_csgetptn + ! procedure, pass(a) :: csgetrow => psb_d_hdia_csgetrow + ! procedure, pass(a) :: get_nz_row => d_hdia_get_nz_row + ! procedure, pass(a) :: reinit => psb_d_hdia_reinit + ! procedure, pass(a) :: trim => psb_d_hdia_trim + procedure, pass(a) :: print => psb_d_hdia_print + procedure, pass(a) :: free => d_hdia_free + procedure, pass(a) :: mold => psb_d_hdia_mold + + end type psb_d_hdia_sparse_mat + + private :: d_hdia_get_nzeros, d_hdia_set_nzeros, d_hdia_free, & + & d_hdia_get_fmt, d_hdia_sizeof +!!$ & +!!$ & d_hdia_get_nz_row d_hdia_get_size, + +!!$ interface +!!$ subroutine psb_d_hdia_reallocate_nz(nz,a) +!!$ import :: psb_d_hdia_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_hdia_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_reinit(a,clear) +!!$ import :: psb_d_hdia_sparse_mat +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ logical, intent(in), optional :: clear +!!$ end subroutine psb_d_hdia_reinit +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_trim(a) +!!$ import :: psb_d_hdia_sparse_mat +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_d_hdia_trim +!!$ end interface + + interface + subroutine psb_d_hdia_mold(a,b,info) + import :: psb_d_hdia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hdia_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hdia_mold + end interface + + interface + subroutine psb_d_hdia_allocate_mnnz(m,n,a,nz) + import :: psb_d_hdia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_hdia_allocate_mnnz + end interface + + interface + subroutine psb_d_hdia_print(iout,a,iv,head,ivr,ivc) + import :: psb_d_hdia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_d_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_d_hdia_print + end interface + + interface + subroutine psb_d_cp_hdia_to_coo(a,b,info) + import :: psb_d_coo_sparse_mat, psb_d_hdia_sparse_mat, psb_ipk_ + class(psb_d_hdia_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hdia_to_coo + end interface + + interface + subroutine psb_d_cp_hdia_from_coo(a,b,info) + import :: psb_d_hdia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_d_cp_hdia_to_fmt(a,b,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_d_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_cp_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_cp_hdia_from_fmt(a,b,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_d_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_cp_hdia_from_fmt +!!$ end interface + + interface + subroutine psb_d_mv_hdia_to_coo(a,b,info) + import :: psb_d_hdia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hdia_to_coo + end interface + + interface + subroutine psb_d_mv_hdia_from_coo(a,b,info) + import :: psb_d_hdia_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hdia_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_d_mv_hdia_to_fmt(a,b,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_d_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_mv_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_mv_hdia_from_fmt(a,b,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_d_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_mv_hdia_from_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: val(:) +!!$ integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& +!!$ & imin,imax,jmin,jmax +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_hdia_csput_a +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_csgetptn(imin,imax,a,nz,ia,ja,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_d_hdia_csgetptn +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ real(psb_dpk_), allocatable, intent(inout) :: val(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_d_hdia_csgetrow +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_csgetblk(imin,imax,a,b,info,& +!!$ & jmin,jmax,iren,append,rscale,cscale) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_d_coo_sparse_mat, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_d_coo_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_d_hdia_csgetblk +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_cssv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:) +!!$ real(psb_dpk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_hdia_cssv +!!$ subroutine psb_d_hdia_cssm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_hdia_cssm +!!$ end interface + + interface + subroutine psb_d_hdia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hdia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hdia_csmv +!!$ subroutine psb_d_hdia_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_d_hdia_csmm + end interface + + +!!$ interface +!!$ function psb_d_hdia_maxval(a) result(res) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function psb_d_hdia_maxval +!!$ end interface +!!$ +!!$ interface +!!$ function psb_d_hdia_csnmi(a) result(res) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function psb_d_hdia_csnmi +!!$ end interface +!!$ +!!$ interface +!!$ function psb_d_hdia_csnm1(a) result(res) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function psb_d_hdia_csnm1 +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_rowsum(d,a) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_d_hdia_rowsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_arwsum(d,a) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_d_hdia_arwsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_colsum(d,a) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_d_hdia_colsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_aclsum(d,a) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_d_hdia_aclsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_get_diag(a,d,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_hdia_get_diag +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_d_hdia_scal(d,a,info,side) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_d_hdia_scal +!!$ end interface + +!!$ interface +!!$ subroutine psb_d_hdia_scals(d,a,info) +!!$ import :: psb_d_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_d_hdia_sparse_mat), intent(inout) :: a +!!$ real(psb_dpk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_d_hdia_scals +!!$ end interface +!!$ + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_hdia_sizeof(a) result(res) + use psb_realloc_mod, only : psb_size + implicit none + class(psb_d_hdia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + integer(psb_ipk_) :: i + + if (a%is_dev()) call a%sync() + res = 0 + + res = res + psb_size(a%hackOffsets)*psb_sizeof_ip + res = res + psb_size(a%diaOffsets)*psb_sizeof_ip + res = res + psb_size(a%val) * psb_sizeof_dp + + end function d_hdia_sizeof + + function d_hdia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIA' + end function d_hdia_get_fmt + + function d_hdia_get_nzeros(a) result(res) + implicit none + class(psb_d_hdia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function d_hdia_get_nzeros + + subroutine d_hdia_set_nzeros(a,nz) + implicit none + class(psb_d_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + a%nzeros = nz + end subroutine d_hdia_set_nzeros + + ! function d_hdia_get_size(a) result(res) + ! implicit none + ! class(psb_d_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function d_hdia_get_size + + + ! function d_hdia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_d_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function d_hdia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine d_hdia_free(a) + implicit none + + class(psb_d_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: i, info + + + if (allocated(a%hackOffsets))& + & deallocate(a%hackOffsets,stat=info) + if (allocated(a%diaOffsets))& + & deallocate(a%diaOffsets,stat=info) + if (allocated(a%val))& + & deallocate(a%val,stat=info) + a%nhacks=0 + + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine d_hdia_free + + +end module psb_d_hdia_mat_mod diff --git a/ext/psb_d_hll_mat_mod.f90 b/ext/psb_d_hll_mat_mod.f90 new file mode 100644 index 00000000..acc3b312 --- /dev/null +++ b/ext/psb_d_hll_mat_mod.f90 @@ -0,0 +1,564 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_d_hll_mat_mod + + use psb_d_base_mat_mod + use psi_ext_util_mod + + type, extends(psb_d_base_sparse_mat) :: psb_d_hll_sparse_mat + ! + ! HLL format. (Hacked ELL) + ! A modification of ELL. + ! Basic idea: pack and pad data in blocks of HCK rows; + ! this reduces the impact of a lone, very long row. + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! 6. hksz: hack size (multiple of 32) + ! 7. hkoffs(:): offsets of the starts of hacks inside ja/val + ! + ! + ! + integer(psb_ipk_) :: hksz, nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:) + real(psb_dpk_), allocatable :: val(:) + + contains + + procedure, pass(a) :: get_hksz => d_hll_get_hksz + procedure, pass(a) :: set_hksz => d_hll_set_hksz + procedure, pass(a) :: get_size => d_hll_get_size + procedure, pass(a) :: set_nzeros => d_hll_set_nzeros + procedure, pass(a) :: get_nzeros => d_hll_get_nzeros + procedure, nopass :: get_fmt => d_hll_get_fmt + procedure, pass(a) :: sizeof => d_hll_sizeof + procedure, pass(a) :: csmm => psb_d_hll_csmm + procedure, pass(a) :: csmv => psb_d_hll_csmv + procedure, pass(a) :: inner_cssm => psb_d_hll_cssm + procedure, pass(a) :: inner_cssv => psb_d_hll_cssv + procedure, pass(a) :: scals => psb_d_hll_scals + procedure, pass(a) :: scalv => psb_d_hll_scal + procedure, pass(a) :: maxval => psb_d_hll_maxval + procedure, pass(a) :: csnmi => psb_d_hll_csnmi + procedure, pass(a) :: csnm1 => psb_d_hll_csnm1 + procedure, pass(a) :: rowsum => psb_d_hll_rowsum + procedure, pass(a) :: arwsum => psb_d_hll_arwsum + procedure, pass(a) :: colsum => psb_d_hll_colsum + procedure, pass(a) :: aclsum => psb_d_hll_aclsum + procedure, pass(a) :: reallocate_nz => psb_d_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_hll_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_d_cp_hll_to_coo + procedure, pass(a) :: cp_from_coo => psb_d_cp_hll_from_coo + procedure, pass(a) :: cp_to_fmt => psb_d_cp_hll_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_d_cp_hll_from_fmt + procedure, pass(a) :: mv_to_coo => psb_d_mv_hll_to_coo + procedure, pass(a) :: mv_from_coo => psb_d_mv_hll_from_coo + procedure, pass(a) :: mv_to_fmt => psb_d_mv_hll_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_d_mv_hll_from_fmt + procedure, pass(a) :: csput_a => psb_d_hll_csput_a + procedure, pass(a) :: get_diag => psb_d_hll_get_diag + procedure, pass(a) :: csgetptn => psb_d_hll_csgetptn + procedure, pass(a) :: csgetrow => psb_d_hll_csgetrow + procedure, pass(a) :: get_nz_row => d_hll_get_nz_row + procedure, pass(a) :: reinit => psb_d_hll_reinit + procedure, pass(a) :: print => psb_d_hll_print + procedure, pass(a) :: free => d_hll_free + procedure, pass(a) :: mold => psb_d_hll_mold + + end type psb_d_hll_sparse_mat + + private :: d_hll_get_nzeros, d_hll_free, d_hll_get_fmt, & + & d_hll_get_size, d_hll_sizeof, d_hll_get_nz_row, & + & d_hll_set_nzeros, d_hll_get_hksz, d_hll_set_hksz + + interface + subroutine psb_d_hll_reallocate_nz(nz,a) + import :: psb_d_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_hll_sparse_mat), intent(inout) :: a + end subroutine psb_d_hll_reallocate_nz + end interface + + interface + subroutine psb_d_hll_reinit(a,clear) + import :: psb_d_hll_sparse_mat + class(psb_d_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_d_hll_reinit + end interface + + interface + subroutine psb_d_hll_mold(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hll_mold + end interface + + interface + subroutine psb_d_hll_allocate_mnnz(m,n,a,nz) + import :: psb_d_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_hll_allocate_mnnz + end interface + + interface + subroutine psb_d_hll_print(iout,a,iv,head,ivr,ivc) + import :: psb_d_hll_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_d_hll_print + end interface + + interface + subroutine psb_d_cp_hll_to_coo(a,b,info) + import :: psb_d_coo_sparse_mat, psb_d_hll_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hll_to_coo + end interface + + interface + subroutine psb_d_cp_hll_from_coo(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hll_from_coo + end interface + + interface + subroutine psb_d_cp_hll_to_fmt(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hll_to_fmt + end interface + + interface + subroutine psb_d_cp_hll_from_fmt(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_hll_from_fmt + end interface + + interface + subroutine psb_d_mv_hll_to_coo(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hll_to_coo + end interface + + interface + subroutine psb_d_mv_hll_from_coo(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hll_from_coo + end interface + + interface + subroutine psb_d_mv_hll_to_fmt(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hll_to_fmt + end interface + + interface + subroutine psb_d_mv_hll_from_fmt(a,b,info) + import :: psb_d_hll_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_hll_from_fmt + end interface + + interface + subroutine psb_d_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hll_csput_a + end interface + + interface + subroutine psb_d_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_hll_csgetptn + end interface + + interface + subroutine psb_d_hll_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_d_hll_csgetrow + end interface + + interface + subroutine psb_d_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_hll_csgetblk + end interface + + interface + subroutine psb_d_hll_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hll_cssv + subroutine psb_d_hll_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hll_cssm + end interface + + interface + subroutine psb_d_hll_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hll_csmv + subroutine psb_d_hll_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_hll_csmm + end interface + + + interface + function psb_d_hll_maxval(a) result(res) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_hll_maxval + end interface + + interface + function psb_d_hll_csnmi(a) result(res) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_hll_csnmi + end interface + + interface + function psb_d_hll_csnm1(a) result(res) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_hll_csnm1 + end interface + + interface + subroutine psb_d_hll_rowsum(d,a) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_hll_rowsum + end interface + + interface + subroutine psb_d_hll_arwsum(d,a) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_hll_arwsum + end interface + + interface + subroutine psb_d_hll_colsum(d,a) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_hll_colsum + end interface + + interface + subroutine psb_d_hll_aclsum(d,a) + import :: psb_d_hll_sparse_mat, psb_dpk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_hll_aclsum + end interface + + interface + subroutine psb_d_hll_get_diag(a,d,info) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hll_get_diag + end interface + + interface + subroutine psb_d_hll_scal(d,a,info,side) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_hll_scal + end interface + + interface + subroutine psb_d_hll_scals(d,a,info) + import :: psb_d_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_hll_scals + end interface + + interface psi_convert_hll_from_coo + subroutine psi_d_convert_hll_from_coo(a,hksz,tmp,info) + import :: psb_d_hll_sparse_mat, psb_ipk_, psb_d_coo_sparse_mat + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: hksz + class(psb_d_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_d_convert_hll_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_hll_sizeof(a) result(res) + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%hkoffs) + + end function d_hll_sizeof + + function d_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLL' + end function d_hll_get_fmt + + subroutine d_hll_set_nzeros(a,n) + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%nzt = n + end subroutine d_hll_set_nzeros + + function d_hll_get_nzeros(a) result(res) + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function d_hll_get_nzeros + + function d_hll_get_size(a) result(res) + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + if (a%is_dev()) call a%sync() + + res = -1 + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function d_hll_get_size + + + + function d_hll_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function d_hll_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_hll_free(a) + implicit none + + class(psb_d_hll_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + if (allocated(a%val)) deallocate(a%hkoffs) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + call a%set_hksz(izero) + + return + + end subroutine d_hll_free + + subroutine d_hll_set_hksz(a,n) + implicit none + class(psb_d_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%hksz = n + end subroutine d_hll_set_hksz + + function d_hll_get_hksz(a) result(res) + implicit none + class(psb_d_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = a%hksz + + end function d_hll_get_hksz + +end module psb_d_hll_mat_mod diff --git a/ext/psb_ext_mod.F90 b/ext/psb_ext_mod.F90 new file mode 100644 index 00000000..b1dbdb59 --- /dev/null +++ b/ext/psb_ext_mod.F90 @@ -0,0 +1,65 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_ext_mod + use psb_const_mod + use psi_ext_util_mod + + use psb_s_dns_mat_mod + use psb_d_dns_mat_mod + use psb_c_dns_mat_mod + use psb_z_dns_mat_mod + + use psb_d_ell_mat_mod + use psb_s_ell_mat_mod + use psb_z_ell_mat_mod + use psb_c_ell_mat_mod + + use psb_s_hll_mat_mod + use psb_d_hll_mat_mod + use psb_c_hll_mat_mod + use psb_z_hll_mat_mod + + use psb_s_dia_mat_mod + use psb_d_dia_mat_mod + use psb_c_dia_mat_mod + use psb_z_dia_mat_mod + + use psb_s_hdia_mat_mod + use psb_d_hdia_mat_mod + use psb_c_hdia_mat_mod + use psb_z_hdia_mat_mod + +#ifdef HAVE_RSB + use psb_d_rsb_mat_mod +#endif +end module psb_ext_mod diff --git a/ext/psb_s_dia_mat_mod.f90 b/ext/psb_s_dia_mat_mod.f90 new file mode 100644 index 00000000..3a11d959 --- /dev/null +++ b/ext/psb_s_dia_mat_mod.f90 @@ -0,0 +1,513 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_dia_mat_mod + + use psb_s_base_mat_mod + + type, extends(psb_s_base_sparse_mat) :: psb_s_dia_sparse_mat + ! + ! DIA format, extended. + ! + + integer(psb_ipk_), allocatable :: offset(:) + integer(psb_ipk_) :: nzeros + real(psb_spk_), allocatable :: data(:,:) + + contains + ! procedure, pass(a) :: get_size => s_dia_get_size + procedure, pass(a) :: get_nzeros => s_dia_get_nzeros + procedure, nopass :: get_fmt => s_dia_get_fmt + procedure, pass(a) :: sizeof => s_dia_sizeof + procedure, pass(a) :: csmm => psb_s_dia_csmm + procedure, pass(a) :: csmv => psb_s_dia_csmv + ! procedure, pass(a) :: inner_cssm => psb_s_dia_cssm + ! procedure, pass(a) :: inner_cssv => psb_s_dia_cssv + procedure, pass(a) :: scals => psb_s_dia_scals + procedure, pass(a) :: scalv => psb_s_dia_scal + procedure, pass(a) :: maxval => psb_s_dia_maxval + procedure, pass(a) :: rowsum => psb_s_dia_rowsum + procedure, pass(a) :: arwsum => psb_s_dia_arwsum + procedure, pass(a) :: colsum => psb_s_dia_colsum + procedure, pass(a) :: aclsum => psb_s_dia_aclsum + procedure, pass(a) :: reallocate_nz => psb_s_dia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_dia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_s_cp_dia_to_coo + procedure, pass(a) :: cp_from_coo => psb_s_cp_dia_from_coo + ! procedure, pass(a) :: mv_to_coo => psb_s_mv_dia_to_coo + procedure, pass(a) :: mv_from_coo => psb_s_mv_dia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_s_mv_dia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_s_mv_dia_from_fmt + ! procedure, pass(a) :: csput_a => psb_s_dia_csput_a + procedure, pass(a) :: get_diag => psb_s_dia_get_diag + procedure, pass(a) :: csgetptn => psb_s_dia_csgetptn + procedure, pass(a) :: csgetrow => psb_s_dia_csgetrow + ! procedure, pass(a) :: get_nz_row => s_dia_get_nz_row + procedure, pass(a) :: reinit => psb_s_dia_reinit + ! procedure, pass(a) :: trim => psb_s_dia_trim + procedure, pass(a) :: print => psb_s_dia_print + procedure, pass(a) :: free => s_dia_free + procedure, pass(a) :: mold => psb_s_dia_mold + + end type psb_s_dia_sparse_mat + + private :: s_dia_get_nzeros, s_dia_free, s_dia_get_fmt, & + & s_dia_sizeof !, s_dia_get_size, s_dia_get_nz_row + + interface + subroutine psb_s_dia_reallocate_nz(nz,a) + import :: psb_s_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_dia_sparse_mat), intent(inout) :: a + end subroutine psb_s_dia_reallocate_nz + end interface + + interface + subroutine psb_s_dia_reinit(a,clear) + import :: psb_s_dia_sparse_mat + class(psb_s_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_s_dia_reinit + end interface + + interface + subroutine psb_s_dia_trim(a) + import :: psb_s_dia_sparse_mat + class(psb_s_dia_sparse_mat), intent(inout) :: a + end subroutine psb_s_dia_trim + end interface + + interface + subroutine psb_s_dia_mold(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dia_mold + end interface + + interface + subroutine psb_s_dia_allocate_mnnz(m,n,a,nz) + import :: psb_s_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_dia_allocate_mnnz + end interface + + interface + subroutine psb_s_dia_print(iout,a,iv,head,ivr,ivc) + import :: psb_s_dia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_s_dia_print + end interface + + interface + subroutine psb_s_cp_dia_to_coo(a,b,info) + import :: psb_s_coo_sparse_mat, psb_s_dia_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dia_to_coo + end interface + + interface + subroutine psb_s_cp_dia_from_coo(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dia_from_coo + end interface + + interface + subroutine psb_s_cp_dia_to_fmt(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dia_to_fmt + end interface + + interface + subroutine psb_s_cp_dia_from_fmt(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dia_from_fmt + end interface + + interface + subroutine psb_s_mv_dia_to_coo(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dia_to_coo + end interface + + interface + subroutine psb_s_mv_dia_from_coo(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dia_from_coo + end interface + + interface + subroutine psb_s_mv_dia_to_fmt(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dia_to_fmt + end interface + + interface + subroutine psb_s_mv_dia_from_fmt(a,b,info) + import :: psb_s_dia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dia_from_fmt + end interface + + interface + subroutine psb_s_dia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dia_csput_a + end interface + + interface + subroutine psb_s_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_s_dia_csgetptn + end interface + + interface + subroutine psb_s_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_s_dia_csgetrow + end interface + + interface + subroutine psb_s_dia_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_s_dia_csgetblk + end interface + + interface + subroutine psb_s_dia_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dia_cssv + subroutine psb_s_dia_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dia_cssm + end interface + + interface + subroutine psb_s_dia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dia_csmv + subroutine psb_s_dia_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dia_csmm + end interface + + + interface + function psb_s_dia_maxval(a) result(res) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_dia_maxval + end interface + + interface + function psb_s_dia_csnmi(a) result(res) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_dia_csnmi + end interface + + interface + function psb_s_dia_csnm1(a) result(res) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_dia_csnm1 + end interface + + interface + subroutine psb_s_dia_rowsum(d,a) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_dia_rowsum + end interface + + interface + subroutine psb_s_dia_arwsum(d,a) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_dia_arwsum + end interface + + interface + subroutine psb_s_dia_colsum(d,a) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_dia_colsum + end interface + + interface + subroutine psb_s_dia_aclsum(d,a) + import :: psb_s_dia_sparse_mat, psb_spk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_dia_aclsum + end interface + + interface + subroutine psb_s_dia_get_diag(a,d,info) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dia_get_diag + end interface + + interface + subroutine psb_s_dia_scal(d,a,info,side) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_dia_scal + end interface + + interface + subroutine psb_s_dia_scals(d,a,info) + import :: psb_s_dia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dia_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dia_scals + end interface + + interface psi_convert_dia_from_coo + subroutine psi_s_convert_dia_from_coo(a,tmp,info) + import :: psb_s_dia_sparse_mat, psb_ipk_, psb_s_coo_sparse_mat + implicit none + class(psb_s_dia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_s_convert_dia_from_coo + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_dia_sizeof(a) result(res) + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_sp * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + end function s_dia_sizeof + + function s_dia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIA' + end function s_dia_get_fmt + + function s_dia_get_nzeros(a) result(res) + implicit none + class(psb_s_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function s_dia_get_nzeros + + ! function s_dia_get_size(a) result(res) + ! implicit none + ! class(psb_s_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function s_dia_get_size + + + ! function s_dia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_s_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function s_dia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine s_dia_free(a) + implicit none + + class(psb_s_dia_sparse_mat), intent(inout) :: a + + if (allocated(a%data)) deallocate(a%data) + if (allocated(a%offset)) deallocate(a%offset) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine s_dia_free + + +end module psb_s_dia_mat_mod diff --git a/ext/psb_s_dns_mat_mod.f90 b/ext/psb_s_dns_mat_mod.f90 new file mode 100644 index 00000000..e9ea5f26 --- /dev/null +++ b/ext/psb_s_dns_mat_mod.f90 @@ -0,0 +1,467 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +module psb_s_dns_mat_mod + + use psb_s_base_mat_mod + + type, extends(psb_s_base_sparse_mat) :: psb_s_dns_sparse_mat + ! + ! DNS format: a very simple dense matrix storage + ! psb_spk_ : kind for double precision reals + ! psb_ipk_: kind for normal integers. + ! psb_sizeof_dp: variable holding size in bytes of + ! a double + ! psb_sizeof_ip: size in bytes of an integer + ! + ! psb_realloc(n,v,info) Reallocate: does what it says + ! psb_realloc(m,n,a,info) on rank 1 and 2 arrays, may start + ! from unallocated + ! + ! + integer(psb_ipk_) :: nnz + real(psb_spk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: get_size => s_dns_get_size + procedure, pass(a) :: get_nzeros => s_dns_get_nzeros + procedure, nopass :: get_fmt => s_dns_get_fmt + procedure, pass(a) :: sizeof => s_dns_sizeof + procedure, pass(a) :: csmv => psb_s_dns_csmv + procedure, pass(a) :: csmm => psb_s_dns_csmm + procedure, pass(a) :: csnmi => psb_s_dns_csnmi + procedure, pass(a) :: reallocate_nz => psb_s_dns_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_dns_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_s_cp_dns_to_coo + procedure, pass(a) :: cp_from_coo => psb_s_cp_dns_from_coo + procedure, pass(a) :: mv_to_coo => psb_s_mv_dns_to_coo + procedure, pass(a) :: mv_from_coo => psb_s_mv_dns_from_coo + procedure, pass(a) :: get_diag => psb_s_dns_get_diag + procedure, pass(a) :: csgetrow => psb_s_dns_csgetrow + procedure, pass(a) :: get_nz_row => s_dns_get_nz_row + procedure, pass(a) :: trim => psb_s_dns_trim + procedure, pass(a) :: free => s_dns_free + procedure, pass(a) :: mold => psb_s_dns_mold + + end type psb_s_dns_sparse_mat + + private :: s_dns_get_nzeros, s_dns_free, s_dns_get_fmt, & + & s_dns_get_size, s_dns_sizeof, s_dns_get_nz_row + + ! + ! + !> Function reallocate_nz + !! \memberof psb_s_dns_sparse_mat + !! \brief One--parameters version of (re)allocate + !! + !! \param nz number of nonzeros to allocate for + !! i.e. makes sure that the internal storage + !! allows for NZ coefficients and their indices. + ! + interface + subroutine psb_s_dns_reallocate_nz(nz,a) + import :: psb_s_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_dns_sparse_mat), intent(inout) :: a + end subroutine psb_s_dns_reallocate_nz + end interface + + !> Function trim + !! \memberof psb_s_dns_sparse_mat + !! \brief Memory trim + !! Make sure the memory allocation of the sparse matrix is as tight as + !! possible given the actual number of nonzeros it contains. + ! + interface + subroutine psb_s_dns_trim(a) + import :: psb_s_dns_sparse_mat + class(psb_s_dns_sparse_mat), intent(inout) :: a + end subroutine psb_s_dns_trim + end interface + + ! + !> Function mold: + !! \memberof psb_s_dns_sparse_mat + !! \brief Allocate a class(psb_s_dns_sparse_mat) with the + !! same dynamic type as the input. + !! This is equivalent to allocate( mold= ) and is provided + !! for those compilers not yet supporting mold. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_s_dns_mold(a,b,info) + import :: psb_s_dns_sparse_mat, psb_s_base_sparse_mat, psb_epk_, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dns_mold + end interface + + ! + ! + !> Function allocate_mnnz + !! \memberof psb_s_dns_sparse_mat + !! \brief Three-parameters version of allocate + !! + !! \param m number of rows + !! \param n number of cols + !! \param nz [estimated internally] number of nonzeros to allocate for + ! + interface + subroutine psb_s_dns_allocate_mnnz(m,n,a,nz) + import :: psb_s_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_dns_allocate_mnnz + end interface + + ! + !> Function cp_to_coo: + !! \memberof psb_s_dns_sparse_mat + !! \brief Copy and convert to psb_s_coo_sparse_mat + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_s_cp_dns_to_coo(a,b,info) + import :: psb_s_coo_sparse_mat, psb_s_dns_sparse_mat, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dns_to_coo + end interface + + ! + !> Function cp_from_coo: + !! \memberof psb_s_dns_sparse_mat + !! \brief Copy and convert from psb_s_coo_sparse_mat + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_s_cp_dns_from_coo(a,b,info) + import :: psb_s_dns_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_dns_from_coo + end interface + + ! + !> Function mv_to_coo: + !! \memberof psb_s_dns_sparse_mat + !! \brief Convert to psb_s_coo_sparse_mat, freeing the source. + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_s_mv_dns_to_coo(a,b,info) + import :: psb_s_dns_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dns_to_coo + end interface + + ! + !> Function mv_from_coo: + !! \memberof psb_s_dns_sparse_mat + !! \brief Convert from psb_s_coo_sparse_mat, freeing the source. + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_s_mv_dns_from_coo(a,b,info) + import :: psb_s_dns_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_dns_from_coo + end interface + + ! + ! + !> Function csgetrow: + !! \memberof psb_s_dns_sparse_mat + !! \brief Get a (subset of) row(s) + !! + !! getrow is the basic method by which the other (getblk, clip) can + !! be implemented. + !! + !! Returns the set + !! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) + !! each identifying the position of a nonzero in A + !! i.e. + !! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) + !! with IMIN<=IA(:)<=IMAX + !! with JMIN<=JA(:)<=JMAX + !! IA,JA are reallocated as necessary. + !! + !! \param imin the minimum row index we are interested in + !! \param imax the minimum row index we are interested in + !! \param nz the number of output coefficients + !! \param ia(:) the output row indices + !! \param ja(:) the output col indices + !! \param val(:) the output coefficients + !! \param info return code + !! \param jmin [1] minimum col index + !! \param jmax [a\%get_ncols()] maximum col index + !! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) + !! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] + !! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] + !! ( iren cannot be specified with rscale/cscale) + !! \param append [false] append to ia,ja + !! \param nzin [none] if append, then first new entry should go in entry nzin+1 + !! + ! + interface + subroutine psb_s_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_s_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_s_dns_csgetrow + end interface + + + + !> Function csmv: + !! \memberof psb_s_dns_sparse_mat + !! \brief Product by a dense rank 1 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_s_dns_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dns_csmv + end interface + + !> Function csmm: + !! \memberof psb_s_dns_sparse_mat + !! \brief Product by a dense rank 2 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:,:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:,:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_s_dns_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_dns_csmm + end interface + + ! + ! + !> Function csnmi: + !! \memberof psb_s_dns_sparse_mat + !! \brief Operator infinity norm + !! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) + !! + ! + interface + function psb_s_dns_csnmi(a) result(res) + import :: psb_s_dns_sparse_mat, psb_spk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_dns_csnmi + end interface + + ! + !> Function get_diag: + !! \memberof psb_s_dns_sparse_mat + !! \brief Extract the diagonal of A. + !! + !! D(i) = A(i:i), i=1:min(nrows,ncols) + !! + !! \param d(:) The output diagonal + !! \param info return code. + ! + interface + subroutine psb_s_dns_get_diag(a,d,info) + import :: psb_s_dns_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_dns_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_dns_get_diag + end interface + + +contains + + ! + !> Function sizeof + !! \memberof psb_s_dns_sparse_mat + !! \brief Memory occupation in bytes + ! + function s_dns_sizeof(a) result(res) + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip + + end function s_dns_sizeof + + ! + !> Function get_fmt + !! \memberof psb_s_dns_sparse_mat + !! \brief return a short descriptive name (e.g. COO CSR etc.) + ! + function s_dns_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNS' + end function s_dns_get_fmt + + ! + !> Function get_nzeros + !! \memberof psb_s_dns_sparse_mat + !! \brief Current number of nonzero entries + ! + function s_dns_get_nzeros(a) result(res) + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nnz + end function s_dns_get_nzeros + + ! + !> Function get_size + !! \memberof psb_s_dns_sparse_mat + !! \brief Maximum number of nonzeros the current structure can hold + ! this is fixed once you initialize the matrix, with dense storage + ! you can hold up to MxN entries + function s_dns_get_size(a) result(res) + implicit none + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = size(a%val) + + end function s_dns_get_size + + + ! + !> Function get_nz_row. + !! \memberof psb_s_coo_sparse_mat + !! \brief How many nonzeros in a row? + !! + !! \param idx The row to search. + !! + ! + function s_dns_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_s_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = count(a%val(idx,:) /= dzero) + end if + + end function s_dns_get_nz_row + + ! + !> Function free + !! \memberof psb_s_dns_sparse_mat + !! Name says all + + subroutine s_dns_free(a) + implicit none + + class(psb_s_dns_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) deallocate(a%val) + a%nnz = 0 + + + ! + ! Mark the object as empty just in case + ! + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine s_dns_free + + +end module psb_s_dns_mat_mod diff --git a/ext/psb_s_ell_mat_mod.f90 b/ext/psb_s_ell_mat_mod.f90 new file mode 100644 index 00000000..9e3dd8b4 --- /dev/null +++ b/ext/psb_s_ell_mat_mod.f90 @@ -0,0 +1,552 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_ell_mat_mod + + use psb_s_base_mat_mod + + type, extends(psb_s_base_sparse_mat) :: psb_s_ell_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! Based on M. Heroux "A proposal for a sparse BLAS toolkit". + ! IRN is our addition, should help in transferring to/from + ! other formats (should come in handy for GPUs). + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! + + integer(psb_ipk_) :: nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:) + real(psb_spk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: is_by_rows => s_ell_is_by_rows + procedure, pass(a) :: get_size => s_ell_get_size + procedure, pass(a) :: get_nzeros => s_ell_get_nzeros + procedure, nopass :: get_fmt => s_ell_get_fmt + procedure, pass(a) :: sizeof => s_ell_sizeof + procedure, pass(a) :: csmm => psb_s_ell_csmm + procedure, pass(a) :: csmv => psb_s_ell_csmv + procedure, pass(a) :: inner_cssm => psb_s_ell_cssm + procedure, pass(a) :: inner_cssv => psb_s_ell_cssv + procedure, pass(a) :: scals => psb_s_ell_scals + procedure, pass(a) :: scalv => psb_s_ell_scal + procedure, pass(a) :: maxval => psb_s_ell_maxval + procedure, pass(a) :: csnmi => psb_s_ell_csnmi + procedure, pass(a) :: csnm1 => psb_s_ell_csnm1 + procedure, pass(a) :: rowsum => psb_s_ell_rowsum + procedure, pass(a) :: arwsum => psb_s_ell_arwsum + procedure, pass(a) :: colsum => psb_s_ell_colsum + procedure, pass(a) :: aclsum => psb_s_ell_aclsum + procedure, pass(a) :: reallocate_nz => psb_s_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_ell_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_s_cp_ell_to_coo + procedure, pass(a) :: cp_from_coo => psb_s_cp_ell_from_coo + procedure, pass(a) :: cp_to_fmt => psb_s_cp_ell_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_s_cp_ell_from_fmt + procedure, pass(a) :: mv_to_coo => psb_s_mv_ell_to_coo + procedure, pass(a) :: mv_from_coo => psb_s_mv_ell_from_coo + procedure, pass(a) :: mv_to_fmt => psb_s_mv_ell_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_s_mv_ell_from_fmt + procedure, pass(a) :: csput_a => psb_s_ell_csput_a + procedure, pass(a) :: get_diag => psb_s_ell_get_diag + procedure, pass(a) :: csgetptn => psb_s_ell_csgetptn + procedure, pass(a) :: csgetrow => psb_s_ell_csgetrow + procedure, pass(a) :: get_nz_row => s_ell_get_nz_row + procedure, pass(a) :: reinit => psb_s_ell_reinit + procedure, pass(a) :: trim => psb_s_ell_trim + procedure, pass(a) :: print => psb_s_ell_print + procedure, pass(a) :: free => s_ell_free + procedure, pass(a) :: mold => psb_s_ell_mold + procedure, pass(a) :: get_nrm => s_ell_get_nrm + + end type psb_s_ell_sparse_mat + + private :: s_ell_get_nzeros, s_ell_free, s_ell_get_fmt, & + & s_ell_get_size, s_ell_sizeof, s_ell_get_nz_row, & + & s_ell_is_by_rows + + interface + subroutine psb_s_ell_reallocate_nz(nz,a) + import :: psb_s_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_ell_sparse_mat), intent(inout) :: a + end subroutine psb_s_ell_reallocate_nz + end interface + + interface + subroutine psb_s_ell_reinit(a,clear) + import :: psb_s_ell_sparse_mat + class(psb_s_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_s_ell_reinit + end interface + + interface + subroutine psb_s_ell_trim(a) + import :: psb_s_ell_sparse_mat + class(psb_s_ell_sparse_mat), intent(inout) :: a + end subroutine psb_s_ell_trim + end interface + + interface + subroutine psb_s_ell_mold(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ell_mold + end interface + + interface + subroutine psb_s_ell_allocate_mnnz(m,n,a,nz) + import :: psb_s_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_ell_allocate_mnnz + end interface + + interface + subroutine psb_s_ell_print(iout,a,iv,head,ivr,ivc) + import :: psb_s_ell_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_s_ell_print + end interface + + interface + subroutine psb_s_cp_ell_to_coo(a,b,info) + import :: psb_s_coo_sparse_mat, psb_s_ell_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ell_to_coo + end interface + + interface + subroutine psb_s_cp_ell_from_coo(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ell_from_coo + end interface + + interface + subroutine psb_s_cp_ell_to_fmt(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ell_to_fmt + end interface + + interface + subroutine psb_s_cp_ell_from_fmt(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_ell_from_fmt + end interface + + interface + subroutine psb_s_mv_ell_to_coo(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ell_to_coo + end interface + + interface + subroutine psb_s_mv_ell_from_coo(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ell_from_coo + end interface + + interface + subroutine psb_s_mv_ell_to_fmt(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ell_to_fmt + end interface + + interface + subroutine psb_s_mv_ell_from_fmt(a,b,info) + import :: psb_s_ell_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_ell_from_fmt + end interface + + interface + subroutine psb_s_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ell_csput_a + end interface + + interface + subroutine psb_s_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_s_ell_csgetptn + end interface + + interface + subroutine psb_s_ell_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_s_ell_csgetrow + end interface + + interface + subroutine psb_s_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_s_ell_csgetblk + end interface + + interface + subroutine psb_s_ell_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_ell_cssv + subroutine psb_s_ell_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_ell_cssm + end interface + + interface + subroutine psb_s_ell_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_ell_csmv + subroutine psb_s_ell_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_ell_csmm + end interface + + + interface + function psb_s_ell_maxval(a) result(res) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_ell_maxval + end interface + + interface + function psb_s_ell_csnmi(a) result(res) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_ell_csnmi + end interface + + interface + function psb_s_ell_csnm1(a) result(res) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_ell_csnm1 + end interface + + interface + subroutine psb_s_ell_rowsum(d,a) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_ell_rowsum + end interface + + interface + subroutine psb_s_ell_arwsum(d,a) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_ell_arwsum + end interface + + interface + subroutine psb_s_ell_colsum(d,a) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_ell_colsum + end interface + + interface + subroutine psb_s_ell_aclsum(d,a) + import :: psb_s_ell_sparse_mat, psb_spk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_ell_aclsum + end interface + + interface + subroutine psb_s_ell_get_diag(a,d,info) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ell_get_diag + end interface + + interface + subroutine psb_s_ell_scal(d,a,info,side) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_ell_scal + end interface + + interface + subroutine psb_s_ell_scals(d,a,info) + import :: psb_s_ell_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_ell_scals + end interface + + interface + subroutine psi_s_convert_ell_from_coo(a,tmp,info,hacksize) + import :: psb_s_ell_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + implicit none + class(psb_s_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + end subroutine psi_s_convert_ell_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_ell_is_by_rows(a) result(res) + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + logical :: res + res = .true. + end function s_ell_is_by_rows + + function s_ell_sizeof(a) result(res) + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_sp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + + end function s_ell_sizeof + + function s_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELL' + end function s_ell_get_fmt + + function s_ell_get_nrm(a) result(res) + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = size(a%val,2) + end function s_ell_get_nrm + + function s_ell_get_nzeros(a) result(res) + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function s_ell_get_nzeros + + function s_ell_get_size(a) result(res) + implicit none + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = -1 + if (a%is_dev()) call a%sync() + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function s_ell_get_size + + + function s_ell_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_s_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + if (a%is_dev()) call a%sync() + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function s_ell_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine s_ell_free(a) + implicit none + + class(psb_s_ell_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine s_ell_free + + +end module psb_s_ell_mat_mod diff --git a/ext/psb_s_hdia_mat_mod.f90 b/ext/psb_s_hdia_mat_mod.f90 new file mode 100644 index 00000000..b7b2b110 --- /dev/null +++ b/ext/psb_s_hdia_mat_mod.f90 @@ -0,0 +1,534 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +module psb_s_hdia_mat_mod + + use psb_s_base_mat_mod + + + type, extends(psb_s_base_sparse_mat) :: psb_s_hdia_sparse_mat + ! + ! HDIA format + ! + integer(psb_ipk_), allocatable :: hackOffsets(:), diaOffsets(:) + real(psb_spk_), allocatable :: val(:) + + + integer(psb_ipk_) :: nhacks, nzeros + integer(psb_ipk_) :: hacksize = 32 + integer(psb_epk_) :: dim=0 + + contains + ! procedure, pass(a) :: get_size => s_hdia_get_size + procedure, pass(a) :: get_nzeros => s_hdia_get_nzeros + procedure, pass(a) :: set_nzeros => s_hdia_set_nzeros + procedure, nopass :: get_fmt => s_hdia_get_fmt + procedure, pass(a) :: sizeof => s_hdia_sizeof + ! procedure, pass(a) :: csmm => psb_s_hdia_csmm + procedure, pass(a) :: csmv => psb_s_hdia_csmv + ! procedure, pass(a) :: inner_cssm => psb_s_hdia_cssm + ! procedure, pass(a) :: inner_cssv => psb_s_hdia_cssv + ! procedure, pass(a) :: scals => psb_s_hdia_scals + ! procedure, pass(a) :: scalv => psb_s_hdia_scal + ! procedure, pass(a) :: maxval => psb_s_hdia_maxval + ! procedure, pass(a) :: csnmi => psb_s_hdia_csnmi + ! procedure, pass(a) :: csnm1 => psb_s_hdia_csnm1 + ! procedure, pass(a) :: rowsum => psb_s_hdia_rowsum + ! procedure, pass(a) :: arwsum => psb_s_hdia_arwsum + ! procedure, pass(a) :: colsum => psb_s_hdia_colsum + ! procedure, pass(a) :: aclsum => psb_s_hdia_aclsum + ! procedure, pass(a) :: reallocate_nz => psb_s_hdia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_hdia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_s_cp_hdia_to_coo + procedure, pass(a) :: cp_from_coo => psb_s_cp_hdia_from_coo + ! procedure, pass(a) :: cp_to_fmt => psb_s_cp_hdia_to_fmt + ! procedure, pass(a) :: cp_from_fmt => psb_s_cp_hdia_from_fmt + procedure, pass(a) :: mv_to_coo => psb_s_mv_hdia_to_coo + procedure, pass(a) :: mv_from_coo => psb_s_mv_hdia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_s_mv_hdia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_s_mv_hdia_from_fmt + ! procedure, pass(a) :: csput_a => psb_s_hdia_csput_a + ! procedure, pass(a) :: get_diag => psb_s_hdia_get_diag + ! procedure, pass(a) :: csgetptn => psb_s_hdia_csgetptn + ! procedure, pass(a) :: csgetrow => psb_s_hdia_csgetrow + ! procedure, pass(a) :: get_nz_row => s_hdia_get_nz_row + ! procedure, pass(a) :: reinit => psb_s_hdia_reinit + ! procedure, pass(a) :: trim => psb_s_hdia_trim + procedure, pass(a) :: print => psb_s_hdia_print + procedure, pass(a) :: free => s_hdia_free + procedure, pass(a) :: mold => psb_s_hdia_mold + + end type psb_s_hdia_sparse_mat + + private :: s_hdia_get_nzeros, s_hdia_set_nzeros, s_hdia_free, & + & s_hdia_get_fmt, s_hdia_sizeof +!!$ & +!!$ & s_hdia_get_nz_row s_hdia_get_size, + +!!$ interface +!!$ subroutine psb_s_hdia_reallocate_nz(nz,a) +!!$ import :: psb_s_hdia_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_hdia_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_reinit(a,clear) +!!$ import :: psb_s_hdia_sparse_mat +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ logical, intent(in), optional :: clear +!!$ end subroutine psb_s_hdia_reinit +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_trim(a) +!!$ import :: psb_s_hdia_sparse_mat +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_s_hdia_trim +!!$ end interface + + interface + subroutine psb_s_hdia_mold(a,b,info) + import :: psb_s_hdia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hdia_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hdia_mold + end interface + + interface + subroutine psb_s_hdia_allocate_mnnz(m,n,a,nz) + import :: psb_s_hdia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_hdia_allocate_mnnz + end interface + + interface + subroutine psb_s_hdia_print(iout,a,iv,head,ivr,ivc) + import :: psb_s_hdia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_s_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_s_hdia_print + end interface + + interface + subroutine psb_s_cp_hdia_to_coo(a,b,info) + import :: psb_s_coo_sparse_mat, psb_s_hdia_sparse_mat, psb_ipk_ + class(psb_s_hdia_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hdia_to_coo + end interface + + interface + subroutine psb_s_cp_hdia_from_coo(a,b,info) + import :: psb_s_hdia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hdia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_s_cp_hdia_to_fmt(a,b,info) +!!$ import :: psb_s_hdia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_s_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_cp_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_cp_hdia_from_fmt(a,b,info) +!!$ import :: psb_s_hdia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_s_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_cp_hdia_from_fmt +!!$ end interface + + interface + subroutine psb_s_mv_hdia_to_coo(a,b,info) + import :: psb_s_hdia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hdia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hdia_to_coo + end interface + + interface + subroutine psb_s_mv_hdia_from_coo(a,b,info) + import :: psb_s_hdia_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hdia_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_s_mv_hdia_to_fmt(a,b,info) +!!$ import :: psb_s_hdia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_s_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_mv_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_mv_hdia_from_fmt(a,b,info) +!!$ import :: psb_s_hdia_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_s_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_mv_hdia_from_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ real(psb_spk_), intent(in) :: val(:) +!!$ integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& +!!$ & imin,imax,jmin,jmax +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_hdia_csput_a +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_csgetptn(imin,imax,a,nz,ia,ja,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_s_hdia_csgetptn +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ real(psb_spk_), allocatable, intent(inout) :: val(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_s_hdia_csgetrow +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_csgetblk(imin,imax,a,b,info,& +!!$ & jmin,jmax,iren,append,rscale,cscale) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_s_coo_sparse_mat, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_s_coo_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_s_hdia_csgetblk +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_cssv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta, x(:) +!!$ real(psb_spk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_hdia_cssv +!!$ subroutine psb_s_hdia_cssm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_hdia_cssm +!!$ end interface + + interface + subroutine psb_s_hdia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hdia_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hdia_csmv +!!$ subroutine psb_s_hdia_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(in) :: alpha, beta, x(:,:) +!!$ real(psb_spk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_s_hdia_csmm + end interface + + +!!$ interface +!!$ function psb_s_hdia_maxval(a) result(res) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_) :: res +!!$ end function psb_s_hdia_maxval +!!$ end interface +!!$ +!!$ interface +!!$ function psb_s_hdia_csnmi(a) result(res) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_) :: res +!!$ end function psb_s_hdia_csnmi +!!$ end interface +!!$ +!!$ interface +!!$ function psb_s_hdia_csnm1(a) result(res) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_) :: res +!!$ end function psb_s_hdia_csnm1 +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_rowsum(d,a) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_s_hdia_rowsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_arwsum(d,a) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_s_hdia_arwsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_colsum(d,a) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_s_hdia_colsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_aclsum(d,a) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(out) :: d(:) +!!$ end subroutine psb_s_hdia_aclsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_get_diag(a,d,info) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_spk_), intent(out) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_hdia_get_diag +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_s_hdia_scal(d,a,info,side) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ real(psb_spk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_s_hdia_scal +!!$ end interface + +!!$ interface +!!$ subroutine psb_s_hdia_scals(d,a,info) +!!$ import :: psb_s_hdia_sparse_mat, psb_spk_, psb_ipk_ +!!$ class(psb_s_hdia_sparse_mat), intent(inout) :: a +!!$ real(psb_spk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_s_hdia_scals +!!$ end interface +!!$ + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_hdia_sizeof(a) result(res) + use psb_realloc_mod, only : psb_size + implicit none + class(psb_s_hdia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + integer(psb_ipk_) :: i + + if (a%is_dev()) call a%sync() + res = 0 + + res = res + psb_size(a%hackOffsets)*psb_sizeof_ip + res = res + psb_size(a%diaOffsets)*psb_sizeof_ip + res = res + psb_size(a%val) * psb_sizeof_sp + + end function s_hdia_sizeof + + function s_hdia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIA' + end function s_hdia_get_fmt + + function s_hdia_get_nzeros(a) result(res) + implicit none + class(psb_s_hdia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function s_hdia_get_nzeros + + subroutine s_hdia_set_nzeros(a,nz) + implicit none + class(psb_s_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + a%nzeros = nz + end subroutine s_hdia_set_nzeros + + ! function s_hdia_get_size(a) result(res) + ! implicit none + ! class(psb_s_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function s_hdia_get_size + + + ! function s_hdia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_s_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function s_hdia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine s_hdia_free(a) + implicit none + + class(psb_s_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: i, info + + + if (allocated(a%hackOffsets))& + & deallocate(a%hackOffsets,stat=info) + if (allocated(a%diaOffsets))& + & deallocate(a%diaOffsets,stat=info) + if (allocated(a%val))& + & deallocate(a%val,stat=info) + a%nhacks=0 + + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine s_hdia_free + + +end module psb_s_hdia_mat_mod diff --git a/ext/psb_s_hll_mat_mod.f90 b/ext/psb_s_hll_mat_mod.f90 new file mode 100644 index 00000000..735091c8 --- /dev/null +++ b/ext/psb_s_hll_mat_mod.f90 @@ -0,0 +1,564 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_s_hll_mat_mod + + use psb_s_base_mat_mod + use psi_ext_util_mod + + type, extends(psb_s_base_sparse_mat) :: psb_s_hll_sparse_mat + ! + ! HLL format. (Hacked ELL) + ! A modification of ELL. + ! Basic idea: pack and pad data in blocks of HCK rows; + ! this reduces the impact of a lone, very long row. + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! 6. hksz: hack size (multiple of 32) + ! 7. hkoffs(:): offsets of the starts of hacks inside ja/val + ! + ! + ! + integer(psb_ipk_) :: hksz, nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:) + real(psb_spk_), allocatable :: val(:) + + contains + + procedure, pass(a) :: get_hksz => s_hll_get_hksz + procedure, pass(a) :: set_hksz => s_hll_set_hksz + procedure, pass(a) :: get_size => s_hll_get_size + procedure, pass(a) :: set_nzeros => s_hll_set_nzeros + procedure, pass(a) :: get_nzeros => s_hll_get_nzeros + procedure, nopass :: get_fmt => s_hll_get_fmt + procedure, pass(a) :: sizeof => s_hll_sizeof + procedure, pass(a) :: csmm => psb_s_hll_csmm + procedure, pass(a) :: csmv => psb_s_hll_csmv + procedure, pass(a) :: inner_cssm => psb_s_hll_cssm + procedure, pass(a) :: inner_cssv => psb_s_hll_cssv + procedure, pass(a) :: scals => psb_s_hll_scals + procedure, pass(a) :: scalv => psb_s_hll_scal + procedure, pass(a) :: maxval => psb_s_hll_maxval + procedure, pass(a) :: csnmi => psb_s_hll_csnmi + procedure, pass(a) :: csnm1 => psb_s_hll_csnm1 + procedure, pass(a) :: rowsum => psb_s_hll_rowsum + procedure, pass(a) :: arwsum => psb_s_hll_arwsum + procedure, pass(a) :: colsum => psb_s_hll_colsum + procedure, pass(a) :: aclsum => psb_s_hll_aclsum + procedure, pass(a) :: reallocate_nz => psb_s_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_hll_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_s_cp_hll_to_coo + procedure, pass(a) :: cp_from_coo => psb_s_cp_hll_from_coo + procedure, pass(a) :: cp_to_fmt => psb_s_cp_hll_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_s_cp_hll_from_fmt + procedure, pass(a) :: mv_to_coo => psb_s_mv_hll_to_coo + procedure, pass(a) :: mv_from_coo => psb_s_mv_hll_from_coo + procedure, pass(a) :: mv_to_fmt => psb_s_mv_hll_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_s_mv_hll_from_fmt + procedure, pass(a) :: csput_a => psb_s_hll_csput_a + procedure, pass(a) :: get_diag => psb_s_hll_get_diag + procedure, pass(a) :: csgetptn => psb_s_hll_csgetptn + procedure, pass(a) :: csgetrow => psb_s_hll_csgetrow + procedure, pass(a) :: get_nz_row => s_hll_get_nz_row + procedure, pass(a) :: reinit => psb_s_hll_reinit + procedure, pass(a) :: print => psb_s_hll_print + procedure, pass(a) :: free => s_hll_free + procedure, pass(a) :: mold => psb_s_hll_mold + + end type psb_s_hll_sparse_mat + + private :: s_hll_get_nzeros, s_hll_free, s_hll_get_fmt, & + & s_hll_get_size, s_hll_sizeof, s_hll_get_nz_row, & + & s_hll_set_nzeros, s_hll_get_hksz, s_hll_set_hksz + + interface + subroutine psb_s_hll_reallocate_nz(nz,a) + import :: psb_s_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_s_hll_sparse_mat), intent(inout) :: a + end subroutine psb_s_hll_reallocate_nz + end interface + + interface + subroutine psb_s_hll_reinit(a,clear) + import :: psb_s_hll_sparse_mat + class(psb_s_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_s_hll_reinit + end interface + + interface + subroutine psb_s_hll_mold(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hll_mold + end interface + + interface + subroutine psb_s_hll_allocate_mnnz(m,n,a,nz) + import :: psb_s_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_s_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_hll_allocate_mnnz + end interface + + interface + subroutine psb_s_hll_print(iout,a,iv,head,ivr,ivc) + import :: psb_s_hll_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_s_hll_print + end interface + + interface + subroutine psb_s_cp_hll_to_coo(a,b,info) + import :: psb_s_coo_sparse_mat, psb_s_hll_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hll_to_coo + end interface + + interface + subroutine psb_s_cp_hll_from_coo(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hll_from_coo + end interface + + interface + subroutine psb_s_cp_hll_to_fmt(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hll_to_fmt + end interface + + interface + subroutine psb_s_cp_hll_from_fmt(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_cp_hll_from_fmt + end interface + + interface + subroutine psb_s_mv_hll_to_coo(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hll_to_coo + end interface + + interface + subroutine psb_s_mv_hll_from_coo(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hll_from_coo + end interface + + interface + subroutine psb_s_mv_hll_to_fmt(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hll_to_fmt + end interface + + interface + subroutine psb_s_mv_hll_from_fmt(a,b,info) + import :: psb_s_hll_sparse_mat, psb_s_base_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_mv_hll_from_fmt + end interface + + interface + subroutine psb_s_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hll_csput_a + end interface + + interface + subroutine psb_s_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_s_hll_csgetptn + end interface + + interface + subroutine psb_s_hll_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_spk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_s_hll_csgetrow + end interface + + interface + subroutine psb_s_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_s_coo_sparse_mat, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_s_hll_csgetblk + end interface + + interface + subroutine psb_s_hll_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hll_cssv + subroutine psb_s_hll_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hll_cssm + end interface + + interface + subroutine psb_s_hll_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hll_csmv + subroutine psb_s_hll_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_hll_csmm + end interface + + + interface + function psb_s_hll_maxval(a) result(res) + import :: psb_s_hll_sparse_mat, psb_spk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_hll_maxval + end interface + + interface + function psb_s_hll_csnmi(a) result(res) + import :: psb_s_hll_sparse_mat, psb_spk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_hll_csnmi + end interface + + interface + function psb_s_hll_csnm1(a) result(res) + import :: psb_s_hll_sparse_mat, psb_spk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_) :: res + end function psb_s_hll_csnm1 + end interface + + interface + subroutine psb_s_hll_rowsum(d,a) + import :: psb_s_hll_sparse_mat, psb_spk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_hll_rowsum + end interface + + interface + subroutine psb_s_hll_arwsum(d,a) + import :: psb_s_hll_sparse_mat, psb_spk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_hll_arwsum + end interface + + interface + subroutine psb_s_hll_colsum(d,a) + import :: psb_s_hll_sparse_mat, psb_spk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_hll_colsum + end interface + + interface + subroutine psb_s_hll_aclsum(d,a) + import :: psb_s_hll_sparse_mat, psb_spk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + end subroutine psb_s_hll_aclsum + end interface + + interface + subroutine psb_s_hll_get_diag(a,d,info) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hll_get_diag + end interface + + interface + subroutine psb_s_hll_scal(d,a,info,side) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_s_hll_scal + end interface + + interface + subroutine psb_s_hll_scals(d,a,info) + import :: psb_s_hll_sparse_mat, psb_spk_, psb_ipk_ + class(psb_s_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_hll_scals + end interface + + interface psi_convert_hll_from_coo + subroutine psi_s_convert_hll_from_coo(a,hksz,tmp,info) + import :: psb_s_hll_sparse_mat, psb_ipk_, psb_s_coo_sparse_mat + implicit none + class(psb_s_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: hksz + class(psb_s_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_s_convert_hll_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function s_hll_sizeof(a) result(res) + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + psb_sizeof_sp * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%hkoffs) + + end function s_hll_sizeof + + function s_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLL' + end function s_hll_get_fmt + + subroutine s_hll_set_nzeros(a,n) + implicit none + class(psb_s_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%nzt = n + end subroutine s_hll_set_nzeros + + function s_hll_get_nzeros(a) result(res) + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function s_hll_get_nzeros + + function s_hll_get_size(a) result(res) + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + if (a%is_dev()) call a%sync() + + res = -1 + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function s_hll_get_size + + + + function s_hll_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function s_hll_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine s_hll_free(a) + implicit none + + class(psb_s_hll_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + if (allocated(a%val)) deallocate(a%hkoffs) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + call a%set_hksz(izero) + + return + + end subroutine s_hll_free + + subroutine s_hll_set_hksz(a,n) + implicit none + class(psb_s_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%hksz = n + end subroutine s_hll_set_hksz + + function s_hll_get_hksz(a) result(res) + implicit none + class(psb_s_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = a%hksz + + end function s_hll_get_hksz + +end module psb_s_hll_mat_mod diff --git a/ext/psb_z_dia_mat_mod.f90 b/ext/psb_z_dia_mat_mod.f90 new file mode 100644 index 00000000..76d071af --- /dev/null +++ b/ext/psb_z_dia_mat_mod.f90 @@ -0,0 +1,513 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_dia_mat_mod + + use psb_z_base_mat_mod + + type, extends(psb_z_base_sparse_mat) :: psb_z_dia_sparse_mat + ! + ! DIA format, extended. + ! + + integer(psb_ipk_), allocatable :: offset(:) + integer(psb_ipk_) :: nzeros + complex(psb_dpk_), allocatable :: data(:,:) + + contains + ! procedure, pass(a) :: get_size => z_dia_get_size + procedure, pass(a) :: get_nzeros => z_dia_get_nzeros + procedure, nopass :: get_fmt => z_dia_get_fmt + procedure, pass(a) :: sizeof => z_dia_sizeof + procedure, pass(a) :: csmm => psb_z_dia_csmm + procedure, pass(a) :: csmv => psb_z_dia_csmv + ! procedure, pass(a) :: inner_cssm => psb_z_dia_cssm + ! procedure, pass(a) :: inner_cssv => psb_z_dia_cssv + procedure, pass(a) :: scals => psb_z_dia_scals + procedure, pass(a) :: scalv => psb_z_dia_scal + procedure, pass(a) :: maxval => psb_z_dia_maxval + procedure, pass(a) :: rowsum => psb_z_dia_rowsum + procedure, pass(a) :: arwsum => psb_z_dia_arwsum + procedure, pass(a) :: colsum => psb_z_dia_colsum + procedure, pass(a) :: aclsum => psb_z_dia_aclsum + procedure, pass(a) :: reallocate_nz => psb_z_dia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_dia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_z_cp_dia_to_coo + procedure, pass(a) :: cp_from_coo => psb_z_cp_dia_from_coo + ! procedure, pass(a) :: mv_to_coo => psb_z_mv_dia_to_coo + procedure, pass(a) :: mv_from_coo => psb_z_mv_dia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_z_mv_dia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_z_mv_dia_from_fmt + ! procedure, pass(a) :: csput_a => psb_z_dia_csput_a + procedure, pass(a) :: get_diag => psb_z_dia_get_diag + procedure, pass(a) :: csgetptn => psb_z_dia_csgetptn + procedure, pass(a) :: csgetrow => psb_z_dia_csgetrow + ! procedure, pass(a) :: get_nz_row => z_dia_get_nz_row + procedure, pass(a) :: reinit => psb_z_dia_reinit + ! procedure, pass(a) :: trim => psb_z_dia_trim + procedure, pass(a) :: print => psb_z_dia_print + procedure, pass(a) :: free => z_dia_free + procedure, pass(a) :: mold => psb_z_dia_mold + + end type psb_z_dia_sparse_mat + + private :: z_dia_get_nzeros, z_dia_free, z_dia_get_fmt, & + & z_dia_sizeof !, z_dia_get_size, z_dia_get_nz_row + + interface + subroutine psb_z_dia_reallocate_nz(nz,a) + import :: psb_z_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_dia_sparse_mat), intent(inout) :: a + end subroutine psb_z_dia_reallocate_nz + end interface + + interface + subroutine psb_z_dia_reinit(a,clear) + import :: psb_z_dia_sparse_mat + class(psb_z_dia_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_z_dia_reinit + end interface + + interface + subroutine psb_z_dia_trim(a) + import :: psb_z_dia_sparse_mat + class(psb_z_dia_sparse_mat), intent(inout) :: a + end subroutine psb_z_dia_trim + end interface + + interface + subroutine psb_z_dia_mold(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dia_mold + end interface + + interface + subroutine psb_z_dia_allocate_mnnz(m,n,a,nz) + import :: psb_z_dia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_dia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_dia_allocate_mnnz + end interface + + interface + subroutine psb_z_dia_print(iout,a,iv,head,ivr,ivc) + import :: psb_z_dia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_z_dia_print + end interface + + interface + subroutine psb_z_cp_dia_to_coo(a,b,info) + import :: psb_z_coo_sparse_mat, psb_z_dia_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dia_to_coo + end interface + + interface + subroutine psb_z_cp_dia_from_coo(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dia_from_coo + end interface + + interface + subroutine psb_z_cp_dia_to_fmt(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dia_to_fmt + end interface + + interface + subroutine psb_z_cp_dia_from_fmt(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dia_from_fmt + end interface + + interface + subroutine psb_z_mv_dia_to_coo(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dia_to_coo + end interface + + interface + subroutine psb_z_mv_dia_from_coo(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dia_from_coo + end interface + + interface + subroutine psb_z_mv_dia_to_fmt(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dia_to_fmt + end interface + + interface + subroutine psb_z_mv_dia_from_fmt(a,b,info) + import :: psb_z_dia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dia_from_fmt + end interface + + interface + subroutine psb_z_dia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dia_csput_a + end interface + + interface + subroutine psb_z_dia_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_dia_csgetptn + end interface + + interface + subroutine psb_z_dia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_z_dia_csgetrow + end interface + + interface + subroutine psb_z_dia_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_dia_csgetblk + end interface + + interface + subroutine psb_z_dia_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dia_cssv + subroutine psb_z_dia_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dia_cssm + end interface + + interface + subroutine psb_z_dia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dia_csmv + subroutine psb_z_dia_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dia_csmm + end interface + + + interface + function psb_z_dia_maxval(a) result(res) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_dia_maxval + end interface + + interface + function psb_z_dia_csnmi(a) result(res) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_dia_csnmi + end interface + + interface + function psb_z_dia_csnm1(a) result(res) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_dia_csnm1 + end interface + + interface + subroutine psb_z_dia_rowsum(d,a) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_dia_rowsum + end interface + + interface + subroutine psb_z_dia_arwsum(d,a) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_dia_arwsum + end interface + + interface + subroutine psb_z_dia_colsum(d,a) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_dia_colsum + end interface + + interface + subroutine psb_z_dia_aclsum(d,a) + import :: psb_z_dia_sparse_mat, psb_dpk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_dia_aclsum + end interface + + interface + subroutine psb_z_dia_get_diag(a,d,info) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dia_get_diag + end interface + + interface + subroutine psb_z_dia_scal(d,a,info,side) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_dia_scal + end interface + + interface + subroutine psb_z_dia_scals(d,a,info) + import :: psb_z_dia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dia_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dia_scals + end interface + + interface psi_convert_dia_from_coo + subroutine psi_z_convert_dia_from_coo(a,tmp,info) + import :: psb_z_dia_sparse_mat, psb_ipk_, psb_z_coo_sparse_mat + implicit none + class(psb_z_dia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_z_convert_dia_from_coo + end interface + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_dia_sizeof(a) result(res) + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%data) + res = res + psb_sizeof_ip * size(a%offset) + + end function z_dia_sizeof + + function z_dia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DIA' + end function z_dia_get_fmt + + function z_dia_get_nzeros(a) result(res) + implicit none + class(psb_z_dia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function z_dia_get_nzeros + + ! function z_dia_get_size(a) result(res) + ! implicit none + ! class(psb_z_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function z_dia_get_size + + + ! function z_dia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_z_dia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function z_dia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine z_dia_free(a) + implicit none + + class(psb_z_dia_sparse_mat), intent(inout) :: a + + if (allocated(a%data)) deallocate(a%data) + if (allocated(a%offset)) deallocate(a%offset) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine z_dia_free + + +end module psb_z_dia_mat_mod diff --git a/ext/psb_z_dns_mat_mod.f90 b/ext/psb_z_dns_mat_mod.f90 new file mode 100644 index 00000000..6147057d --- /dev/null +++ b/ext/psb_z_dns_mat_mod.f90 @@ -0,0 +1,467 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +module psb_z_dns_mat_mod + + use psb_z_base_mat_mod + + type, extends(psb_z_base_sparse_mat) :: psb_z_dns_sparse_mat + ! + ! DNS format: a very simple dense matrix storage + ! psb_dpk_ : kind for double precision reals + ! psb_ipk_: kind for normal integers. + ! psb_sizeof_dp: variable holding size in bytes of + ! a double + ! psb_sizeof_ip: size in bytes of an integer + ! + ! psb_realloc(n,v,info) Reallocate: does what it says + ! psb_realloc(m,n,a,info) on rank 1 and 2 arrays, may start + ! from unallocated + ! + ! + integer(psb_ipk_) :: nnz + complex(psb_dpk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: get_size => z_dns_get_size + procedure, pass(a) :: get_nzeros => z_dns_get_nzeros + procedure, nopass :: get_fmt => z_dns_get_fmt + procedure, pass(a) :: sizeof => z_dns_sizeof + procedure, pass(a) :: csmv => psb_z_dns_csmv + procedure, pass(a) :: csmm => psb_z_dns_csmm + procedure, pass(a) :: csnmi => psb_z_dns_csnmi + procedure, pass(a) :: reallocate_nz => psb_z_dns_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_dns_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_z_cp_dns_to_coo + procedure, pass(a) :: cp_from_coo => psb_z_cp_dns_from_coo + procedure, pass(a) :: mv_to_coo => psb_z_mv_dns_to_coo + procedure, pass(a) :: mv_from_coo => psb_z_mv_dns_from_coo + procedure, pass(a) :: get_diag => psb_z_dns_get_diag + procedure, pass(a) :: csgetrow => psb_z_dns_csgetrow + procedure, pass(a) :: get_nz_row => z_dns_get_nz_row + procedure, pass(a) :: trim => psb_z_dns_trim + procedure, pass(a) :: free => z_dns_free + procedure, pass(a) :: mold => psb_z_dns_mold + + end type psb_z_dns_sparse_mat + + private :: z_dns_get_nzeros, z_dns_free, z_dns_get_fmt, & + & z_dns_get_size, z_dns_sizeof, z_dns_get_nz_row + + ! + ! + !> Function reallocate_nz + !! \memberof psb_z_dns_sparse_mat + !! \brief One--parameters version of (re)allocate + !! + !! \param nz number of nonzeros to allocate for + !! i.e. makes sure that the internal storage + !! allows for NZ coefficients and their indices. + ! + interface + subroutine psb_z_dns_reallocate_nz(nz,a) + import :: psb_z_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_dns_sparse_mat), intent(inout) :: a + end subroutine psb_z_dns_reallocate_nz + end interface + + !> Function trim + !! \memberof psb_z_dns_sparse_mat + !! \brief Memory trim + !! Make sure the memory allocation of the sparse matrix is as tight as + !! possible given the actual number of nonzeros it contains. + ! + interface + subroutine psb_z_dns_trim(a) + import :: psb_z_dns_sparse_mat + class(psb_z_dns_sparse_mat), intent(inout) :: a + end subroutine psb_z_dns_trim + end interface + + ! + !> Function mold: + !! \memberof psb_z_dns_sparse_mat + !! \brief Allocate a class(psb_z_dns_sparse_mat) with the + !! same dynamic type as the input. + !! This is equivalent to allocate( mold= ) and is provided + !! for those compilers not yet supporting mold. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_z_dns_mold(a,b,info) + import :: psb_z_dns_sparse_mat, psb_z_base_sparse_mat, psb_epk_, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dns_mold + end interface + + ! + ! + !> Function allocate_mnnz + !! \memberof psb_z_dns_sparse_mat + !! \brief Three-parameters version of allocate + !! + !! \param m number of rows + !! \param n number of cols + !! \param nz [estimated internally] number of nonzeros to allocate for + ! + interface + subroutine psb_z_dns_allocate_mnnz(m,n,a,nz) + import :: psb_z_dns_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_dns_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_dns_allocate_mnnz + end interface + + ! + !> Function cp_to_coo: + !! \memberof psb_z_dns_sparse_mat + !! \brief Copy and convert to psb_z_coo_sparse_mat + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_z_cp_dns_to_coo(a,b,info) + import :: psb_z_coo_sparse_mat, psb_z_dns_sparse_mat, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dns_to_coo + end interface + + ! + !> Function cp_from_coo: + !! \memberof psb_z_dns_sparse_mat + !! \brief Copy and convert from psb_z_coo_sparse_mat + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_z_cp_dns_from_coo(a,b,info) + import :: psb_z_dns_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_dns_from_coo + end interface + + ! + !> Function mv_to_coo: + !! \memberof psb_z_dns_sparse_mat + !! \brief Convert to psb_z_coo_sparse_mat, freeing the source. + !! Invoked from the source object. + !! \param b The output variable + !! \param info return code + ! + interface + subroutine psb_z_mv_dns_to_coo(a,b,info) + import :: psb_z_dns_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dns_to_coo + end interface + + ! + !> Function mv_from_coo: + !! \memberof psb_z_dns_sparse_mat + !! \brief Convert from psb_z_coo_sparse_mat, freeing the source. + !! Invoked from the target object. + !! \param b The input variable + !! \param info return code + ! + interface + subroutine psb_z_mv_dns_from_coo(a,b,info) + import :: psb_z_dns_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_dns_from_coo + end interface + + ! + ! + !> Function csgetrow: + !! \memberof psb_z_dns_sparse_mat + !! \brief Get a (subset of) row(s) + !! + !! getrow is the basic method by which the other (getblk, clip) can + !! be implemented. + !! + !! Returns the set + !! NZ, IA(1:nz), JA(1:nz), VAL(1:NZ) + !! each identifying the position of a nonzero in A + !! i.e. + !! VAL(1:NZ) = A(IA(1:NZ),JA(1:NZ)) + !! with IMIN<=IA(:)<=IMAX + !! with JMIN<=JA(:)<=JMAX + !! IA,JA are reallocated as necessary. + !! + !! \param imin the minimum row index we are interested in + !! \param imax the minimum row index we are interested in + !! \param nz the number of output coefficients + !! \param ia(:) the output row indices + !! \param ja(:) the output col indices + !! \param val(:) the output coefficients + !! \param info return code + !! \param jmin [1] minimum col index + !! \param jmax [a\%get_ncols()] maximum col index + !! \param iren(:) [none] an array to return renumbered indices (iren(ia(:)),iren(ja(:)) + !! \param rscale [false] map [min(ia(:)):max(ia(:))] onto [1:max(ia(:))-min(ia(:))+1] + !! \param cscale [false] map [min(ja(:)):max(ja(:))] onto [1:max(ja(:))-min(ja(:))+1] + !! ( iren cannot be specified with rscale/cscale) + !! \param append [false] append to ia,ja + !! \param nzin [none] if append, then first new entry should go in entry nzin+1 + !! + ! + interface + subroutine psb_z_dns_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_z_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_z_dns_csgetrow + end interface + + + + !> Function csmv: + !! \memberof psb_z_dns_sparse_mat + !! \brief Product by a dense rank 1 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_z_dns_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dns_csmv + end interface + + !> Function csmm: + !! \memberof psb_z_dns_sparse_mat + !! \brief Product by a dense rank 2 array. + !! + !! Compute + !! Y = alpha*op(A)*X + beta*Y + !! + !! \param alpha Scaling factor for Ax + !! \param A the input sparse matrix + !! \param x(:,:) the input dense X + !! \param beta Scaling factor for y + !! \param y(:,:) the input/output dense Y + !! \param info return code + !! \param trans [N] Whether to use A (N), its transpose (T) + !! or its conjugate transpose (C) + !! + ! + interface + subroutine psb_z_dns_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_dns_csmm + end interface + + ! + ! + !> Function csnmi: + !! \memberof psb_z_dns_sparse_mat + !! \brief Operator infinity norm + !! CSNMI = MAXVAL(SUM(ABS(A(:,:)),dim=2)) + !! + ! + interface + function psb_z_dns_csnmi(a) result(res) + import :: psb_z_dns_sparse_mat, psb_dpk_ + class(psb_z_dns_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_dns_csnmi + end interface + + ! + !> Function get_diag: + !! \memberof psb_z_dns_sparse_mat + !! \brief Extract the diagonal of A. + !! + !! D(i) = A(i:i), i=1:min(nrows,ncols) + !! + !! \param d(:) The output diagonal + !! \param info return code. + ! + interface + subroutine psb_z_dns_get_diag(a,d,info) + import :: psb_z_dns_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_dns_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_dns_get_diag + end interface + + +contains + + ! + !> Function sizeof + !! \memberof psb_z_dns_sparse_mat + !! \brief Memory occupation in bytes + ! + function z_dns_sizeof(a) result(res) + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + res = psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip + + end function z_dns_sizeof + + ! + !> Function get_fmt + !! \memberof psb_z_dns_sparse_mat + !! \brief return a short descriptive name (e.g. COO CSR etc.) + ! + function z_dns_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'DNS' + end function z_dns_get_fmt + + ! + !> Function get_nzeros + !! \memberof psb_z_dns_sparse_mat + !! \brief Current number of nonzero entries + ! + function z_dns_get_nzeros(a) result(res) + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nnz + end function z_dns_get_nzeros + + ! + !> Function get_size + !! \memberof psb_z_dns_sparse_mat + !! \brief Maximum number of nonzeros the current structure can hold + ! this is fixed once you initialize the matrix, with dense storage + ! you can hold up to MxN entries + function z_dns_get_size(a) result(res) + implicit none + class(psb_z_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = size(a%val) + + end function z_dns_get_size + + + ! + !> Function get_nz_row. + !! \memberof psb_z_coo_sparse_mat + !! \brief How many nonzeros in a row? + !! + !! \param idx The row to search. + !! + ! + function z_dns_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_z_dns_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = count(a%val(idx,:) /= dzero) + end if + + end function z_dns_get_nz_row + + ! + !> Function free + !! \memberof psb_z_dns_sparse_mat + !! Name says all + + subroutine z_dns_free(a) + implicit none + + class(psb_z_dns_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) deallocate(a%val) + a%nnz = 0 + + + ! + ! Mark the object as empty just in case + ! + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine z_dns_free + + +end module psb_z_dns_mat_mod diff --git a/ext/psb_z_ell_mat_mod.f90 b/ext/psb_z_ell_mat_mod.f90 new file mode 100644 index 00000000..0e1f0e00 --- /dev/null +++ b/ext/psb_z_ell_mat_mod.f90 @@ -0,0 +1,552 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_ell_mat_mod + + use psb_z_base_mat_mod + + type, extends(psb_z_base_sparse_mat) :: psb_z_ell_sparse_mat + ! + ! ITPACK/ELL format, extended. + ! Based on M. Heroux "A proposal for a sparse BLAS toolkit". + ! IRN is our addition, should help in transferring to/from + ! other formats (should come in handy for GPUs). + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! + + integer(psb_ipk_) :: nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:,:), idiag(:) + complex(psb_dpk_), allocatable :: val(:,:) + + contains + procedure, pass(a) :: is_by_rows => z_ell_is_by_rows + procedure, pass(a) :: get_size => z_ell_get_size + procedure, pass(a) :: get_nzeros => z_ell_get_nzeros + procedure, nopass :: get_fmt => z_ell_get_fmt + procedure, pass(a) :: sizeof => z_ell_sizeof + procedure, pass(a) :: csmm => psb_z_ell_csmm + procedure, pass(a) :: csmv => psb_z_ell_csmv + procedure, pass(a) :: inner_cssm => psb_z_ell_cssm + procedure, pass(a) :: inner_cssv => psb_z_ell_cssv + procedure, pass(a) :: scals => psb_z_ell_scals + procedure, pass(a) :: scalv => psb_z_ell_scal + procedure, pass(a) :: maxval => psb_z_ell_maxval + procedure, pass(a) :: csnmi => psb_z_ell_csnmi + procedure, pass(a) :: csnm1 => psb_z_ell_csnm1 + procedure, pass(a) :: rowsum => psb_z_ell_rowsum + procedure, pass(a) :: arwsum => psb_z_ell_arwsum + procedure, pass(a) :: colsum => psb_z_ell_colsum + procedure, pass(a) :: aclsum => psb_z_ell_aclsum + procedure, pass(a) :: reallocate_nz => psb_z_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_ell_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_z_cp_ell_to_coo + procedure, pass(a) :: cp_from_coo => psb_z_cp_ell_from_coo + procedure, pass(a) :: cp_to_fmt => psb_z_cp_ell_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_z_cp_ell_from_fmt + procedure, pass(a) :: mv_to_coo => psb_z_mv_ell_to_coo + procedure, pass(a) :: mv_from_coo => psb_z_mv_ell_from_coo + procedure, pass(a) :: mv_to_fmt => psb_z_mv_ell_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_z_mv_ell_from_fmt + procedure, pass(a) :: csput_a => psb_z_ell_csput_a + procedure, pass(a) :: get_diag => psb_z_ell_get_diag + procedure, pass(a) :: csgetptn => psb_z_ell_csgetptn + procedure, pass(a) :: csgetrow => psb_z_ell_csgetrow + procedure, pass(a) :: get_nz_row => z_ell_get_nz_row + procedure, pass(a) :: reinit => psb_z_ell_reinit + procedure, pass(a) :: trim => psb_z_ell_trim + procedure, pass(a) :: print => psb_z_ell_print + procedure, pass(a) :: free => z_ell_free + procedure, pass(a) :: mold => psb_z_ell_mold + procedure, pass(a) :: get_nrm => z_ell_get_nrm + + end type psb_z_ell_sparse_mat + + private :: z_ell_get_nzeros, z_ell_free, z_ell_get_fmt, & + & z_ell_get_size, z_ell_sizeof, z_ell_get_nz_row, & + & z_ell_is_by_rows + + interface + subroutine psb_z_ell_reallocate_nz(nz,a) + import :: psb_z_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_ell_sparse_mat), intent(inout) :: a + end subroutine psb_z_ell_reallocate_nz + end interface + + interface + subroutine psb_z_ell_reinit(a,clear) + import :: psb_z_ell_sparse_mat + class(psb_z_ell_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_z_ell_reinit + end interface + + interface + subroutine psb_z_ell_trim(a) + import :: psb_z_ell_sparse_mat + class(psb_z_ell_sparse_mat), intent(inout) :: a + end subroutine psb_z_ell_trim + end interface + + interface + subroutine psb_z_ell_mold(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ell_mold + end interface + + interface + subroutine psb_z_ell_allocate_mnnz(m,n,a,nz) + import :: psb_z_ell_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_ell_allocate_mnnz + end interface + + interface + subroutine psb_z_ell_print(iout,a,iv,head,ivr,ivc) + import :: psb_z_ell_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_z_ell_print + end interface + + interface + subroutine psb_z_cp_ell_to_coo(a,b,info) + import :: psb_z_coo_sparse_mat, psb_z_ell_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ell_to_coo + end interface + + interface + subroutine psb_z_cp_ell_from_coo(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ell_from_coo + end interface + + interface + subroutine psb_z_cp_ell_to_fmt(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ell_to_fmt + end interface + + interface + subroutine psb_z_cp_ell_from_fmt(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_ell_from_fmt + end interface + + interface + subroutine psb_z_mv_ell_to_coo(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ell_to_coo + end interface + + interface + subroutine psb_z_mv_ell_from_coo(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ell_from_coo + end interface + + interface + subroutine psb_z_mv_ell_to_fmt(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ell_to_fmt + end interface + + interface + subroutine psb_z_mv_ell_from_fmt(a,b,info) + import :: psb_z_ell_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_ell_from_fmt + end interface + + interface + subroutine psb_z_ell_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ell_csput_a + end interface + + interface + subroutine psb_z_ell_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_ell_csgetptn + end interface + + interface + subroutine psb_z_ell_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_z_ell_csgetrow + end interface + + interface + subroutine psb_z_ell_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_ell_csgetblk + end interface + + interface + subroutine psb_z_ell_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_ell_cssv + subroutine psb_z_ell_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_ell_cssm + end interface + + interface + subroutine psb_z_ell_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_ell_csmv + subroutine psb_z_ell_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_ell_csmm + end interface + + + interface + function psb_z_ell_maxval(a) result(res) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_ell_maxval + end interface + + interface + function psb_z_ell_csnmi(a) result(res) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_ell_csnmi + end interface + + interface + function psb_z_ell_csnm1(a) result(res) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_ell_csnm1 + end interface + + interface + subroutine psb_z_ell_rowsum(d,a) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_ell_rowsum + end interface + + interface + subroutine psb_z_ell_arwsum(d,a) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_ell_arwsum + end interface + + interface + subroutine psb_z_ell_colsum(d,a) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_ell_colsum + end interface + + interface + subroutine psb_z_ell_aclsum(d,a) + import :: psb_z_ell_sparse_mat, psb_dpk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_ell_aclsum + end interface + + interface + subroutine psb_z_ell_get_diag(a,d,info) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ell_get_diag + end interface + + interface + subroutine psb_z_ell_scal(d,a,info,side) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_ell_scal + end interface + + interface + subroutine psb_z_ell_scals(d,a,info) + import :: psb_z_ell_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_ell_scals + end interface + + interface + subroutine psi_z_convert_ell_from_coo(a,tmp,info,hacksize) + import :: psb_z_ell_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + implicit none + class(psb_z_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: hacksize + end subroutine psi_z_convert_ell_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_ell_is_by_rows(a) result(res) + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + logical :: res + res = .true. + end function z_ell_is_by_rows + + function z_ell_sizeof(a) result(res) + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + + end function z_ell_sizeof + + function z_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELL' + end function z_ell_get_fmt + + function z_ell_get_nrm(a) result(res) + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = size(a%val,2) + end function z_ell_get_nrm + + function z_ell_get_nzeros(a) result(res) + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function z_ell_get_nzeros + + function z_ell_get_size(a) result(res) + implicit none + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = -1 + if (a%is_dev()) call a%sync() + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function z_ell_get_size + + + function z_ell_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_z_ell_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + if (a%is_dev()) call a%sync() + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function z_ell_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine z_ell_free(a) + implicit none + + class(psb_z_ell_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine z_ell_free + + +end module psb_z_ell_mat_mod diff --git a/ext/psb_z_hdia_mat_mod.f90 b/ext/psb_z_hdia_mat_mod.f90 new file mode 100644 index 00000000..e7c11321 --- /dev/null +++ b/ext/psb_z_hdia_mat_mod.f90 @@ -0,0 +1,534 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + +module psb_z_hdia_mat_mod + + use psb_z_base_mat_mod + + + type, extends(psb_z_base_sparse_mat) :: psb_z_hdia_sparse_mat + ! + ! HDIA format + ! + integer(psb_ipk_), allocatable :: hackOffsets(:), diaOffsets(:) + complex(psb_dpk_), allocatable :: val(:) + + + integer(psb_ipk_) :: nhacks, nzeros + integer(psb_ipk_) :: hacksize = 32 + integer(psb_epk_) :: dim=0 + + contains + ! procedure, pass(a) :: get_size => z_hdia_get_size + procedure, pass(a) :: get_nzeros => z_hdia_get_nzeros + procedure, pass(a) :: set_nzeros => z_hdia_set_nzeros + procedure, nopass :: get_fmt => z_hdia_get_fmt + procedure, pass(a) :: sizeof => z_hdia_sizeof + ! procedure, pass(a) :: csmm => psb_z_hdia_csmm + procedure, pass(a) :: csmv => psb_z_hdia_csmv + ! procedure, pass(a) :: inner_cssm => psb_z_hdia_cssm + ! procedure, pass(a) :: inner_cssv => psb_z_hdia_cssv + ! procedure, pass(a) :: scals => psb_z_hdia_scals + ! procedure, pass(a) :: scalv => psb_z_hdia_scal + ! procedure, pass(a) :: maxval => psb_z_hdia_maxval + ! procedure, pass(a) :: csnmi => psb_z_hdia_csnmi + ! procedure, pass(a) :: csnm1 => psb_z_hdia_csnm1 + ! procedure, pass(a) :: rowsum => psb_z_hdia_rowsum + ! procedure, pass(a) :: arwsum => psb_z_hdia_arwsum + ! procedure, pass(a) :: colsum => psb_z_hdia_colsum + ! procedure, pass(a) :: aclsum => psb_z_hdia_aclsum + ! procedure, pass(a) :: reallocate_nz => psb_z_hdia_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_hdia_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_z_cp_hdia_to_coo + procedure, pass(a) :: cp_from_coo => psb_z_cp_hdia_from_coo + ! procedure, pass(a) :: cp_to_fmt => psb_z_cp_hdia_to_fmt + ! procedure, pass(a) :: cp_from_fmt => psb_z_cp_hdia_from_fmt + procedure, pass(a) :: mv_to_coo => psb_z_mv_hdia_to_coo + procedure, pass(a) :: mv_from_coo => psb_z_mv_hdia_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_z_mv_hdia_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_z_mv_hdia_from_fmt + ! procedure, pass(a) :: csput_a => psb_z_hdia_csput_a + ! procedure, pass(a) :: get_diag => psb_z_hdia_get_diag + ! procedure, pass(a) :: csgetptn => psb_z_hdia_csgetptn + ! procedure, pass(a) :: csgetrow => psb_z_hdia_csgetrow + ! procedure, pass(a) :: get_nz_row => z_hdia_get_nz_row + ! procedure, pass(a) :: reinit => psb_z_hdia_reinit + ! procedure, pass(a) :: trim => psb_z_hdia_trim + procedure, pass(a) :: print => psb_z_hdia_print + procedure, pass(a) :: free => z_hdia_free + procedure, pass(a) :: mold => psb_z_hdia_mold + + end type psb_z_hdia_sparse_mat + + private :: z_hdia_get_nzeros, z_hdia_set_nzeros, z_hdia_free, & + & z_hdia_get_fmt, z_hdia_sizeof +!!$ & +!!$ & z_hdia_get_nz_row z_hdia_get_size, + +!!$ interface +!!$ subroutine psb_z_hdia_reallocate_nz(nz,a) +!!$ import :: psb_z_hdia_sparse_mat, psb_ipk_ +!!$ integer(psb_ipk_), intent(in) :: nz +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_hdia_reallocate_nz +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_reinit(a,clear) +!!$ import :: psb_z_hdia_sparse_mat +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ logical, intent(in), optional :: clear +!!$ end subroutine psb_z_hdia_reinit +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_trim(a) +!!$ import :: psb_z_hdia_sparse_mat +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ end subroutine psb_z_hdia_trim +!!$ end interface + + interface + subroutine psb_z_hdia_mold(a,b,info) + import :: psb_z_hdia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hdia_mold + end interface + + interface + subroutine psb_z_hdia_allocate_mnnz(m,n,a,nz) + import :: psb_z_hdia_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_hdia_allocate_mnnz + end interface + + interface + subroutine psb_z_hdia_print(iout,a,iv,head,ivr,ivc) + import :: psb_z_hdia_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_z_hdia_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_z_hdia_print + end interface + + interface + subroutine psb_z_cp_hdia_to_coo(a,b,info) + import :: psb_z_coo_sparse_mat, psb_z_hdia_sparse_mat, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hdia_to_coo + end interface + + interface + subroutine psb_z_cp_hdia_from_coo(a,b,info) + import :: psb_z_hdia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_z_cp_hdia_to_fmt(a,b,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_z_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_cp_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_cp_hdia_from_fmt(a,b,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_z_base_sparse_mat), intent(in) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_cp_hdia_from_fmt +!!$ end interface + + interface + subroutine psb_z_mv_hdia_to_coo(a,b,info) + import :: psb_z_hdia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hdia_to_coo + end interface + + interface + subroutine psb_z_mv_hdia_from_coo(a,b,info) + import :: psb_z_hdia_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hdia_from_coo + end interface + +!!$ interface +!!$ subroutine psb_z_mv_hdia_to_fmt(a,b,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_z_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_mv_hdia_to_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_mv_hdia_from_fmt(a,b,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ class(psb_z_base_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_mv_hdia_from_fmt +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: val(:) +!!$ integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& +!!$ & imin,imax,jmin,jmax +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_hdia_csput_a +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_csgetptn(imin,imax,a,nz,ia,ja,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_z_hdia_csgetptn +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_csgetrow(imin,imax,a,nz,ia,ja,val,info,& +!!$ & jmin,jmax,iren,append,nzin,rscale,cscale) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_), intent(out) :: nz +!!$ integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) +!!$ complex(psb_dpk_), allocatable, intent(inout) :: val(:) +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_z_hdia_csgetrow +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_csgetblk(imin,imax,a,b,info,& +!!$ & jmin,jmax,iren,append,rscale,cscale) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_z_coo_sparse_mat, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ class(psb_z_coo_sparse_mat), intent(inout) :: b +!!$ integer(psb_ipk_), intent(in) :: imin,imax +!!$ integer(psb_ipk_),intent(out) :: info +!!$ logical, intent(in), optional :: append +!!$ integer(psb_ipk_), intent(in), optional :: iren(:) +!!$ integer(psb_ipk_), intent(in), optional :: jmin,jmax +!!$ logical, intent(in), optional :: rscale,cscale +!!$ end subroutine psb_z_hdia_csgetblk +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_cssv(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:) +!!$ complex(psb_dpk_), intent(inout) :: y(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_hdia_cssv +!!$ subroutine psb_z_hdia_cssm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_hdia_cssm +!!$ end interface + + interface + subroutine psb_z_hdia_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hdia_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hdia_csmv +!!$ subroutine psb_z_hdia_csmm(alpha,a,x,beta,y,info,trans) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) +!!$ complex(psb_dpk_), intent(inout) :: y(:,:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, optional, intent(in) :: trans +!!$ end subroutine psb_z_hdia_csmm + end interface + + +!!$ interface +!!$ function psb_z_hdia_maxval(a) result(res) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function psb_z_hdia_maxval +!!$ end interface +!!$ +!!$ interface +!!$ function psb_z_hdia_csnmi(a) result(res) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function psb_z_hdia_csnmi +!!$ end interface +!!$ +!!$ interface +!!$ function psb_z_hdia_csnm1(a) result(res) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_) :: res +!!$ end function psb_z_hdia_csnm1 +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_rowsum(d,a) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_z_hdia_rowsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_arwsum(d,a) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_z_hdia_arwsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_colsum(d,a) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_z_hdia_colsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_aclsum(d,a) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ real(psb_dpk_), intent(out) :: d(:) +!!$ end subroutine psb_z_hdia_aclsum +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_get_diag(a,d,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(in) :: a +!!$ complex(psb_dpk_), intent(out) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_hdia_get_diag +!!$ end interface +!!$ +!!$ interface +!!$ subroutine psb_z_hdia_scal(d,a,info,side) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: d(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character, intent(in), optional :: side +!!$ end subroutine psb_z_hdia_scal +!!$ end interface + +!!$ interface +!!$ subroutine psb_z_hdia_scals(d,a,info) +!!$ import :: psb_z_hdia_sparse_mat, psb_dpk_, psb_ipk_ +!!$ class(psb_z_hdia_sparse_mat), intent(inout) :: a +!!$ complex(psb_dpk_), intent(in) :: d +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine psb_z_hdia_scals +!!$ end interface +!!$ + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_hdia_sizeof(a) result(res) + use psb_realloc_mod, only : psb_size + implicit none + class(psb_z_hdia_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + integer(psb_ipk_) :: i + + if (a%is_dev()) call a%sync() + res = 0 + + res = res + psb_size(a%hackOffsets)*psb_sizeof_ip + res = res + psb_size(a%diaOffsets)*psb_sizeof_ip + res = res + psb_size(a%val) * (2*psb_sizeof_dp) + + end function z_hdia_sizeof + + function z_hdia_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HDIA' + end function z_hdia_get_fmt + + function z_hdia_get_nzeros(a) result(res) + implicit none + class(psb_z_hdia_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzeros + end function z_hdia_get_nzeros + + subroutine z_hdia_set_nzeros(a,nz) + implicit none + class(psb_z_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + a%nzeros = nz + end subroutine z_hdia_set_nzeros + + ! function z_hdia_get_size(a) result(res) + ! implicit none + ! class(psb_z_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_) :: res + + ! res = -1 + + ! if (allocated(a%ja)) then + ! if (res >= 0) then + ! res = min(res,size(a%ja)) + ! else + ! res = size(a%ja) + ! end if + ! end if + ! if (allocated(a%val)) then + ! if (res >= 0) then + ! res = min(res,size(a%val)) + ! else + ! res = size(a%val) + ! end if + ! end if + + ! end function z_hdia_get_size + + + ! function z_hdia_get_nz_row(idx,a) result(res) + + ! implicit none + + ! class(psb_z_hdia_sparse_mat), intent(in) :: a + ! integer(psb_ipk_), intent(in) :: idx + ! integer(psb_ipk_) :: res + + ! res = 0 + + ! if ((1<=idx).and.(idx<=a%get_nrows())) then + ! res = a%irn(idx) + ! end if + + ! end function z_hdia_get_nz_row + + + + ! ! == =================================== + ! ! + ! ! + ! ! + ! ! Data management + ! ! + ! ! + ! ! + ! ! + ! ! + ! ! == =================================== + + subroutine z_hdia_free(a) + implicit none + + class(psb_z_hdia_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: i, info + + + if (allocated(a%hackOffsets))& + & deallocate(a%hackOffsets,stat=info) + if (allocated(a%diaOffsets))& + & deallocate(a%diaOffsets,stat=info) + if (allocated(a%val))& + & deallocate(a%val,stat=info) + a%nhacks=0 + + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + + return + + end subroutine z_hdia_free + + +end module psb_z_hdia_mat_mod diff --git a/ext/psb_z_hll_mat_mod.f90 b/ext/psb_z_hll_mat_mod.f90 new file mode 100644 index 00000000..98eb403f --- /dev/null +++ b/ext/psb_z_hll_mat_mod.f90 @@ -0,0 +1,564 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_z_hll_mat_mod + + use psb_z_base_mat_mod + use psi_ext_util_mod + + type, extends(psb_z_base_sparse_mat) :: psb_z_hll_sparse_mat + ! + ! HLL format. (Hacked ELL) + ! A modification of ELL. + ! Basic idea: pack and pad data in blocks of HCK rows; + ! this reduces the impact of a lone, very long row. + ! Notes: + ! 1. JA holds the column indices, padded with the row index. + ! 2. VAL holds the coefficients, padded with zeros + ! 3. IDIAG hold the position of the diagonal element + ! or 0 if it is not there, but is only relevant for + ! triangular matrices. In particular, a unit triangular matrix + ! will have IDIAG==0. + ! 4. IRN holds the actual number of nonzeros stored in each row + ! 5. Within a row, the indices are sorted for use of SV. + ! 6. hksz: hack size (multiple of 32) + ! 7. hkoffs(:): offsets of the starts of hacks inside ja/val + ! + ! + ! + integer(psb_ipk_) :: hksz, nzt + integer(psb_ipk_), allocatable :: irn(:), ja(:), idiag(:), hkoffs(:) + complex(psb_dpk_), allocatable :: val(:) + + contains + + procedure, pass(a) :: get_hksz => z_hll_get_hksz + procedure, pass(a) :: set_hksz => z_hll_set_hksz + procedure, pass(a) :: get_size => z_hll_get_size + procedure, pass(a) :: set_nzeros => z_hll_set_nzeros + procedure, pass(a) :: get_nzeros => z_hll_get_nzeros + procedure, nopass :: get_fmt => z_hll_get_fmt + procedure, pass(a) :: sizeof => z_hll_sizeof + procedure, pass(a) :: csmm => psb_z_hll_csmm + procedure, pass(a) :: csmv => psb_z_hll_csmv + procedure, pass(a) :: inner_cssm => psb_z_hll_cssm + procedure, pass(a) :: inner_cssv => psb_z_hll_cssv + procedure, pass(a) :: scals => psb_z_hll_scals + procedure, pass(a) :: scalv => psb_z_hll_scal + procedure, pass(a) :: maxval => psb_z_hll_maxval + procedure, pass(a) :: csnmi => psb_z_hll_csnmi + procedure, pass(a) :: csnm1 => psb_z_hll_csnm1 + procedure, pass(a) :: rowsum => psb_z_hll_rowsum + procedure, pass(a) :: arwsum => psb_z_hll_arwsum + procedure, pass(a) :: colsum => psb_z_hll_colsum + procedure, pass(a) :: aclsum => psb_z_hll_aclsum + procedure, pass(a) :: reallocate_nz => psb_z_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_hll_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_z_cp_hll_to_coo + procedure, pass(a) :: cp_from_coo => psb_z_cp_hll_from_coo + procedure, pass(a) :: cp_to_fmt => psb_z_cp_hll_to_fmt + procedure, pass(a) :: cp_from_fmt => psb_z_cp_hll_from_fmt + procedure, pass(a) :: mv_to_coo => psb_z_mv_hll_to_coo + procedure, pass(a) :: mv_from_coo => psb_z_mv_hll_from_coo + procedure, pass(a) :: mv_to_fmt => psb_z_mv_hll_to_fmt + procedure, pass(a) :: mv_from_fmt => psb_z_mv_hll_from_fmt + procedure, pass(a) :: csput_a => psb_z_hll_csput_a + procedure, pass(a) :: get_diag => psb_z_hll_get_diag + procedure, pass(a) :: csgetptn => psb_z_hll_csgetptn + procedure, pass(a) :: csgetrow => psb_z_hll_csgetrow + procedure, pass(a) :: get_nz_row => z_hll_get_nz_row + procedure, pass(a) :: reinit => psb_z_hll_reinit + procedure, pass(a) :: print => psb_z_hll_print + procedure, pass(a) :: free => z_hll_free + procedure, pass(a) :: mold => psb_z_hll_mold + + end type psb_z_hll_sparse_mat + + private :: z_hll_get_nzeros, z_hll_free, z_hll_get_fmt, & + & z_hll_get_size, z_hll_sizeof, z_hll_get_nz_row, & + & z_hll_set_nzeros, z_hll_get_hksz, z_hll_set_hksz + + interface + subroutine psb_z_hll_reallocate_nz(nz,a) + import :: psb_z_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_z_hll_sparse_mat), intent(inout) :: a + end subroutine psb_z_hll_reallocate_nz + end interface + + interface + subroutine psb_z_hll_reinit(a,clear) + import :: psb_z_hll_sparse_mat + class(psb_z_hll_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_z_hll_reinit + end interface + + interface + subroutine psb_z_hll_mold(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hll_mold + end interface + + interface + subroutine psb_z_hll_allocate_mnnz(m,n,a,nz) + import :: psb_z_hll_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_hll_allocate_mnnz + end interface + + interface + subroutine psb_z_hll_print(iout,a,iv,head,ivr,ivc) + import :: psb_z_hll_sparse_mat, psb_ipk_, psb_lpk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_lpk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_lpk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_z_hll_print + end interface + + interface + subroutine psb_z_cp_hll_to_coo(a,b,info) + import :: psb_z_coo_sparse_mat, psb_z_hll_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hll_to_coo + end interface + + interface + subroutine psb_z_cp_hll_from_coo(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hll_from_coo + end interface + + interface + subroutine psb_z_cp_hll_to_fmt(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hll_to_fmt + end interface + + interface + subroutine psb_z_cp_hll_from_fmt(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_cp_hll_from_fmt + end interface + + interface + subroutine psb_z_mv_hll_to_coo(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hll_to_coo + end interface + + interface + subroutine psb_z_mv_hll_from_coo(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hll_from_coo + end interface + + interface + subroutine psb_z_mv_hll_to_fmt(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hll_to_fmt + end interface + + interface + subroutine psb_z_mv_hll_from_fmt(a,b,info) + import :: psb_z_hll_sparse_mat, psb_z_base_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_mv_hll_from_fmt + end interface + + interface + subroutine psb_z_hll_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hll_csput_a + end interface + + interface + subroutine psb_z_hll_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_hll_csgetptn + end interface + + interface + subroutine psb_z_hll_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale,chksz) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + complex(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale,chksz + end subroutine psb_z_hll_csgetrow + end interface + + interface + subroutine psb_z_hll_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_z_coo_sparse_mat, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_z_hll_csgetblk + end interface + + interface + subroutine psb_z_hll_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hll_cssv + subroutine psb_z_hll_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hll_cssm + end interface + + interface + subroutine psb_z_hll_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hll_csmv + subroutine psb_z_hll_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_hll_csmm + end interface + + + interface + function psb_z_hll_maxval(a) result(res) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_hll_maxval + end interface + + interface + function psb_z_hll_csnmi(a) result(res) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_hll_csnmi + end interface + + interface + function psb_z_hll_csnm1(a) result(res) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_z_hll_csnm1 + end interface + + interface + subroutine psb_z_hll_rowsum(d,a) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_hll_rowsum + end interface + + interface + subroutine psb_z_hll_arwsum(d,a) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_hll_arwsum + end interface + + interface + subroutine psb_z_hll_colsum(d,a) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_hll_colsum + end interface + + interface + subroutine psb_z_hll_aclsum(d,a) + import :: psb_z_hll_sparse_mat, psb_dpk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_z_hll_aclsum + end interface + + interface + subroutine psb_z_hll_get_diag(a,d,info) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hll_get_diag + end interface + + interface + subroutine psb_z_hll_scal(d,a,info,side) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_z_hll_scal + end interface + + interface + subroutine psb_z_hll_scals(d,a,info) + import :: psb_z_hll_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_z_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_hll_scals + end interface + + interface psi_convert_hll_from_coo + subroutine psi_z_convert_hll_from_coo(a,hksz,tmp,info) + import :: psb_z_hll_sparse_mat, psb_ipk_, psb_z_coo_sparse_mat + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: hksz + class(psb_z_coo_sparse_mat), intent(in) :: tmp + integer(psb_ipk_), intent(out) :: info + end subroutine psi_z_convert_hll_from_coo + end interface + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function z_hll_sizeof(a) result(res) + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + if (a%is_dev()) call a%sync() + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%hkoffs) + + end function z_hll_sizeof + + function z_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLL' + end function z_hll_get_fmt + + subroutine z_hll_set_nzeros(a,n) + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%nzt = n + end subroutine z_hll_set_nzeros + + function z_hll_get_nzeros(a) result(res) + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + res = a%nzt + end function z_hll_get_nzeros + + function z_hll_get_size(a) result(res) + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + if (a%is_dev()) call a%sync() + + res = -1 + + if (allocated(a%ja)) then + if (res >= 0) then + res = min(res,size(a%ja)) + else + res = size(a%ja) + end if + end if + if (allocated(a%val)) then + if (res >= 0) then + res = min(res,size(a%val)) + else + res = size(a%val) + end if + end if + + end function z_hll_get_size + + + + function z_hll_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + if ((1<=idx).and.(idx<=a%get_nrows())) then + res = a%irn(idx) + end if + + end function z_hll_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine z_hll_free(a) + implicit none + + class(psb_z_hll_sparse_mat), intent(inout) :: a + + if (allocated(a%idiag)) deallocate(a%idiag) + if (allocated(a%irn)) deallocate(a%irn) + if (allocated(a%ja)) deallocate(a%ja) + if (allocated(a%val)) deallocate(a%val) + if (allocated(a%val)) deallocate(a%hkoffs) + call a%set_null() + call a%set_nrows(izero) + call a%set_ncols(izero) + call a%set_hksz(izero) + + return + + end subroutine z_hll_free + + subroutine z_hll_set_hksz(a,n) + implicit none + class(psb_z_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: n + + a%hksz = n + end subroutine z_hll_set_hksz + + function z_hll_get_hksz(a) result(res) + implicit none + class(psb_z_hll_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = a%hksz + + end function z_hll_get_hksz + +end module psb_z_hll_mat_mod diff --git a/ext/psi_c_ext_util_mod.f90 b/ext/psi_c_ext_util_mod.f90 new file mode 100644 index 00000000..e58c0d93 --- /dev/null +++ b/ext/psi_c_ext_util_mod.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psi_c_ext_util_mod + + use psb_base_mod, only : psb_ipk_, psb_spk_ + + interface psi_xtr_dia_from_coo + subroutine psi_c_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,& + & initdata,rdisp) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_), intent(in) :: nr, nc, nz, nrd, ncd, ia(:), ja(:), d(:) + complex(psb_spk_), intent(in) :: val(:) + complex(psb_spk_), intent(out) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initdata + integer(psb_ipk_), intent(in), optional :: rdisp + + end subroutine psi_c_xtr_dia_from_coo + end interface + + interface psi_xtr_ell_from_coo + subroutine psi_c_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,& + & valc,ja,val,irn,diag,ld) + import :: psb_ipk_, psb_spk_ + implicit none + integer(psb_ipk_) :: i,nr,mxrwl,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*) + complex(psb_spk_) :: valc(*), val(ld,*) + + end subroutine psi_c_xtr_ell_from_coo + end interface psi_xtr_ell_from_coo + + interface psi_xtr_coo_from_dia + subroutine psi_c_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,& + & info,rdisp) + import :: psb_ipk_, psb_spk_ + + implicit none + + integer(psb_ipk_), intent(in) :: nr,nc, nrd,ncd, offsets(:) + integer(psb_ipk_), intent(inout) :: ia(:), ja(:), nz + complex(psb_spk_), intent(inout) :: val(:) + complex(psb_spk_), intent(in) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: rdisp + end subroutine psi_c_xtr_coo_from_dia + end interface + +end module psi_c_ext_util_mod diff --git a/ext/psi_d_ext_util_mod.f90 b/ext/psi_d_ext_util_mod.f90 new file mode 100644 index 00000000..07de8ad1 --- /dev/null +++ b/ext/psi_d_ext_util_mod.f90 @@ -0,0 +1,80 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psi_d_ext_util_mod + + use psb_base_mod, only : psb_ipk_, psb_dpk_ + + interface psi_xtr_dia_from_coo + subroutine psi_d_xtr_dia_from_coo(nr,nc,nz,ia,ja,val,d,nrd,ncd,data,info,& + & initdata,rdisp) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_), intent(in) :: nr, nc, nz, nrd, ncd, ia(:), ja(:), d(:) + real(psb_dpk_), intent(in) :: val(:) + real(psb_dpk_), intent(out) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initdata + integer(psb_ipk_), intent(in), optional :: rdisp + + end subroutine psi_d_xtr_dia_from_coo + end interface + + interface psi_xtr_ell_from_coo + subroutine psi_d_xtr_ell_from_coo(i,nr,mxrwl,iac,jac,& + & valc,ja,val,irn,diag,ld) + import :: psb_ipk_, psb_dpk_ + implicit none + integer(psb_ipk_) :: i,nr,mxrwl,ld + integer(psb_ipk_) :: iac(*),jac(*),ja(ld,*),irn(*),diag(*) + real(psb_dpk_) :: valc(*), val(ld,*) + + end subroutine psi_d_xtr_ell_from_coo + end interface psi_xtr_ell_from_coo + + interface psi_xtr_coo_from_dia + subroutine psi_d_xtr_coo_from_dia(nr,nc,ia,ja,val,nz,nrd,ncd,data,offsets,& + & info,rdisp) + import :: psb_ipk_, psb_dpk_ + + implicit none + + integer(psb_ipk_), intent(in) :: nr,nc, nrd,ncd, offsets(:) + integer(psb_ipk_), intent(inout) :: ia(:), ja(:), nz + real(psb_dpk_), intent(inout) :: val(:) + real(psb_dpk_), intent(in) :: data(nrd,ncd) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: rdisp + end subroutine psi_d_xtr_coo_from_dia + end interface + +end module psi_d_ext_util_mod diff --git a/ext/psi_ext_util_mod.f90 b/ext/psi_ext_util_mod.f90 new file mode 100644 index 00000000..afb2c749 --- /dev/null +++ b/ext/psi_ext_util_mod.f90 @@ -0,0 +1,41 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psi_ext_util_mod + + use psi_i_ext_util_mod + use psi_s_ext_util_mod + use psi_c_ext_util_mod + use psi_d_ext_util_mod + use psi_z_ext_util_mod + +end module psi_ext_util_mod diff --git a/ext/psi_i_ext_util_mod.f90 b/ext/psi_i_ext_util_mod.f90 new file mode 100644 index 00000000..ac073f1d --- /dev/null +++ b/ext/psi_i_ext_util_mod.f90 @@ -0,0 +1,175 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psi_i_ext_util_mod + + use psb_base_mod, only : psb_ipk_ + ! + ! Hack size for HLL format. + ! + integer(psb_ipk_), parameter :: psb_hksz_def_ = 32 + integer(psb_ipk_), private, save :: psb_hksz = psb_hksz_def_ + logical, private, save :: psb_hll_use_vector = .true. +contains + + function psi_get_hksz() result(res) + implicit none + integer(psb_ipk_) :: res + res = psb_hksz + end function psi_get_hksz + + subroutine psi_set_hksz(size) + implicit none + integer(psb_ipk_), intent(in) :: size + if (size > 0) psb_hksz = size + end subroutine psi_set_hksz + + subroutine psi_set_hll_vector(val) + implicit none + logical, optional :: val + if (present(val)) then + psb_hll_use_vector = val + else + psb_hll_use_vector = .true. + end if + + end subroutine psi_set_hll_vector + + function psi_get_hll_vector() result(res) + implicit none + logical :: res + + res = psb_hll_use_vector + end function psi_get_hll_vector + + + ! + ! Compute offsets and allocation for DIAgonal storage. + ! Input: + ! nr,nc,nz,ia,ja: the matrix pattern in COO + ! Note: This routine is designed to be called + ! with either a full matrix or an horizontal stripe, + ! with the COO entries sorted in row major order, hence + ! it will handle the conversion of a strip, so it can + ! be used by both DIA and HDIA. In both cases NR and NC + ! *MUST* be the *GLOBAL* number of rows/columns, not those + ! of the strips, i.e. it must be that all entris in IA <=NR + ! and JA <= NC. + ! Output: + ! nd: number of nonzero diagonals + ! d: d(k) contains the index inside offset of diagonal k, + ! which is, if A(I,J) /= 0 then K=NR+J-I, or (optionally) 0. + ! *MUST* be allocated on the *global* size NR+NC-1 + ! + ! offset: for each of the ND nonzero diagonals, its offset J-I + ! + ! Notes: D and OFFSET together represent the set of diagonals; + ! D can be used outside to quickly find which entry of OFFSET + ! a given a(i,j) corresponds to, without doing a search. + ! + ! 1. Optionally init D vector to zeros + ! 2. Walk through the NZ pairs (I,J): + ! a. if it is a new diagonal add to a heap; + ! b. increase its population count stored in D(J-I+NR) + ! c. Keep track of maximum population count. + ! 3. Go through the ND diagonals, getting them K out of the heap in order: + ! a. Set offset(i) to K-NR == J-I + ! b. Set D(K) = i or 0 (depending on cleard) + ! + ! Setting to 0 allows to reuse this function in a loop in a dry run + ! to estimate the allocation size for HDIA; without settng to 0 we + ! would need to zero the whole vector, resulting + ! in a quadratic overall cost. Outside this subroutine, it is possible + ! to zero selectively the entres in D by using the indices in OFFSET. + ! + ! + subroutine psi_dia_offset_from_coo(nr,nc,nz,ia,ja,nd,d,offset,info,& + & initd,cleard) + use psb_base_mod + + implicit none + + integer(psb_ipk_), intent(in) :: nr, nc, nz, ia(:), ja(:) + integer(psb_ipk_), intent(inout) :: d(:) + integer(psb_ipk_), intent(out) :: offset(:) + integer(psb_ipk_), intent(out) :: nd + integer(psb_ipk_), intent(out) :: info + logical, intent(in), optional :: initd,cleard + + type(psb_i_heap) :: heap + integer(psb_ipk_) :: k,i,j,ir,ic, ndiag, id + logical :: initd_, cleard_ + character(len=20) :: name + + info = psb_success_ + initd_ = .true. + if (present(initd)) initd_ = initd + cleard_ = .false. + if (present(cleard)) cleard_ = cleard + + if (initd_) d(:) = 0 + + ndiag = nr+nc-1 + if (size(d)= itmax_) exit restart - it = 0 call psb_geaxpby(done,b,dzero,r,desc_a,info) + if (do_timings) call psb_toc(cg_vect) + if (do_timings) call psb_tic(cg_mv) if (info == psb_success_) call psb_spmm(-done,a,x,done,r,desc_a,info,work=aux) if (info /= psb_success_) then info=psb_err_from_subroutine_non_ call psb_errpush(info,name) goto 9999 end if - + if (do_timings) call psb_toc(cg_mv) + + if (do_timings) call psb_tic(cg_vect) rho = dzero call psb_init_conv(methdname,istop_,itrace_,itmax_,a,x,b,eps,desc_a,stopdat,info) @@ -237,13 +249,18 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& call psb_errpush(psb_err_from_subroutine_non_,name) goto 9999 End If - + if (do_timings) call psb_toc(cg_vect) + iteration: do it = it + 1 itx = itx + 1 - + if (do_timings) call psb_tic(cg_prec) + call prec%apply(r,z,desc_a,info,work=aux) + if (do_timings) call psb_toc(cg_prec) + if (do_timings) call psb_tic(cg_vect) + rho_old = rho rho = psb_gedot(r,z,desc_a,info) @@ -254,13 +271,18 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& if (debug_level >= psb_debug_ext_)& & write(debug_unit,*) me,' ',trim(name),& & ': CG Iteration breakdown rho' + if (do_timings) call psb_toc(cg_vect) exit iteration endif beta = rho/rho_old call psb_geaxpby(done,z,beta,p,desc_a,info) end if + if (do_timings) call psb_toc(cg_vect) + if (do_timings) call psb_tic(cg_mv) call psb_spmm(done,a,p,dzero,q,desc_a,info,work=aux) + if (do_timings) call psb_toc(cg_mv) + if (do_timings) call psb_tic(cg_vect) sigma = psb_gedot(p,q,desc_a,info) if (sigma == dzero) then if (debug_level >= psb_debug_ext_)& @@ -293,6 +315,7 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,& end do iteration end do restart + if (do_timings) call psb_toc(cg_vect) if (do_cond) then if (me == psb_root_) then #if defined(HAVE_LAPACK) diff --git a/rsb/Makefile b/rsb/Makefile new file mode 100755 index 00000000..58be5ee2 --- /dev/null +++ b/rsb/Makefile @@ -0,0 +1,53 @@ +include ../Make.inc +# +# Libraries used +# +PSBLIBDIR=$(PSBLASDIR)/lib/ +PSBINCDIR=$(PSBLASDIR)/include +PSBMODDIR=$(PSBLASDIR)/modules +LIBDIR=../lib +INCDIR=../include +MODDIR=../modules +PSBLAS_LIB= -L$(PSBLIBDIR) -lpsb_util -lpsb_base +#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +LDLIBS=$(PSBLDLIBS) +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG). $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(FIFLAG). $(LIBRSB_INCLUDES) $(LIBRSB_DEFINES) +CINCLUDES=-I$(GPU_INCDIR) -I$(CUDA_INCDIR) +LIBNAME=libpsb_rsb.a + + +FOBJS= rsb_mod.o psb_d_rsb_mat_mod.o \ + psb_rsb_penv_mod.o psb_rsb_mod.o + +COBJS= rsb_int.o + +OBJS=$(COBJS) $(FOBJS) + +lib: objs ilib + /bin/cp -p $(LIBNAME) $(LIBDIR) + +objs: $(OBJS) iobjs + /bin/cp -p *$(.mod) $(MODDIR) + +iobjs: + $(MAKE) -C impl objs +ilib: iobjs + $(MAKE) -C impl lib LIBNAME=$(LIBNAME) + +clean: cclean iclean + /bin/rm -f $(FOBJS) *$(.mod) *.a + +cclean: + /bin/rm -f $(COBJS) +iclean: + cd impl && $(MAKE) clean + +verycleanlib: + (cd ../..; make veryclean) + + + diff --git a/rsb/impl/Makefile b/rsb/impl/Makefile new file mode 100755 index 00000000..925f8e42 --- /dev/null +++ b/rsb/impl/Makefile @@ -0,0 +1,30 @@ +include ../../Make.inc +PSBLIBDIR=$(PSBLASDIR)/lib/ +PSBINCDIR=$(PSBLASDIR)/include +PSBMODDIR=$(PSBLASDIR)/modules +LIBDIR=../../lib +INCDIR=../../include +MODDIR=../../modules +PSBLAS_LIB= -L$(PSBLIBDIR) -lpsb_util -lpsb_base +#-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +LDLIBS=$(PSBLDLIBS) +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG).. $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_INCLUDES) $(FIFLAG).. $(LIBRSB_DEFINES) +CINCLUDES= +LIBNAME=libpsb_rsb.a + +OBJS= \ +psb_d_cp_rsb_from_coo.o \ +psb_d_mv_rsb_from_coo.o \ +psb_d_cp_rsb_to_coo.o psb_d_rsb_csmv.o + +objs: $(OBJS) + +lib: objs + ar cur ../$(LIBNAME) $(OBJS) + +clean: + /bin/rm -f $(OBJS) diff --git a/rsb/impl/psb_d_cp_rsb_from_coo.F90 b/rsb/impl/psb_d_cp_rsb_from_coo.F90 new file mode 100644 index 00000000..14592877 --- /dev/null +++ b/rsb/impl/psb_d_cp_rsb_from_coo.F90 @@ -0,0 +1,78 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_rsb_from_coo(a,b,info) + + use psb_base_mod + use rsb_mod + use psb_d_rsb_mat_mod, psb_protect_name => psb_d_cp_rsb_from_coo + implicit none + + class(psb_d_rsb_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + type(psb_d_coo_sparse_mat) :: tmp + Integer(Psb_ipk_) :: nza, nr, i,j,irw, idl,err_act, nc + integer(psb_ipk_) :: nzm, ir, ic, k ,bs + integer(psb_ipk_) :: debug_level, debug_unit + character(len=20) :: name + + info = psb_success_ +#ifdef HAVE_RSB + ! This is to have fix_coo called behind the scenes + call b%cp_to_coo(tmp,info) + + call tmp%fix(info) + if (info /= psb_success_) return + + nr = tmp%get_nrows() + nc = tmp%get_ncols() + nza = tmp%get_nzeros() + ! If it is sorted then we can lessen memory impact + a%psb_d_base_sparse_mat = tmp%psb_d_base_sparse_mat + + bs = 1!RSB_DEFAULT_BLOCKING + + info = Rsb_from_coo(a%rsbMat,b%val,b%ia,b%ja,nza,nr,nc,bs,bs) + + call tmp%free() +#endif + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_cp_rsb_from_coo diff --git a/rsb/impl/psb_d_cp_rsb_to_coo.f90 b/rsb/impl/psb_d_cp_rsb_to_coo.f90 new file mode 100644 index 00000000..3747100f --- /dev/null +++ b/rsb/impl/psb_d_cp_rsb_to_coo.f90 @@ -0,0 +1,77 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_cp_rsb_to_coo(a,b,info) + + use psb_base_mod + use rsb + use psb_d_rsb_mat_mod, psb_protect_name => psb_d_cp_rsb_to_coo + implicit none + + class(psb_d_rsb_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + real(psb_dpk_), pointer :: val_point(:) + type(c_ptr) :: t_p,s_p + + !locals + integer(psb_ipk_) :: i, j, k,nr,nza,nc + + info = psb_success_ + + nr = a%get_nrows() + nc = a%get_ncols() + nza = a%get_nzeros() + + call b%allocate(nr,nc,nza) + b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat + + allocate(val_point(nza)) + + t_p = c_loc(val_point(1)) + + info = rsb_mtx_get_coo(a%rsbMat, t_p, b%ia, b%ja,RSB_FLAG_FORTRAN_INDICES_INTERFACE) + + !info = rsb_mtx_switch_to_coo(a%rsbMat,t_p,b%ia,b%ja,RSB_FLAG_FORTRAN_INDICES_INTERFACE) + + k = rsb_perror(s_p,info) + + do i=1,nza + b%val(i)=val_point(i) + enddo + + deallocate(val_point) + + call b%set_nzeros(nza) + call b%fix(info) + +end subroutine psb_d_cp_rsb_to_coo diff --git a/rsb/impl/psb_d_mv_rsb_from_coo.f90 b/rsb/impl/psb_d_mv_rsb_from_coo.f90 new file mode 100644 index 00000000..9de74ec7 --- /dev/null +++ b/rsb/impl/psb_d_mv_rsb_from_coo.f90 @@ -0,0 +1,114 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_mv_rsb_from_coo(a,b,info) + + use psb_base_mod + use psb_d_rsb_mat_mod, psb_protect_name => psb_d_mv_rsb_from_coo + implicit none + + class(psb_d_rsb_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + !locals + Integer(Psb_ipk_) :: nza, nr, i,j,k, idl,err_act, nc, nzm, ir, ic + + info = psb_success_ + + call b%fix(info) + if (info /= psb_success_) return + + nr = b%get_nrows() + nc = b%get_ncols() + nza = b%get_nzeros() + ! if (b%is_sorted()) then + ! ! If it is sorted then we can lessen memory impact + ! a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat + + ! ! First compute the number of nonzeros in each row. + ! call psb_realloc(nr,a%irn,info) + ! if (info /= 0) goto 9999 + ! a%irn = 0 + ! do i=1, nza + ! a%irn(b%ia(i)) = a%irn(b%ia(i)) + 1 + ! end do + ! nzm = 0 + ! do i=1, nr + ! nzm = max(nzm,a%irn(i)) + ! a%irn(i) = 0 + ! end do + ! ! Second: copy the column indices. + ! call psb_realloc(nr,a%idiag,info) + ! if (info == 0) call psb_realloc(nr,nzm,a%ja,info) + ! if (info /= 0) goto 9999 + ! do i=1, nza + ! ir = b%ia(i) + ! ic = b%ja(i) + ! j = a%irn(ir) + 1 + ! a%ja(ir,j) = ic + ! a%irn(ir) = j + ! end do + ! ! Third copy the other stuff + ! deallocate(b%ia,b%ja,stat=info) + ! if (info == 0) call psb_realloc(nr,a%idiag,info) + ! if (info == 0) call psb_realloc(nr,nzm,a%val,info) + ! if (info /= 0) goto 9999 + ! k = 0 + ! do i=1, nr + ! a%idiag(i) = 0 + ! do j=1, a%irn(i) + ! k = k + 1 + ! a%val(i,j) = b%val(k) + ! if (i==a%ja(i,j)) a%idiag(i)=j + ! end do + ! do j=a%irn(i)+1, nzm + ! a%ja(i,j) = i + ! a%val(i,j) = dzero + ! end do + ! end do + + ! else + ! If b is not sorted, the only way is to copy. + call a%cp_from_coo(b,info) + if (info /= 0) goto 9999 + ! end if + + call b%free() + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_mv_rsb_from_coo diff --git a/rsb/impl/psb_d_rsb_csmv.F90 b/rsb/impl/psb_d_rsb_csmv.F90 new file mode 100644 index 00000000..f5dbba97 --- /dev/null +++ b/rsb/impl/psb_d_rsb_csmv.F90 @@ -0,0 +1,138 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +subroutine psb_d_rsb_csmv(alpha,a,x,beta,y,info,trans) + + use psb_base_mod + use rsb_mod + use psb_d_rsb_mat_mod, psb_protect_name => psb_d_rsb_csmv + implicit none + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer, intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer :: i,j,k,m,n, nnz, ir, jc + real(psb_dpk_) :: acc + type(c_ptr) :: gpX, gpY + logical :: tra + Integer :: err_act + character(len=20) :: name='d_rsb_csmv' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ +#ifdef HAVE_RSB + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info,name) + goto 9999 + endif + + + tra = (psb_toupper(trans_) == 'T').or.(psb_toupper(trans_)=='C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) d_rsb_get_size + procedure, pass(a) :: get_nzeros => d_rsb_get_nzeros + procedure, nopass :: get_fmt => d_rsb_get_fmt + procedure, pass(a) :: sizeof => d_rsb_sizeof + ! procedure, pass(a) :: csmm => psb_d_rsb_csmm + procedure, pass(a) :: csmv => psb_d_rsb_csmv + ! procedure, pass(a) :: inner_cssm => psb_d_rsb_cssm + ! procedure, pass(a) :: inner_cssv => psb_d_rsb_cssv + ! procedure, pass(a) :: scals => psb_d_rsb_scals + ! procedure, pass(a) :: scalv => psb_d_rsb_scal + ! procedure, pass(a) :: maxval => psb_d_rsb_maxval + ! procedure, pass(a) :: csnmi => psb_d_rsb_csnmi + ! procedure, pass(a) :: csnm1 => psb_d_rsb_csnm1 + ! procedure, pass(a) :: rowsum => psb_d_rsb_rowsum + ! procedure, pass(a) :: arwsum => psb_d_rsb_arwsum + ! procedure, pass(a) :: colsum => psb_d_rsb_colsum + ! procedure, pass(a) :: aclsum => psb_d_rsb_aclsum + ! procedure, pass(a) :: reallocate_nz => psb_d_rsb_reallocate_nz + ! procedure, pass(a) :: allocate_mnnz => psb_d_rsb_allocate_mnnz + procedure, pass(a) :: cp_to_coo => psb_d_cp_rsb_to_coo + procedure, pass(a) :: cp_from_coo => psb_d_cp_rsb_from_coo + ! procedure, pass(a) :: cp_to_fmt => psb_d_cp_rsb_to_fmt + ! procedure, pass(a) :: cp_from_fmt => psb_d_cp_rsb_from_fmt +! procedure, pass(a) :: mv_to_coo => psb_d_mv_rsb_to_coo + procedure, pass(a) :: mv_from_coo => psb_d_mv_rsb_from_coo + ! procedure, pass(a) :: mv_to_fmt => psb_d_mv_rsb_to_fmt + ! procedure, pass(a) :: mv_from_fmt => psb_d_mv_rsb_from_fmt + ! procedure, pass(a) :: csput => psb_d_rsb_csput + ! procedure, pass(a) :: get_diag => psb_d_rsb_get_diag + ! procedure, pass(a) :: csgetptn => psb_d_rsb_csgetptn + ! procedure, pass(a) :: csgetrow => psb_d_rsb_csgetrow + ! procedure, pass(a) :: get_nz_row => d_rsb_get_nz_row + ! procedure, pass(a) :: reinit => psb_d_rsb_reinit + ! procedure, pass(a) :: trim => psb_d_rsb_trim + ! procedure, pass(a) :: print => psb_d_rsb_print + procedure, pass(a) :: free => d_rsb_free + ! procedure, pass(a) :: mold => psb_d_rsb_mold + + end type psb_d_rsb_sparse_mat + + private :: d_rsb_get_nzeros, d_rsb_free, d_rsb_get_fmt, & + & d_rsb_get_size, d_rsb_sizeof, d_rsb_get_nz_row + + interface + subroutine psb_d_rsb_reallocate_nz(nz,a) + import :: psb_d_rsb_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: nz + class(psb_d_rsb_sparse_mat), intent(inout) :: a + end subroutine psb_d_rsb_reallocate_nz + end interface + + interface + subroutine psb_d_rsb_reinit(a,clear) + import :: psb_d_rsb_sparse_mat + class(psb_d_rsb_sparse_mat), intent(inout) :: a + logical, intent(in), optional :: clear + end subroutine psb_d_rsb_reinit + end interface + + interface + subroutine psb_d_rsb_trim(a) + import :: psb_d_rsb_sparse_mat + class(psb_d_rsb_sparse_mat), intent(inout) :: a + end subroutine psb_d_rsb_trim + end interface + + interface + subroutine psb_d_rsb_mold(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_rsb_mold + end interface + + interface + subroutine psb_d_rsb_allocate_mnnz(m,n,a,nz) + import :: psb_d_rsb_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: m,n + class(psb_d_rsb_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_rsb_allocate_mnnz + end interface + + interface + subroutine psb_d_rsb_print(iout,a,iv,head,ivr,ivc) + import :: psb_d_rsb_sparse_mat, psb_ipk_ + integer(psb_ipk_), intent(in) :: iout + class(psb_d_rsb_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in), optional :: iv(:) + character(len=*), optional :: head + integer(psb_ipk_), intent(in), optional :: ivr(:), ivc(:) + end subroutine psb_d_rsb_print + end interface + + interface + subroutine psb_d_cp_rsb_to_coo(a,b,info) + import :: psb_d_coo_sparse_mat, psb_d_rsb_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_rsb_to_coo + end interface + + interface + subroutine psb_d_cp_rsb_from_coo(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_rsb_from_coo + end interface + + interface + subroutine psb_d_cp_rsb_to_fmt(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_rsb_to_fmt + end interface + + interface + subroutine psb_d_cp_rsb_from_fmt(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_cp_rsb_from_fmt + end interface + + interface + subroutine psb_d_mv_rsb_to_coo(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_rsb_to_coo + end interface + + interface + subroutine psb_d_mv_rsb_from_coo(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_rsb_from_coo + end interface + + interface + subroutine psb_d_mv_rsb_to_fmt(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_rsb_to_fmt + end interface + + interface + subroutine psb_d_mv_rsb_from_fmt(a,b,info) + import :: psb_d_rsb_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_mv_rsb_from_fmt + end interface + + interface + subroutine psb_d_rsb_csput(nz,ia,ja,val,a,imin,imax,jmin,jmax,info,gtl) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(in) :: nz,ia(:), ja(:),& + & imin,imax,jmin,jmax + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: gtl(:) + end subroutine psb_d_rsb_csput + end interface + + interface + subroutine psb_d_rsb_csgetptn(imin,imax,a,nz,ia,ja,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_rsb_csgetptn + end interface + + interface + subroutine psb_d_rsb_csgetrow(imin,imax,a,nz,ia,ja,val,info,& + & jmin,jmax,iren,append,nzin,rscale,cscale) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_), intent(out) :: nz + integer(psb_ipk_), allocatable, intent(inout) :: ia(:), ja(:) + real(psb_dpk_), allocatable, intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax, nzin + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_rsb_csgetrow + end interface + + interface + subroutine psb_d_rsb_csgetblk(imin,imax,a,b,info,& + & jmin,jmax,iren,append,rscale,cscale) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(in) :: imin,imax + integer(psb_ipk_),intent(out) :: info + logical, intent(in), optional :: append + integer(psb_ipk_), intent(in), optional :: iren(:) + integer(psb_ipk_), intent(in), optional :: jmin,jmax + logical, intent(in), optional :: rscale,cscale + end subroutine psb_d_rsb_csgetblk + end interface + + interface + subroutine psb_d_rsb_cssv(alpha,a,x,beta,y,info,trans) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_rsb_cssv + subroutine psb_d_rsb_cssm(alpha,a,x,beta,y,info,trans) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_rsb_cssm + end interface + + interface + subroutine psb_d_rsb_csmv(alpha,a,x,beta,y,info,trans) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_rsb_csmv + subroutine psb_d_rsb_csmm(alpha,a,x,beta,y,info,trans) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_rsb_csmm + end interface + + + interface + function psb_d_rsb_maxval(a) result(res) + import :: psb_d_rsb_sparse_mat, psb_dpk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_rsb_maxval + end interface + + interface + function psb_d_rsb_csnmi(a) result(res) + import :: psb_d_rsb_sparse_mat, psb_dpk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_rsb_csnmi + end interface + + interface + function psb_d_rsb_csnm1(a) result(res) + import :: psb_d_rsb_sparse_mat, psb_dpk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_) :: res + end function psb_d_rsb_csnm1 + end interface + + interface + subroutine psb_d_rsb_rowsum(d,a) + import :: psb_d_rsb_sparse_mat, psb_dpk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_rsb_rowsum + end interface + + interface + subroutine psb_d_rsb_arwsum(d,a) + import :: psb_d_rsb_sparse_mat, psb_dpk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_rsb_arwsum + end interface + + interface + subroutine psb_d_rsb_colsum(d,a) + import :: psb_d_rsb_sparse_mat, psb_dpk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_rsb_colsum + end interface + + interface + subroutine psb_d_rsb_aclsum(d,a) + import :: psb_d_rsb_sparse_mat, psb_dpk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + end subroutine psb_d_rsb_aclsum + end interface + + interface + subroutine psb_d_rsb_get_diag(a,d,info) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(out) :: d(:) + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_rsb_get_diag + end interface + + interface + subroutine psb_d_rsb_scal(d,a,info,side) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + end subroutine psb_d_rsb_scal + end interface + + interface + subroutine psb_d_rsb_scals(d,a,info) + import :: psb_d_rsb_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_rsb_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_rsb_scals + end interface + + + +contains + + ! == =================================== + ! + ! + ! + ! Getters + ! + ! + ! + ! + ! + ! == =================================== + + + function d_rsb_sizeof(a) result(res) + implicit none + class(psb_d_rsb_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + + + + end function d_rsb_sizeof + + function d_rsb_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'RSB' + end function d_rsb_get_fmt + + function d_rsb_get_nzeros(a) result(res) + use rsb_mod + implicit none + class(psb_d_rsb_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + res = Rsb_get_nzeros(a%rsbMat) + + end function d_rsb_get_nzeros + + function d_rsb_get_size(a) result(res) + implicit none + class(psb_d_rsb_sparse_mat), intent(in) :: a + integer(psb_ipk_) :: res + + end function d_rsb_get_size + + + function d_rsb_get_nz_row(idx,a) result(res) + + implicit none + + class(psb_d_rsb_sparse_mat), intent(in) :: a + integer(psb_ipk_), intent(in) :: idx + integer(psb_ipk_) :: res + + res = 0 + + + end function d_rsb_get_nz_row + + + + ! == =================================== + ! + ! + ! + ! Data management + ! + ! + ! + ! + ! + ! == =================================== + + subroutine d_rsb_free(a) + use rsb_mod + implicit none + + class(psb_d_rsb_sparse_mat), intent(inout) :: a + + call freeRsbMat(a%rsbMat) + + call a%set_null() + call a%set_nrows(0) + call a%set_ncols(0) + + return + + end subroutine d_rsb_free + + +end module psb_d_rsb_mat_mod diff --git a/rsb/psb_rsb_mod.F90 b/rsb/psb_rsb_mod.F90 new file mode 100644 index 00000000..73db825a --- /dev/null +++ b/rsb/psb_rsb_mod.F90 @@ -0,0 +1,50 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_rsb_mod + use psb_const_mod + use rsb_mod + use psb_rsb_penv_mod + ! use psb_d_ell_mat_mod + ! use psb_s_ell_mat_mod + ! use psb_z_ell_mat_mod + ! use psb_c_ell_mat_mod + + ! use psb_s_hll_mat_mod + ! use psb_d_hll_mat_mod + ! use psb_c_hll_mat_mod + ! use psb_z_hll_mat_mod + + ! use psb_d_dia_mat_mod + ! use psb_d_hdia_mat_mod + use psb_d_rsb_mat_mod +end module psb_rsb_mod diff --git a/rsb/psb_rsb_penv_mod.F90 b/rsb/psb_rsb_penv_mod.F90 new file mode 100644 index 00000000..12a56d83 --- /dev/null +++ b/rsb/psb_rsb_penv_mod.F90 @@ -0,0 +1,99 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module psb_rsb_penv_mod + use psb_const_mod + use psb_penv_mod + !use psi_comm_buffers_mod, only : psb_buffer_queue + use iso_c_binding + +! interface psb_rsb_init +! module procedure psb_rsb_init +! end interface +#if defined(HAVE_RSB) + interface + function psb_C_rsb_init() & + & result(res) bind(c,name='rsbInit') + use iso_c_binding + integer(c_int) :: res + end function psb_C_rsb_init + end interface + + interface + function psb_C_rsb_exit() & + & result(res) bind(c,name='rsbExit') + use iso_c_binding + integer(c_int) :: res + end function psb_C_rsb_exit + end interface + +#endif + +contains + ! !!!!!!!!!!!!!!!!!!!!!! + ! + ! Environment handling + ! + ! !!!!!!!!!!!!!!!!!!!!!! + + + subroutine psb_rsb_init() + use psb_penv_mod + use psb_const_mod + use psb_error_mod + ! type(psb_ctxt_type), intent(in) :: ctxt + ! integer, intent(in), optional :: dev + + integer :: info + +#if defined (HAVE_RSB) + info = psb_C_rsb_init() + if (info/=0) write(*,*) 'error during rsb_init' +#endif + end subroutine psb_rsb_init + + subroutine psb_rsb_exit() + use psb_penv_mod + use psb_const_mod + use psb_error_mod + ! type(psb_ctxt_type), intent(in) :: ctxt + ! integer, intent(in), optional :: dev + + integer :: info + +#if defined (HAVE_RSB) + info = psb_C_rsb_exit() + if (info/=0) write(*,*) 'error during rsb_exit' +#endif + end subroutine psb_rsb_exit + +end module psb_rsb_penv_mod diff --git a/rsb/rsb_int.c b/rsb/rsb_int.c new file mode 100644 index 00000000..dc4a8026 --- /dev/null +++ b/rsb/rsb_int.c @@ -0,0 +1,110 @@ + /* Parallel Sparse BLAS GPU plugin */ + /* (C) Copyright 2013 */ + + /* Salvatore Filippone */ + /* Alessandro Fanfarillo */ + + /* Redistribution and use in source and binary forms, with or without */ + /* modification, are permitted provided that the following conditions */ + /* are met: */ + /* 1. Redistributions of source code must retain the above copyright */ + /* notice, this list of conditions and the following disclaimer. */ + /* 2. Redistributions in binary form must reproduce the above copyright */ + /* notice, this list of conditions, and the following disclaimer in the */ + /* documentation and/or other materials provided with the distribution. */ + /* 3. The name of the PSBLAS group or the names of its contributors may */ + /* not be used to endorse or promote products derived from this */ + /* software without specific written permission. */ + + /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS */ + /* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */ + /* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */ + /* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS */ + /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR */ + /* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF */ + /* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS */ + /* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN */ + /* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) */ + /* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ + /* POSSIBILITY OF SUCH DAMAGE. */ + +#include +#if defined(HAVE_RSB) +#include "rsb.h" +#include "rsb_int.h" + +int rsbInit() +{ + rsb_err_t errval = RSB_ERR_NO_ERROR; + + if((errval = rsb_lib_init(RSB_NULL_INIT_OPTIONS))!=RSB_ERR_NO_ERROR) + { + printf("Error initializing the library!\n"); + return 1; + } + + return 0; +} + +int rsbExit() +{ + rsb_err_t errval = RSB_ERR_NO_ERROR; + + if((errval = rsb_lib_exit(RSB_NULL_INIT_OPTIONS))!=RSB_ERR_NO_ERROR) + { + printf("Error finalizing the library!\n"); + return 1; + } + + return 0; +} + +int Rsb_double_from_coo(void **rsbMat, double *va, int *ia,int *ja,int nnz,int nr, + int nc, int br, int bc) +{ + int i=0; + rsb_err_t errval = RSB_ERR_NO_ERROR; + + *rsbMat = rsb_mtx_alloc_from_coo_const(va,ia,ja,nnz,RSB_NUMERICAL_TYPE_DOUBLE,nr,nc,br,bc,RSB_FLAG_FORTRAN_INDICES_INTERFACE,&errval); + + if((!*rsbMat) || (errval != RSB_ERR_NO_ERROR)) + { + printf("Error while allocating the matrix!\n"); + return 1; + } + return 0; +} + +//X is the input and y is the output +int Rsb_double_spmv(void *rsbMat, double *x, double alfa, double *y, double beta,char trans) +{ + rsb_err_t errval = RSB_ERR_NO_ERROR; + + if(trans=='N') + errval = rsb_spmv(RSB_TRANSPOSITION_N,&alfa,(struct rsb_mtx_t *)rsbMat,x,1,&beta,y,1); + else + errval = rsb_spmv(RSB_TRANSPOSITION_T,&alfa,(struct rsb_mtx_t *)rsbMat,x,1,&beta,y,1); + + if(errval != RSB_ERR_NO_ERROR) + { + printf("Error performing a multiplication!\n"); + return 1; + } + + return 0; +} + +//Should it return a long instead of integer? +int Rsb_getNZeros(void *rsbMat) +{ + int res = 0; + rsb_mtx_get_info((struct rsb_mtx_t *)rsbMat,RSB_MIF_MATRIX_NNZ__TO__RSB_NNZ_INDEX_T,(void *)&res); + return res; +} + +void freeRsbMat(void *rsbMat) +{ + rsb_mtx_free(rsbMat); +} + +#endif diff --git a/rsb/rsb_int.h b/rsb/rsb_int.h new file mode 100644 index 00000000..c08f114d --- /dev/null +++ b/rsb/rsb_int.h @@ -0,0 +1,2 @@ +int Rsb_double_from_coo(void **rsbMat,double *va, int *ia,int *ja,int nnz,int nr, + int nc, int br, int bc); diff --git a/rsb/rsb_mod.F90 b/rsb/rsb_mod.F90 new file mode 100644 index 00000000..e5252654 --- /dev/null +++ b/rsb/rsb_mod.F90 @@ -0,0 +1,235 @@ +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! + + +module rsb_mod + use rsb + use iso_c_binding + +#ifdef HAVE_RSB + + interface Rsb_from_coo + function Rsb_double_from_coo(rsbMat,va,ia,ja,nnz,nr,nc,br,bc) & + & result(res) bind(c,name='Rsb_double_from_coo') + use iso_c_binding + integer(c_int) :: res + type(c_ptr) :: rsbMat + real(c_double) :: va(*) + integer(c_int) :: ia(*),ja(*) + integer(c_int),value :: nnz,nr,nc,br,bc + end function Rsb_double_from_coo + end interface Rsb_from_coo + + interface + function Rsb_get_nzeros(rsbMat) & + & result(res) bind(c,name='Rsb_getNZeros') + use iso_c_binding + integer(c_int) :: res + type(c_ptr),value :: rsbMat + end function Rsb_get_nzeros + end interface + + interface Rsb_spmv + function Rsb_double_spmv(rsbMat,x,alfa,y,beta,trans) & + & result(res) bind(c,name='Rsb_double_spmv') + use iso_c_binding + integer(c_int) :: res + type(c_ptr),value :: rsbMat + real(c_double) :: x(*),y(*) + real(c_double),value :: alfa,beta + character(c_char),value :: trans + end function Rsb_double_spmv + end interface Rsb_spmv + + interface + subroutine freeRsbMat(rsbMat) & + & bind(c,name='freeRsbMat') + use iso_c_binding + type(c_ptr), value :: rsbMat + end subroutine freeRsbMat + end interface + + ! interface writeEllDevice + + ! function writeEllDeviceFloat(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='writeEllDeviceFloat') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! real(c_float) :: val(ldj,*) + ! integer(c_int) :: ja(ldj,*),irn(*) + ! end function writeEllDeviceFloat + + ! function writeEllDeviceDouble(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='writeEllDeviceDouble') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! real(c_double) :: val(ldj,*) + ! integer(c_int) :: ja(ldj,*),irn(*) + ! end function writeEllDeviceDouble + + ! function writeEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='writeEllDeviceFloatComplex') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! complex(c_float_complex) :: val(ldj,*) + ! integer(c_int) :: ja(ldj,*),irn(*) + ! end function writeEllDeviceFloatComplex + + ! function writeEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='writeEllDeviceDoubleComplex') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! complex(c_double_complex) :: val(ldj,*) + ! integer(c_int) :: ja(ldj,*),irn(*) + ! end function writeEllDeviceDoubleComplex + + ! end interface writeEllDevice + + ! interface readEllDevice + + ! function readEllDeviceFloat(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='readEllDeviceFloat') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! real(c_float) :: val(ldj,*) + ! integer(c_int) :: ja(ldj,*),irn(*) + ! end function readEllDeviceFloat + + ! function readEllDeviceDouble(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='readEllDeviceDouble') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! real(c_double) :: val(ldj,*) + ! integer(c_int) :: ja(ldj,*),irn(*) + ! end function readEllDeviceDouble + + ! function readEllDeviceFloatComplex(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='readEllDeviceFloatComplex') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! complex(c_float_complex) :: val(ldj,*) + ! integer(c_int) :: ja(ldj,*),irn(*) + ! end function readEllDeviceFloatComplex + + ! function readEllDeviceDoubleComplex(deviceMat,val,ja,ldj,irn) & + ! & result(res) bind(c,name='readEllDeviceDoubleComplex') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat + ! integer(c_int), value :: ldj + ! complex(c_double_complex) :: val(ldj,*) + ! integer(c_int) :: ja(ldj,*),irn(*) + ! end function readEllDeviceDoubleComplex + + ! end interface readEllDevice + + ! interface + ! subroutine resetEllTimer() bind(c,name='resetEllTimer') + ! use iso_c_binding + ! end subroutine resetEllTimer + ! end interface + ! interface + ! function getEllTimer() & + ! & bind(c,name='getEllTimer') result(res) + ! use iso_c_binding + ! real(c_double) :: res + ! end function getEllTimer + ! end interface + + + ! interface + ! function getEllDevicePitch(deviceMat) & + ! & bind(c,name='getEllDevicePitch') result(res) + ! use iso_c_binding + ! type(c_ptr), value :: deviceMat + ! integer(c_int) :: res + ! end function getEllDevicePitch + ! end interface + + ! interface + ! function getEllDeviceMaxRowSize(deviceMat) & + ! & bind(c,name='getEllDeviceMaxRowSize') result(res) + ! use iso_c_binding + ! type(c_ptr), value :: deviceMat + ! integer(c_int) :: res + ! end function getEllDeviceMaxRowSize + ! end interface + + + ! interface spmvEllDevice + ! function spmvEllDeviceFloat(deviceMat,alpha,x,beta,y) & + ! & result(res) bind(c,name='spmvEllDeviceFloat') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat, x, y + ! real(c_float),value :: alpha, beta + ! end function spmvEllDeviceFloat + ! function spmvEllDeviceDouble(deviceMat,alpha,x,beta,y) & + ! & result(res) bind(c,name='spmvEllDeviceDouble') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat, x, y + ! real(c_double),value :: alpha, beta + ! end function spmvEllDeviceDouble + ! function spmvEllDeviceFloatComplex(deviceMat,alpha,x,beta,y) & + ! & result(res) bind(c,name='spmvEllDeviceFloatComplex') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat, x, y + ! complex(c_float_complex),value :: alpha, beta + ! end function spmvEllDeviceFloatComplex + ! function spmvEllDeviceDoubleComplex(deviceMat,alpha,x,beta,y) & + ! & result(res) bind(c,name='spmvEllDeviceDoubleComplex') + ! use iso_c_binding + ! integer(c_int) :: res + ! type(c_ptr), value :: deviceMat, x, y + ! complex(c_double_complex),value :: alpha, beta + ! end function spmvEllDeviceDoubleComplex + ! end interface spmvEllDevice + +#endif + + +end module rsb_mod diff --git a/test/cudakern/Makefile b/test/cudakern/Makefile new file mode 100755 index 00000000..5d938973 --- /dev/null +++ b/test/cudakern/Makefile @@ -0,0 +1,65 @@ +TOPDIR=../.. +include $(TOPDIR)/Make.inc +# +# Libraries used +# +LIBDIR=$(TOPDIR)/lib/ +PSBLIBDIR=$(TOPDIR)/lib/ +OPTDIR=$(LIBDIR) +PSBINCDIR=$(TOPDIR)/include +PSBMODDIR=$(TOPDIR)/modules +PSBLAS_LIB= -L$(LIBDIR) -L$(PSBLIBDIR) $(LCUDA) -lpsb_ext -lpsb_util -lpsb_base +INCDIR=$(TOPDIR)/include +MODDIR=$(TOPDIR)/modules + +LDLIBS=$(PSBGPULDLIBS) + +FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_DEFINES) + +ZTOBJS=z_file_spmv.o data_input.o +CTOBJS=c_file_spmv.o data_input.o +DTOBJS=d_file_spmv.o data_input.o +STOBJS=s_file_spmv.o data_input.o +DPGOBJS=dpdegenmv.o +SPGOBJS=spdegenmv.o +EXEDIR=./runs + +all: dir pgen file +pgen: dpdegenmv spdegenmv +file: s_file_spmv c_file_spmv d_file_spmv z_file_spmv +dpdegenmv spdegenmv s_file_spmv c_file_spmv d_file_spmv z_file_spmv: dir +dir: + (if test ! -d $(EXEDIR); then mkdir $(EXEDIR); fi) + +dpdegenmv: $(DPGOBJS) + $(FLINK) $(LOPT) $(DPGOBJS) -fopenmp -o dpdegenmv $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS) + /bin/mv dpdegenmv $(EXEDIR) +spdegenmv: $(SPGOBJS) + $(FLINK) $(LOPT) $(SPGOBJS) -o spdegenmv $(PSBLAS_LIB) $(LDLIBS) + /bin/mv spdegenmv $(EXEDIR) + +z_file_spmv: $(ZTOBJS) + $(FLINK) $(LOPT) $(ZTOBJS) -o z_file_spmv $(PSBLAS_LIB) $(LDLIBS) + /bin/mv z_file_spmv $(EXEDIR) +c_file_spmv: $(CTOBJS) + $(FLINK) $(LOPT) $(CTOBJS) -o c_file_spmv $(PSBLAS_LIB) $(LDLIBS) + /bin/mv c_file_spmv $(EXEDIR) +d_file_spmv: $(DTOBJS) + $(FLINK) $(LOPT) $(DTOBJS) -o d_file_spmv $(PSBLAS_LIB) $(LDLIBS) + /bin/mv d_file_spmv $(EXEDIR) +s_file_spmv: $(STOBJS) + $(FLINK) $(LOPT) $(STOBJS) -o s_file_spmv $(PSBLAS_LIB) $(LDLIBS) + /bin/cp s_file_spmv $(EXEDIR) + +d_file_spmv.o s_file_spmv.o z_file_spmv.o c_file_spmv.o: data_input.o + +clean: + /bin/rm -f $(DTOBJS) $(STOBJS) $(DPGOBJS) $(SPGOBJS) $(ZTOBJS) $(CTOBJS) \ + $(EXEDIR)/dpdegenmv $(EXEDIR)/spdegenmv \ + $(EXEDIR)/d_file_spmv $(EXEDIR)/s_file_spmv \ + $(EXEDIR)/z_file_spmv $(EXEDIR)/c_file_spmv + +lib: + (cd ../../; make library) +verycleanlib: + (cd ../../; make veryclean) diff --git a/test/cudakern/c_file_spmv.F90 b/test/cudakern/c_file_spmv.F90 new file mode 100644 index 00000000..2f9840ec --- /dev/null +++ b/test/cudakern/c_file_spmv.F90 @@ -0,0 +1,491 @@ +! +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +program c_file_spmv + use psb_base_mod + use psb_util_mod + use psb_ext_mod +#ifdef HAVE_GPU + use psb_gpu_mod +#endif + use data_input + implicit none + + ! input parameters + character(len=200) :: mtrx_file + + ! sparse matrices + type(psb_cspmat_type) :: a, aux_a, agpu + + ! dense matrices + complex(psb_spk_), allocatable, target :: aux_b(:,:), d(:) + complex(psb_spk_), allocatable , save :: x_col_glob(:), r_col_glob(:) + complex(psb_spk_), pointer :: b_col_glob(:) + type(psb_c_vect_type) :: b_col, x_col, r_col + type(psb_c_vect_type) :: xg, bg, xv, bv +#ifdef HAVE_GPU + type(psb_c_vect_gpu) :: vmold +#endif + complex(psb_spk_), allocatable :: xc1(:),xc2(:) + ! communications data structure + type(psb_desc_type):: desc_a + + type(psb_ctxt_type) :: ctxt + integer :: iam, np + integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes + real(psb_spk_) :: damatsize, dgmatsize + complex(psb_spk_) :: err, eps + + character(len=5) :: acfmt, agfmt + character(len=20) :: name + character(len=2) :: filefmt + integer, parameter :: iunit=12 + integer, parameter :: times=2000 + integer, parameter :: ntests=200, ngpu=50, ncnv=20 + + type(psb_c_coo_sparse_mat), target :: acoo + type(psb_c_csr_sparse_mat), target :: acsr + type(psb_c_ell_sparse_mat), target :: aell + type(psb_c_hll_sparse_mat), target :: ahll +#ifdef HAVE_GPU + type(psb_c_elg_sparse_mat), target :: aelg + type(psb_c_csrg_sparse_mat), target :: acsrg + type(psb_c_hybg_sparse_mat), target :: ahybg + type(psb_c_hlg_sparse_mat), target :: ahlg +#endif + class(psb_c_base_sparse_mat), pointer :: acmold, agmold + ! other variables + integer :: i,info,j,nrt, ns, nr, ipart, ig, nrg + integer :: internal, m,ii,nnzero + real(psb_dpk_) :: t0,t1, t2, tprec, flops + real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,& + & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 + integer :: nrhs, nrow, n_row, dim, nv, ne + integer, allocatable :: ivg(:), ipv(:) + + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) +#ifdef HAVE_GPU + call psb_gpu_init(ctxt) +#endif + if (iam < 0) then + ! This should not happen, but just in case + call psb_exit(ctxt) + stop + endif + + + name='file_spmv' + if(psb_get_errstatus() /= 0) goto 9999 + info=psb_success_ + call psb_set_errverbosity(2) + if (iam == psb_root_) then + write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ + write(*,*) 'This is the ',trim(name),' sample program' + end if +#ifdef HAVE_GPU + write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) +#endif + + if (iam == 0) then + write(*,*) 'Matrix? ' + call read_data(mtrx_file,psb_inp_unit) + write(*,*) 'file format' + call read_data(filefmt,psb_inp_unit) + write(*,*) 'CPU format' + call read_data(acfmt,psb_inp_unit) + write(*,*) 'GPU format' + call read_data(agfmt,psb_inp_unit) + write(*,*) 'distribution ' + call read_data(ipart,psb_inp_unit) + write(*,*) 'Read all data, going on' + end if + call psb_bcast(ctxt,mtrx_file) + call psb_bcast(ctxt,filefmt) + call psb_bcast(ctxt,acfmt) + call psb_bcast(ctxt,agfmt) + call psb_bcast(ctxt,ipart) + call psb_barrier(ctxt) + t0 = psb_wtime() + ! read the input matrix to be processed and (possibly) the rhs + nrhs = 1 + + if (iam==psb_root_) then + select case(psb_toupper(filefmt)) + case('MM') + ! For Matrix Market we have an input file for the matrix + ! and an (optional) second file for the RHS. + call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file) + + case ('HB') + ! For Harwell-Boeing we have a single file which may or may not + ! contain an RHS. + call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file) + + case default + info = -1 + write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt + end select + if (info /= 0) then + write(psb_err_unit,*) 'Error while reading input matrix ' + call psb_abort(ctxt) + end if + + ! + ! Always get nnz from original matrix. + ! Some formats add fill-in and do not keep track + ! of how many were added. So if the original matrix + ! contained some extra zeros, the count of entries + ! is not recoverable exactly. + ! + nrt = aux_a%get_nrows() + annz = aux_a%get_nzeros() + call psb_bcast(ctxt,annz) + call psb_bcast(ctxt,nrt) + + write(psb_out_unit,'("Generating an rhs...")') + write(psb_out_unit,'(" ")') + call psb_realloc(nrt,1,aux_b,info) + if (info /= 0) then + call psb_errpush(4000,name) + goto 9999 + endif + + b_col_glob => aux_b(:,1) + do i=1, nrt + b_col_glob(i) = 1.d0 + enddo + + else + + call psb_bcast(ctxt,annz) + call psb_bcast(ctxt,nrt) + + end if + + + select case(psb_toupper(acfmt)) + case('COO') + acmold => acoo + case('CSR') + acmold => acsr + case('ELL') + acmold => aell + case('HLL') + acmold => ahll + case default + write(*,*) 'Unknown format defaulting to CSR' + acmold => acsr + end select + +#ifdef HAVE_GPU + select case(psb_toupper(agfmt)) + case('ELG') + agmold => aelg + case('HLG') + agmold => ahlg + case('CSRG') + agmold => acsrg + case('HYBG') + agmold => ahybg + case default + write(*,*) 'Unknown format defaulting to HLG' + agmold => ahlg + end select +#endif + + + ! switch over different partition types + if (ipart == 0) then + call psb_barrier(ctxt) + if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")') + allocate(ivg(nrt),ipv(np)) + do i=1,nrt + call part_block(i,nrt,np,ipv,nv) + ivg(i) = ipv(1) + enddo + call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) + else if (ipart == 2) then + if (iam==psb_root_) then + write(psb_out_unit,'("Partition type: graph")') + write(psb_out_unit,'(" ")') + ! write(psb_err_unit,'("Build type: graph")') + call build_mtpart(aux_a,np) + endif + call psb_barrier(ctxt) + call distr_mtpart(psb_root_,ctxt) + call getv_mtpart(ivg) + call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) + else + if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")') + call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block) + end if + + call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_) + + t2 = psb_wtime() - t0 + + call psb_amx(ctxt, t2) + + if (iam==psb_root_) then + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2 + write(psb_out_unit,'(" ")') + end if + call a%cscnv(aux_a,info,mold=acoo) + tcnvcsr = 0 + tcnvgpu = 0 + nr = desc_a%get_local_rows() + nrg = desc_a%get_global_rows() + call psb_geall(x_col,desc_a,info) + do i=1, nr + call desc_a%l2g(i,ig,info) + call psb_geins(ione,(/ig/),(/(cone + (cone*ig)/nrg)/),x_col,desc_a,info) + end do + call psb_geasb(x_col,desc_a,info) + do j=1, ncnv + call aux_a%cscnv(a,info,mold=acoo) + call psb_barrier(ctxt) + t1 = psb_wtime() + call a%cscnv(info,mold=acmold) + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + tcnvcsr = tcnvcsr + t2 + if (j==1) tcnvc1 = t2 + xc1 = x_col%get_vect() + call xv%bld(xc1) + call psb_geasb(bv,desc_a,info,scratch=.true.) + +#ifdef HAVE_GPU + + call aux_a%cscnv(agpu,info,mold=acoo) + call xg%bld(xc1,mold=vmold) + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_barrier(ctxt) + t1 = psb_wtime() + call agpu%cscnv(info,mold=agmold) + call psb_gpu_DeviceSync() + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + if (j==1) tcnvg1 = t2 + tcnvgpu = tcnvgpu + t2 +#endif + end do + + call psb_barrier(ctxt) + t1 = psb_wtime() + do i=1,ntests + call psb_spmm(cone,a,xv,czero,bv,desc_a,info) + end do + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + call psb_amx(ctxt,t2) + +#ifdef HAVE_GPU + ! FIXME: cache flush needed here + call psb_barrier(ctxt) + tt1 = psb_wtime() + do i=1,ntests + call psb_spmm(cone,agpu,xv,czero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 1 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + tt2 = psb_wtime() - tt1 + call psb_amx(ctxt,tt2) + xc1 = bv%get_vect() + xc2 = bg%get_vect() + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on xGPU',eps + + call xg%sync() + ! FIXME: cache flush needed here + + call psb_barrier(ctxt) + gt1 = psb_wtime() + do i=1,ntests*ngpu + call psb_spmm(cone,agpu,xg,czero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 2 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + ! For timing purposes we need to make sure all threads + ! in the device are done. + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + gt2 = psb_wtime() - gt1 + call psb_amx(ctxt,gt2) + call bg%sync() + xc1 = bv%get_vect() + xc2 = bg%get_vect() + call psb_geaxpby(-cone,bg,+cone,bv,desc_a,info) + eps = psb_geamax(bv,desc_a,info) + + call psb_amx(ctxt,t2) + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on GPU',eps +#endif + + + amatsize = a%sizeof() + agmatsize = agpu%sizeof() + damatsize = amatsize + damatsize = damatsize/(1024*1024) + dgmatsize = agmatsize + dgmatsize = dgmatsize/(1024*1024) + descsize = psb_sizeof(desc_a) + call psb_sum(ctxt,damatsize) + call psb_sum(ctxt,dgmatsize) + call psb_sum(ctxt,descsize) + + if (iam == psb_root_) then + write(psb_out_unit,'("Matrix: ",a)') mtrx_file + write(psb_out_unit,& + &'("Test on : ",i20," processors")') np + write(psb_out_unit,& + &'("Size of matrix : ",i20," ")') nrt + write(psb_out_unit,& + &'("Number of nonzeros : ",i20," ")') annz + write(psb_out_unit,& + &'("Memory occupation CPU (MBytes) : ",f20.2," ")') damatsize + write(psb_out_unit,& + &'("Memory occupation GPU (MBytes) : ",f20.2," ")') dgmatsize + write(psb_out_unit,& + &'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize + write(psb_out_unit,& + &'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize + flops = ntests*(2.d0*annz) + tflops = flops + gflops = flops * ngpu + write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() +#ifdef HAVE_GPU + write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() + write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& + & tcnvc1 + write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& + & tcnvcsr + write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& + & tcnvcsr/ncnv + write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& + & tcnvg1 + write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& + & tcnvgpu + write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& + & tcnvgpu/ncnv + +#endif + write(psb_out_unit,& + & '("Number of flops (",i0," prod) : ",F20.0," ")') & + & ntests,flops + + flops = flops / (t2) + tflops = tflops / (tt2) + gflops = gflops / (gt2) + write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& + & ntests,t2 + write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& + & t2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& + & flops/1.d6 +#ifdef HAVE_GPU + + write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& + & ntests, tt2 + write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& + & tt2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& + & tflops/1.d6 + + write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')& + & ngpu*ntests,gt2 + write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')& + & gt2*1.d3/(1.d0*ntests*ngpu) + write(psb_out_unit,'("MFLOPS (GPU) : ",F20.3)')& + & gflops/1.d6 +#endif + ! + ! This computation assumes the data movement associated with CSR: + ! it is minimal in terms of coefficients. Other formats may either move + ! more data (padding etc.) or less data (if they can save on the indices). + ! + nbytes = nr*(2*2*psb_sizeof_dp + psb_sizeof_ip)+& + & annz*(2*psb_sizeof_dp + psb_sizeof_ip) + bdwdth = ntests*nbytes/(t2*1.d6) + write(psb_out_unit,*) + write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth +#ifdef HAVE_GPU + bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) + write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth +#endif + write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() + write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize + + end if + + call psb_gefree(b_col, desc_a,info) + call psb_gefree(x_col, desc_a,info) + call psb_gefree(xv, desc_a,info) + call psb_gefree(bv, desc_a,info) + call psb_spfree(a, desc_a,info) +#ifdef HAVE_GPU + call psb_gefree(xg, desc_a,info) + call psb_gefree(bg, desc_a,info) + call psb_spfree(agpu,desc_a,info) + call psb_gpu_exit() +#endif + call psb_cdfree(desc_a,info) + + call psb_exit(ctxt) + stop + +9999 continue + call psb_error(ctxt) + +end program c_file_spmv + + + + + diff --git a/test/cudakern/d_file_spmv.F90 b/test/cudakern/d_file_spmv.F90 new file mode 100644 index 00000000..2bbc0bc4 --- /dev/null +++ b/test/cudakern/d_file_spmv.F90 @@ -0,0 +1,496 @@ +! +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +program d_file_spmv + use psb_base_mod + use psb_util_mod + use psb_ext_mod +#ifdef HAVE_GPU + use psb_gpu_mod +#endif + use data_input + implicit none + + ! input parameters + character(len=200) :: mtrx_file + + ! sparse matrices + type(psb_dspmat_type) :: a, aux_a, agpu + + ! dense matrices + real(psb_dpk_), allocatable, target :: aux_b(:,:), d(:) + real(psb_dpk_), allocatable , save :: x_col_glob(:), r_col_glob(:) + real(psb_dpk_), pointer :: b_col_glob(:) + type(psb_d_vect_type) :: b_col, x_col, r_col + type(psb_d_vect_type) :: xg, bg, xv, bv +#ifdef HAVE_GPU + type(psb_d_vect_gpu) :: vmold +#endif + real(psb_dpk_), allocatable :: xc1(:),xc2(:) + ! communications data structure + type(psb_desc_type):: desc_a + + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: iam, np + integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes + real(psb_dpk_) :: err, eps, damatsize, dgmatsize + + character(len=5) :: acfmt, agfmt + character(len=20) :: name + character(len=2) :: filefmt + integer, parameter :: iunit=12 + integer, parameter :: times=2000 + integer, parameter :: ntests=200, ngpu=50, ncnv=20 + + type(psb_d_coo_sparse_mat), target :: acoo + type(psb_d_csr_sparse_mat), target :: acsr + type(psb_d_ell_sparse_mat), target :: aell + type(psb_d_hll_sparse_mat), target :: ahll + type(psb_d_hdia_sparse_mat), target :: ahdia +#ifdef HAVE_GPU + type(psb_d_elg_sparse_mat), target :: aelg + type(psb_d_csrg_sparse_mat), target :: acsrg + type(psb_d_hybg_sparse_mat), target :: ahybg + type(psb_d_hlg_sparse_mat), target :: ahlg + type(psb_d_hdiag_sparse_mat), target :: ahdiag +#endif + class(psb_d_base_sparse_mat), pointer :: acmold, agmold + ! other variables + integer(psb_lpk_) :: i,j,nrt, ns, nr, ig, nrg + integer(psb_ipk_) :: internal, m,ii,nnzero,info, ipart + real(psb_dpk_) :: t0,t1, t2, tprec, flops + real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,& + & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 + integer :: nrhs, nrow, n_row, dim, nv, ne + integer, allocatable :: ivg(:), ipv(:) + + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) +#ifdef HAVE_GPU + call psb_gpu_init(ctxt) +#endif + if (iam < 0) then + ! This should not happen, but just in case + call psb_exit(ctxt) + stop + endif + + + name='file_spmv' + if(psb_get_errstatus() /= 0) goto 9999 + info=psb_success_ + call psb_set_errverbosity(2) + if (iam == psb_root_) then + write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ + write(*,*) 'This is the ',trim(name),' sample program' + end if +#ifdef HAVE_GPU + write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) +#endif + + if (iam == 0) then + write(*,*) 'Matrix? ' + call read_data(mtrx_file,psb_inp_unit) + write(*,*) 'file format' + call read_data(filefmt,psb_inp_unit) + write(*,*) 'CPU format' + call read_data(acfmt,psb_inp_unit) + write(*,*) 'GPU format' + call read_data(agfmt,psb_inp_unit) + write(*,*) 'distribution ' + call read_data(ipart,psb_inp_unit) + write(*,*) 'Read all data, going on' + end if + call psb_bcast(ctxt,mtrx_file) + call psb_bcast(ctxt,filefmt) + call psb_bcast(ctxt,acfmt) + call psb_bcast(ctxt,agfmt) + call psb_bcast(ctxt,ipart) + call psb_barrier(ctxt) + t0 = psb_wtime() + ! read the input matrix to be processed and (possibly) the rhs + nrhs = 1 + + if (iam==psb_root_) then + select case(psb_toupper(filefmt)) + case('MM') + ! For Matrix Market we have an input file for the matrix + ! and an (optional) second file for the RHS. + call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file) + + case ('HB') + ! For Harwell-Boeing we have a single file which may or may not + ! contain an RHS. + call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file) + + case default + info = -1 + write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt + end select + if (info /= 0) then + write(psb_err_unit,*) 'Error while reading input matrix ' + call psb_abort(ctxt) + end if + + ! + ! Always get nnz from original matrix. + ! Some formats add fill-in and do not keep track + ! of how many were added. So if the original matrix + ! contained some extra zeros, the count of entries + ! is not recoverable exactly. + ! + nrt = aux_a%get_nrows() + annz = aux_a%get_nzeros() + call psb_bcast(ctxt,annz) + call psb_bcast(ctxt,nrt) + + write(psb_out_unit,'("Generating an rhs...")') + write(psb_out_unit,'(" ")') + call psb_realloc(nrt,1,aux_b,info) + if (info /= 0) then + call psb_errpush(4000,name) + goto 9999 + endif + + b_col_glob => aux_b(:,1) + do i=1, nrt + b_col_glob(i) = 1.d0 + enddo + + else + + call psb_bcast(ctxt,annz) + call psb_bcast(ctxt,nrt) + + end if + + + select case(psb_toupper(acfmt)) + case('COO') + acmold => acoo + case('CSR') + acmold => acsr + case('ELL') + acmold => aell + case('HLL') + acmold => ahll + case('HDIA') + acmold => ahdia + case default + write(*,*) 'Unknown format defaulting to CSR' + acmold => acsr + end select + +#ifdef HAVE_GPU + select case(psb_toupper(agfmt)) + case('ELG') + agmold => aelg + case('HLG') + agmold => ahlg + case('CSRG') + agmold => acsrg + case('HYBG') + agmold => ahybg + case('HDIAG') + agmold => ahdiag + case default + write(*,*) 'Unknown format defaulting to HLG' + agmold => ahlg + end select +#endif + + + ! switch over different partition types + if (ipart == 0) then + call psb_barrier(ctxt) + if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")') + allocate(ivg(nrt),ipv(np)) + do i=1,nrt + call part_block(i,nrt,np,ipv,nv) + ivg(i) = ipv(1) + enddo + call psb_matdist(aux_a, a, ctxt, desc_a,info,vg=ivg) + else if (ipart == 2) then + if (iam==psb_root_) then + write(psb_out_unit,'("Partition type: graph")') + write(psb_out_unit,'(" ")') + ! write(psb_err_unit,'("Build type: graph")') + call build_mtpart(aux_a,np) + endif + call psb_barrier(ctxt) + call distr_mtpart(psb_root_,ctxt) + call getv_mtpart(ivg) + call psb_matdist(aux_a, a, ctxt, desc_a,info,vg=ivg) + else + if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")') + call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block) + end if + + call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_) + + t2 = psb_wtime() - t0 + + call psb_amx(ctxt, t2) + + if (iam==psb_root_) then + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2 + write(psb_out_unit,'(" ")') + end if + call a%cscnv(aux_a,info,mold=acoo) + tcnvcsr = 0 + tcnvgpu = 0 + nr = desc_a%get_local_rows() + nrg = desc_a%get_global_rows() + call psb_geall(x_col,desc_a,info) + do i=1, nr + call desc_a%l2g(i,ig,info) + call psb_geins(ione,(/ig/),(/(done + (done*ig)/nrg)/),x_col,desc_a,info) + end do + call psb_geasb(x_col,desc_a,info) + do j=1, ncnv + call aux_a%cscnv(a,info,mold=acoo) + call psb_barrier(ctxt) + t1 = psb_wtime() + call a%cscnv(info,mold=acmold) + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + tcnvcsr = tcnvcsr + t2 + if (j==1) tcnvc1 = t2 + xc1 = x_col%get_vect() + call xv%bld(xc1) + call psb_geasb(bv,desc_a,info,scratch=.true.) + +#ifdef HAVE_GPU + + call aux_a%cscnv(agpu,info,mold=acoo) + call xg%bld(xc1,mold=vmold) + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_barrier(ctxt) + t1 = psb_wtime() + call agpu%cscnv(info,mold=agmold) + call psb_gpu_DeviceSync() + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + if (j==1) tcnvg1 = t2 + tcnvgpu = tcnvgpu + t2 +#endif + end do + + call psb_barrier(ctxt) + t1 = psb_wtime() + do i=1,ntests + call psb_spmm(done,a,xv,dzero,bv,desc_a,info) + end do + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + call psb_amx(ctxt,t2) + +#ifdef HAVE_GPU + ! FIXME: cache flush needed here + call psb_barrier(ctxt) + tt1 = psb_wtime() + do i=1,ntests + call psb_spmm(done,agpu,xv,dzero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 1 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + tt2 = psb_wtime() - tt1 + call psb_amx(ctxt,tt2) + xc1 = bv%get_vect() + xc2 = bg%get_vect() + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on xGPU',eps + + call xg%sync() + ! FIXME: cache flush needed here + + call psb_barrier(ctxt) + gt1 = psb_wtime() + do i=1,ntests*ngpu + call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 2 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + ! For timing purposes we need to make sure all threads + ! in the device are done. + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + gt2 = psb_wtime() - gt1 + call psb_amx(ctxt,gt2) + call bg%sync() + xc1 = bv%get_vect() + xc2 = bg%get_vect() + call psb_geaxpby(-done,bg,+done,bv,desc_a,info) + eps = psb_geamax(bv,desc_a,info) + + call psb_amx(ctxt,t2) + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on GPU',eps +#endif + + + amatsize = a%sizeof() + agmatsize = agpu%sizeof() + damatsize = amatsize + damatsize = damatsize/(1024*1024) + dgmatsize = agmatsize + dgmatsize = dgmatsize/(1024*1024) + descsize = psb_sizeof(desc_a) + call psb_sum(ctxt,damatsize) + call psb_sum(ctxt,dgmatsize) + call psb_sum(ctxt,descsize) + + if (iam == psb_root_) then + write(psb_out_unit,'("Matrix: ",a)') mtrx_file + write(psb_out_unit,& + &'("Test on : ",i20," processors")') np + write(psb_out_unit,& + &'("Size of matrix : ",i20," ")') nrt + write(psb_out_unit,& + &'("Number of nonzeros : ",i20," ")') annz + write(psb_out_unit,& + &'("Memory occupation CPU (MBytes) : ",f20.2," ")') damatsize + write(psb_out_unit,& + &'("Memory occupation GPU (MBytes) : ",f20.2," ")') dgmatsize + write(psb_out_unit,& + &'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize + write(psb_out_unit,& + &'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize + flops = ntests*(2.d0*annz) + tflops = flops + gflops = flops * ngpu + write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() +#ifdef HAVE_GPU + write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() + write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& + & tcnvc1 + write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& + & tcnvcsr + write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& + & tcnvcsr/ncnv + write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& + & tcnvg1 + write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& + & tcnvgpu + write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& + & tcnvgpu/ncnv + +#endif + write(psb_out_unit,& + & '("Number of flops (",i0," prod) : ",F20.0," ")') & + & ntests,flops + + flops = flops / (t2) + tflops = tflops / (tt2) + gflops = gflops / (gt2) + write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& + & ntests,t2 + write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& + & t2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& + & flops/1.d6 +#ifdef HAVE_GPU + + write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& + & ntests, tt2 + write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& + & tt2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& + & tflops/1.d6 + + write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')& + & ngpu*ntests,gt2 + write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')& + & gt2*1.d3/(1.d0*ntests*ngpu) + write(psb_out_unit,'("MFLOPS (GPU) : ",F20.3)')& + & gflops/1.d6 +#endif + ! + ! This computation assumes the data movement associated with CSR: + ! it is minimal in terms of coefficients. Other formats may either move + ! more data (padding etc.) or less data (if they can save on the indices). + ! + nbytes = nr*(2*psb_sizeof_dp + psb_sizeof_ip)+& + & annz*(psb_sizeof_dp + psb_sizeof_ip) + bdwdth = ntests*nbytes/(t2*1.d6) + write(psb_out_unit,*) + write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth +#ifdef HAVE_GPU + bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) + write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth +#endif + write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() + write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize + + end if + + call psb_gefree(b_col, desc_a,info) + call psb_gefree(x_col, desc_a,info) + call psb_gefree(xv, desc_a,info) + call psb_gefree(bv, desc_a,info) + call psb_spfree(a, desc_a,info) +#ifdef HAVE_GPU + call psb_gefree(xg, desc_a,info) + call psb_gefree(bg, desc_a,info) + call psb_spfree(agpu,desc_a,info) + call psb_gpu_exit() +#endif + call psb_cdfree(desc_a,info) + + call psb_exit(ctxt) + stop + +9999 continue + call psb_error(ctxt) + +end program d_file_spmv + + + + + diff --git a/test/cudakern/data_input.f90 b/test/cudakern/data_input.f90 new file mode 100644 index 00000000..274cb7ad --- /dev/null +++ b/test/cudakern/data_input.f90 @@ -0,0 +1,221 @@ +! +! +! MLD2P4 version 2.0 +! MultiLevel Domain Decomposition Parallel Preconditioners Package +! based on PSBLAS (Parallel Sparse BLAS version 3.0) +! +! (C) Copyright 2008,2009,2010 +! +! Salvatore Filippone +! Alfredo Buttari +! Pasqua D'Ambra +! Daniela di Serafino +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the MLD2P4 group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MLD2P4 GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +module data_input + + interface read_data + module procedure read_char, read_int,& + & read_double, read_single, read_logical,& + & string_read_char, string_read_int,& + & string_read_double, string_read_single, & + & string_read_logical + end interface read_data + interface trim_string + module procedure trim_string + end interface + + character(len=4096), private :: charbuf + character, private, parameter :: def_marker="!" + +contains + + subroutine read_logical(val,file,marker) + logical, intent(out) :: val + integer, intent(in) :: file + character(len=1), optional, intent(in) :: marker + + read(file,'(a)')charbuf + call read_data(val,charbuf,marker) + + end subroutine read_logical + + subroutine read_char(val,file,marker) + character(len=*), intent(out) :: val + integer, intent(in) :: file + character(len=1), optional, intent(in) :: marker + + read(file,'(a)')charbuf + call read_data(val,charbuf,marker) + + end subroutine read_char + + + subroutine read_int(val,file,marker) + integer, intent(out) :: val + integer, intent(in) :: file + character(len=1), optional, intent(in) :: marker + + read(file,'(a)')charbuf + call read_data(val,charbuf,marker) + + end subroutine read_int + subroutine read_single(val,file,marker) + use psb_base_mod + real(psb_spk_), intent(out) :: val + integer, intent(in) :: file + character(len=1), optional, intent(in) :: marker + + read(file,'(a)')charbuf + call read_data(val,charbuf,marker) + + end subroutine read_single + subroutine read_double(val,file,marker) + use psb_base_mod + real(psb_dpk_), intent(out) :: val + integer, intent(in) :: file + character(len=1), optional, intent(in) :: marker + + read(file,'(a)')charbuf + call read_data(val,charbuf,marker) + + end subroutine read_double + + subroutine string_read_char(val,file,marker) + character(len=*), intent(out) :: val + character(len=*), intent(in) :: file + character(len=1), optional, intent(in) :: marker + character(len=1) :: marker_ + character(len=1024) :: charbuf + integer :: idx + if (present(marker)) then + marker_ = marker + else + marker_ = def_marker + end if + read(file,'(a)')charbuf + charbuf = adjustl(charbuf) + idx=index(charbuf,marker_) + if (idx == 0) idx = len(charbuf)+1 + read(charbuf(1:idx-1),'(a)') val + end subroutine string_read_char + + subroutine string_read_int(val,file,marker) + integer, intent(out) :: val + character(len=*), intent(in) :: file + character(len=1), optional, intent(in) :: marker + character(len=1) :: marker_ + character(len=1024) :: charbuf + integer :: idx + if (present(marker)) then + marker_ = marker + else + marker_ = def_marker + end if + read(file,'(a)')charbuf + charbuf = adjustl(charbuf) + idx=index(charbuf,marker_) + if (idx == 0) idx = len(charbuf)+1 + read(charbuf(1:idx-1),*) val + end subroutine string_read_int + + subroutine string_read_single(val,file,marker) + use psb_base_mod + real(psb_spk_), intent(out) :: val + character(len=*), intent(in) :: file + character(len=1), optional, intent(in) :: marker + character(len=1) :: marker_ + character(len=1024) :: charbuf + integer :: idx + if (present(marker)) then + marker_ = marker + else + marker_ = def_marker + end if + read(file,'(a)')charbuf + charbuf = adjustl(charbuf) + idx=index(charbuf,marker_) + if (idx == 0) idx = len(charbuf)+1 + read(charbuf(1:idx-1),*) val + end subroutine string_read_single + + subroutine string_read_double(val,file,marker) + use psb_base_mod + real(psb_dpk_), intent(out) :: val + character(len=*), intent(in) :: file + character(len=1), optional, intent(in) :: marker + character(len=1) :: marker_ + character(len=1024) :: charbuf + integer :: idx + if (present(marker)) then + marker_ = marker + else + marker_ = def_marker + end if + read(file,'(a)')charbuf + charbuf = adjustl(charbuf) + idx=index(charbuf,marker_) + if (idx == 0) idx = len(charbuf)+1 + read(charbuf(1:idx-1),*) val + end subroutine string_read_double + + subroutine string_read_logical(val,file,marker) + use psb_base_mod + logical, intent(out) :: val + character(len=*), intent(in) :: file + character(len=1), optional, intent(in) :: marker + character(len=1) :: marker_ + character(len=1024) :: charbuf + integer :: idx + if (present(marker)) then + marker_ = marker + else + marker_ = def_marker + end if + read(file,'(a)')charbuf + charbuf = adjustl(charbuf) + idx=index(charbuf,marker_) + if (idx == 0) idx = len(charbuf)+1 + read(charbuf(1:idx-1),*) val + end subroutine string_read_logical + + function trim_string(string,marker) + character(len=*), intent(in) :: string + character(len=1), optional, intent(in) :: marker + character(len=len(string)) :: trim_string + character(len=1) :: marker_ + integer :: idx + if (present(marker)) then + marker_ = marker + else + marker_ = def_marker + end if + idx=index(string,marker_) + trim_string = adjustl(string(idx:)) + end function trim_string +end module data_input + diff --git a/test/cudakern/dpdegenmv.F90 b/test/cudakern/dpdegenmv.F90 new file mode 100644 index 00000000..85059e81 --- /dev/null +++ b/test/cudakern/dpdegenmv.F90 @@ -0,0 +1,1015 @@ +! +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: dpdegenmv.f90 +! +! Program: pdegenmv +! This sample program measures the performance of the matrix-vector product. +! The matrix is generated in the same way as for the pdegen test case of +! the main PSBLAS library. +! +! +module psb_d_pde3d_mod + + + use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_lpk_, psb_desc_type,& + & psb_dspmat_type, psb_d_vect_type, dzero,& + & psb_d_base_sparse_mat, psb_d_base_vect_type, & + & psb_i_base_vect_type, psb_l_base_vect_type + + interface + function d_func_3d(x,y,z) result(val) + import :: psb_dpk_ + real(psb_dpk_), intent(in) :: x,y,z + real(psb_dpk_) :: val + end function d_func_3d + end interface + + interface psb_gen_pde3d + module procedure psb_d_gen_pde3d + end interface psb_gen_pde3d + +contains + + function d_null_func_3d(x,y,z) result(val) + + real(psb_dpk_), intent(in) :: x,y,z + real(psb_dpk_) :: val + + val = dzero + + end function d_null_func_3d + ! + ! functions parametrizing the differential equation + ! + + ! + ! Note: b1, b2 and b3 are the coefficients of the first + ! derivative of the unknown function. The default + ! we apply here is to have them zero, so that the resulting + ! matrix is symmetric/hermitian and suitable for + ! testing with CG and FCG. + ! When testing methods for non-hermitian matrices you can + ! change the B1/B2/B3 functions to e.g. done/sqrt((3*done)) + ! + function b1(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b1 + real(psb_dpk_), intent(in) :: x,y,z + b1=done/sqrt((3*done)) + end function b1 + function b2(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b2 + real(psb_dpk_), intent(in) :: x,y,z + b2=done/sqrt((3*done)) + end function b2 + function b3(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b3 + real(psb_dpk_), intent(in) :: x,y,z + b3=done/sqrt((3*done)) + end function b3 + function c(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: c + real(psb_dpk_), intent(in) :: x,y,z + c=dzero + end function c + function a1(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a1 + real(psb_dpk_), intent(in) :: x,y,z + a1=done/80 + end function a1 + function a2(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a2 + real(psb_dpk_), intent(in) :: x,y,z + a2=done/80 + end function a2 + function a3(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a3 + real(psb_dpk_), intent(in) :: x,y,z + a3=done/80 + end function a3 + function g(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: g + real(psb_dpk_), intent(in) :: x,y,z + g = dzero + if (x == done) then + g = done + else if (x == dzero) then + g = exp(y**2-z**2) + end if + end function g + + + ! + ! subroutine to allocate and fill in the coefficient matrix and + ! the rhs. + ! + subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,& + & f,amold,vmold,imold,partition,nrl,iv,tnd) + use psb_base_mod + use psb_util_mod + ! + ! Discretizes the partial differential equation + ! + ! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u) + ! - ------ - ------ - ------ + ----- + ------ + ------ + c u = f + ! dxdx dydy dzdz dx dy dz + ! + ! with Dirichlet boundary conditions + ! u = g + ! + ! on the unit cube 0<=x,y,z<=1. + ! + ! + ! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation. + ! + implicit none + integer(psb_ipk_) :: idim + type(psb_dspmat_type) :: a + type(psb_d_vect_type) :: xv,bv + type(psb_desc_type) :: desc_a + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: info + character(len=*) :: afmt + procedure(d_func_3d), optional :: f + class(psb_d_base_sparse_mat), optional :: amold + class(psb_d_base_vect_type), optional :: vmold + class(psb_i_base_vect_type), optional :: imold + integer(psb_ipk_), optional :: partition, nrl,iv(:) + logical, optional :: tnd + ! Local variables. + + integer(psb_ipk_), parameter :: nb=20 + type(psb_d_csc_sparse_mat) :: acsc + type(psb_d_coo_sparse_mat) :: acoo + type(psb_d_csr_sparse_mat) :: acsr + real(psb_dpk_) :: zt(nb),x,y,z + integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_ + integer(psb_lpk_) :: m,n,glob_row,nt + integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner + ! For 3D partition + ! Note: integer control variables going directly into an MPI call + ! must be 4 bytes, i.e. psb_mpk_ + integer(psb_mpk_) :: npdims(3), npp, minfo + integer(psb_ipk_) :: npx,npy,npz, iamx,iamy,iamz,mynx,myny,mynz + integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:) + ! Process grid + integer(psb_ipk_) :: np, iam + integer(psb_ipk_) :: icoeff + integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:) + real(psb_dpk_), allocatable :: val(:) + ! deltah dimension of each grid cell + ! deltat discretization time + real(psb_dpk_) :: deltah, sqdeltah, deltah2 + real(psb_dpk_), parameter :: rhs=dzero,one=done,zero=dzero + real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb + integer(psb_ipk_) :: err_act + procedure(d_func_3d), pointer :: f_ + logical :: tnd_ + character(len=20) :: name, ch_err,tmpfmt + + info = psb_success_ + name = 'create_matrix' + call psb_erractionsave(err_act) + + call psb_info(ctxt, iam, np) + + + if (present(f)) then + f_ => f + else + f_ => d_null_func_3d + end if + + deltah = done/(idim+2) + sqdeltah = deltah*deltah + deltah2 = (2*done)* deltah + + if (present(partition)) then + if ((1<= partition).and.(partition <= 3)) then + partition_ = partition + else + write(*,*) 'Invalid partition choice ',partition,' defaulting to 3' + partition_ = 3 + end if + else + partition_ = 3 + end if + + ! initialize array descriptor and sparse matrix storage. provide an + ! estimate of the number of non zeroes + + m = (1_psb_lpk_*idim)*idim*idim + n = m + nnz = ((n*7)/(np)) + if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n + t0 = psb_wtime() + select case(partition_) + case(1) + ! A BLOCK partition + if (present(nrl)) then + nr = nrl + else + ! + ! Using a simple BLOCK distribution. + ! + nt = (m+np-1)/np + nr = max(0,min(nt,m-(iam*nt))) + end if + + nt = nr + call psb_sum(ctxt,nt) + if (nt /= m) then + write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + + ! + ! First example of use of CDALL: specify for each process a number of + ! contiguous rows + ! + call psb_cdall(ctxt,desc_a,info,nl=nr) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(2) + ! A partition defined by the user through IV + + if (present(iv)) then + if (size(iv) /= m) then + write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + else + write(psb_err_unit,*) iam, 'Initialization error: IV not present' + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + + ! + ! Second example of use of CDALL: specify for each row the + ! process that owns it + ! + call psb_cdall(ctxt,desc_a,info,vg=iv) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(3) + ! A 3-dimensional partition + + ! A nifty MPI function will split the process list + npdims = 0 + call mpi_dims_create(np,3,npdims,info) + npx = npdims(1) + npy = npdims(2) + npz = npdims(3) + + allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz)) + ! We can reuse idx2ijk for process indices as well. + call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0) + ! Now let's split the 3D cube in hexahedra + call dist1Didx(bndx,idim,npx) + mynx = bndx(iamx+1)-bndx(iamx) + call dist1Didx(bndy,idim,npy) + myny = bndy(iamy+1)-bndy(iamy) + call dist1Didx(bndz,idim,npz) + mynz = bndz(iamz+1)-bndz(iamz) + + ! How many indices do I own? + nlr = mynx*myny*mynz + allocate(myidx(nlr)) + ! Now, let's generate the list of indices I own + nr = 0 + do i=bndx(iamx),bndx(iamx+1)-1 + do j=bndy(iamy),bndy(iamy+1)-1 + do k=bndz(iamz),bndz(iamz+1)-1 + nr = nr + 1 + call ijk2idx(myidx(nr),i,j,k,idim,idim,idim) + end do + end do + end do + if (nr /= nlr) then + write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',& + & nr,nlr,mynx,myny,mynz + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + end if + + ! + ! Third example of use of CDALL: specify for each process + ! the set of global indices it owns. + ! + call psb_cdall(ctxt,desc_a,info,vl=myidx) + + case default + write(psb_err_unit,*) iam, 'Initialization error: should not get here' + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end select + + + if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz,& + & dupl=psb_dupl_err_) + ! define rhs from boundary conditions; also build initial guess + if (info == psb_success_) call psb_geall(xv,desc_a,info) + if (info == psb_success_) call psb_geall(bv,desc_a,info) + + call psb_barrier(ctxt) + talc = psb_wtime()-t0 + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='allocation rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + ! we build an auxiliary matrix consisting of one row at a + ! time; just a small matrix. might be extended to generate + ! a bunch of rows per call. + ! + allocate(val(20*nb),irow(20*nb),& + &icol(20*nb),stat=info) + if (info /= psb_success_ ) then + info=psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + endif + + + ! loop over rows belonging to current process in a block + ! distribution. + + call psb_barrier(ctxt) + t1 = psb_wtime() + do ii=1, nlr,nb + ib = min(nb,nlr-ii+1) + icoeff = 1 + do k=1,ib + i=ii+k-1 + ! local matrix pointer + glob_row=myidx(i) + ! compute gridpoint coordinates + call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim) + ! x, y, z coordinates + x = (ix-1)*deltah + y = (iy-1)*deltah + z = (iz-1)*deltah + zt(k) = f_(x,y,z) + ! internal point: build discretization + ! + ! term depending on (x-1,y,z) + ! + val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2 + if (ix == 1) then + zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y-1,z) + val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2 + if (iy == 1) then + zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y,z-1) + val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2 + if (iz == 1) then + zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + + ! term depending on (x,y,z) + val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah & + & + c(x,y,z) + call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + ! term depending on (x,y,z+1) + val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2 + if (iz == idim) then + zt(k) = g(x,y,done)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y+1,z) + val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2 + if (iy == idim) then + zt(k) = g(x,done,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x+1,y,z) + val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2 + if (ix==idim) then + zt(k) = g(done,y,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + + end do + call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info) + if(info /= psb_success_) exit + call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info) + if(info /= psb_success_) exit + zt(:)=dzero + call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info) + if(info /= psb_success_) exit + end do + + tgen = psb_wtime()-t1 + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='insert rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + deallocate(val,irow,icol) + + call psb_barrier(ctxt) + t1 = psb_wtime() + call psb_cdasb(desc_a,info,mold=imold) + tcdasb = psb_wtime()-t1 + call psb_barrier(ctxt) + t1 = psb_wtime() + if (info == psb_success_) then + if (present(amold)) then + call psb_spasb(a,desc_a,info,mold=amold,bld_and=tnd) + else + call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=tnd) + end if + end if + call psb_barrier(ctxt) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (info == psb_success_) call psb_geasb(xv,desc_a,info,mold=vmold) + if (info == psb_success_) call psb_geasb(bv,desc_a,info,mold=vmold) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + tasb = psb_wtime()-t1 + call psb_barrier(ctxt) + ttot = psb_wtime() - t0 + + call psb_amx(ctxt,talc) + call psb_amx(ctxt,tgen) + call psb_amx(ctxt,tasb) + call psb_amx(ctxt,ttot) + if(iam == psb_root_) then + tmpfmt = a%get_fmt() + write(psb_out_unit,'("The matrix has been generated and assembled in ",a3," format.")')& + & tmpfmt + write(psb_out_unit,'("-allocation time : ",es12.5)') talc + write(psb_out_unit,'("-coeff. gen. time : ",es12.5)') tgen + write(psb_out_unit,'("-desc asbly time : ",es12.5)') tcdasb + write(psb_out_unit,'("- mat asbly time : ",es12.5)') tasb + write(psb_out_unit,'("-total time : ",es12.5)') ttot + + end if + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + end subroutine psb_d_gen_pde3d + + +end module psb_d_pde3d_mod + + +program pdgenmv + use psb_base_mod + use psb_util_mod + use psb_ext_mod +#ifdef HAVE_CUDA + use psb_cuda_mod +#endif +#ifdef HAVE_RSB + use psb_rsb_mod +#endif + use psb_d_pde3d_mod + implicit none + + ! input parameters + character(len=5) :: acfmt, agfmt + integer :: idim + logical :: tnd + ! miscellaneous + real(psb_dpk_), parameter :: one = 1.d0 + real(psb_dpk_) :: t1, t2, tprec, flops, tflops,& + & tt1, tt2, gt1, gt2, gflops, bdwdth,& + & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 + + ! sparse matrix and preconditioner + type(psb_dspmat_type) :: a, agpu, aux_a + ! descriptor + type(psb_desc_type) :: desc_a + ! dense matrices + type(psb_d_vect_type), target :: xv, bv, xg, bg +#ifdef HAVE_CUDA + type(psb_d_vect_cuda) :: vmold + type(psb_i_vect_cuda) :: imold +#endif + real(psb_dpk_), allocatable :: x1(:), x2(:), x0(:) + ! blacs parameters + type(psb_ctxt_type) :: ctxt + integer :: iam, np + + ! solver parameters + integer(psb_epk_) :: amatsize, precsize, descsize, annz, nbytes + real(psb_dpk_) :: err, eps + integer, parameter :: ntests=200, ngpu=50, ncnv=20 + type(psb_d_coo_sparse_mat), target :: acoo + type(psb_d_csr_sparse_mat), target :: acsr + type(psb_d_ell_sparse_mat), target :: aell + type(psb_d_hll_sparse_mat), target :: ahll + type(psb_d_dia_sparse_mat), target :: adia + type(psb_d_hdia_sparse_mat), target :: ahdia +#ifdef HAVE_RSB + type(psb_d_rsb_sparse_mat), target :: arsb +#endif +#ifdef HAVE_CUDA + type(psb_d_cuda_elg_sparse_mat), target :: aelg + type(psb_d_cuda_csrg_sparse_mat), target :: acsrg +#if CUDA_SHORT_VERSION <= 10 + type(psb_d_cuda_hybg_sparse_mat), target :: ahybg +#endif + type(psb_d_cuda_hlg_sparse_mat), target :: ahlg + type(psb_d_cuda_hdiag_sparse_mat), target :: ahdiag + type(psb_d_cuda_dnsg_sparse_mat), target :: adnsg +#endif + class(psb_d_base_sparse_mat), pointer :: agmold, acmold + ! other variables + logical, parameter :: dump=.false. + integer(psb_ipk_) :: info, i, j, nr, nrg + integer(psb_lpk_) :: ig + character(len=20) :: name,ch_err + character(len=40) :: fname + + info=psb_success_ + + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) + +#ifdef HAVE_CUDA + call psb_cuda_init(ctxt) +#endif +#ifdef HAVE_RSB + call psb_rsb_init() +#endif + + if (iam < 0) then + ! This should not happen, but just in case + call psb_exit(ctxt) + stop + endif + if(psb_get_errstatus() /= 0) goto 9999 + name='pdegenmv-cuda' + ! + ! Hello world + ! + if (iam == psb_root_) then + write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ + write(*,*) 'This is the ',trim(name),' sample program' + end if +#ifdef HAVE_CUDA + write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_cuda_DeviceName()) +#endif + ! + ! get parameters + ! + call get_parms(ctxt,acfmt,agfmt,idim,tnd) + call psb_init_timers() + ! + ! allocate and fill in the coefficient matrix and initial vectors + ! + call psb_barrier(ctxt) + t1 = psb_wtime() + call psb_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,'CSR ',info,partition=3,tnd=tnd) + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='create_matrix' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (iam == psb_root_) write(psb_out_unit,'("Overall matrix creation time : ",es12.5)')t2 + if (iam == psb_root_) write(psb_out_unit,'(" ")') + + if (dump) then + write(fname,'(a,i3.3,a,i3.3,a,i3.3,a)') 'pde',idim,'-',iam,'-',np,'.mtx' + call a%print(fname,head='PDEGEN test matrix') + end if + + select case(psb_toupper(acfmt)) + case('ELL') + acmold => aell + case('HLL') + acmold => ahll + case('DIA') + acmold => adia + case('HDIA') + acmold => ahdia + case('CSR') + acmold => acsr + case('COO') + acmold => acoo +#ifdef HAVE_RSB + case('RSB') + acmold => arsb +#endif + case default + write(*,*) 'Unknown format defaulting to HLL' + acmold => ahll + end select + call a%cscnv(info,mold=acmold) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From cscnv ',info + call psb_error() + stop + end if + +#ifdef HAVE_CUDA + select case(psb_toupper(agfmt)) + case('ELG') + agmold => aelg + case('HLG') + agmold => ahlg + case('HDIAG') + agmold => ahdiag + case('CSRG') + agmold => acsrg + case('DNSG') + agmold => adnsg +#if CUDA_SHORT_VERSION <= 10 + case('HYBG') + agmold => ahybg +#endif + case default + write(*,*) 'Unknown format defaulting to HLG' + agmold => ahlg + end select + call a%cscnv(agpu,info,mold=agmold) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From cscnv ',info + call psb_error() + stop + end if + call desc_a%cnv(mold=imold) + + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_geasb(xg,desc_a,info,scratch=.true.,mold=vmold) +#endif + nr = desc_a%get_local_rows() + nrg = desc_a%get_global_rows() + call psb_geall(x0,desc_a,info) + do i=1, nr + call desc_a%l2g(i,ig,info) + x0(i) = 1.0 + (1.0*ig)/nrg + end do + call a%cscnv(aux_a,info,mold=acoo) + tcnvcsr = 0 + tcnvgpu = 0 + call psb_geall(x1,desc_a,info) + do j=1, ncnv + call aux_a%cscnv(a,info,mold=acoo) + call psb_barrier(ctxt) + t1 = psb_wtime() + call a%cscnv(info,mold=acmold) + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + tcnvcsr = tcnvcsr + t2 + if (j==1) tcnvc1 = t2 + call psb_geasb(x1,desc_a,info) + call xv%bld(x0) + call psb_geasb(bv,desc_a,info,scratch=.true.) + +#ifdef HAVE_CUDA + + call aux_a%cscnv(agpu,info,mold=acoo) + call xg%bld(x0,mold=vmold) + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_barrier(ctxt) + t1 = psb_wtime() + call agpu%cscnv(info,mold=agmold) + call psb_cuda_DeviceSync() + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + if (j==1) tcnvg1 = t2 + tcnvgpu = tcnvgpu + t2 +#endif + end do + + + call xv%set(x0) + call psb_barrier(ctxt) + t1 = psb_wtime() + do i=1,ntests + call psb_spmm(done,a,xv,dzero,bv,desc_a,info) + end do + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + call psb_amx(ctxt,t2) + +#ifdef HAVE_CUDA + call xg%set(x0) + + ! FIXME: cache flush needed here + x1 = bv%get_vect() + x2 = bg%get_vect() + + call psb_barrier(ctxt) + tt1 = psb_wtime() + do i=1,ntests + call psb_spmm(done,agpu,xv,dzero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 1 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_cuda_DeviceSync() + call psb_barrier(ctxt) + tt2 = psb_wtime() - tt1 + call psb_amx(ctxt,tt2) + x1 = bv%get_vect() + x2 = bg%get_vect() + nr = desc_a%get_local_rows() + eps = maxval(abs(x1(1:nr)-x2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on xGPU',eps + + ! FIXME: cache flush needed here + call xg%set(x0) + call xg%sync() + call psb_barrier(ctxt) + gt1 = psb_wtime() + do i=1,ntests*ngpu + call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info) + ! For timing purposes we need to make sure all threads + ! in the device are done. + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 2 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_cuda_DeviceSync() + call psb_barrier(ctxt) + gt2 = psb_wtime() - gt1 + call psb_amx(ctxt,gt2) + call bg%sync() + x1 = bv%get_vect() + x2 = bg%get_vect() + call psb_geaxpby(-done,bg,+done,bv,desc_a,info) + eps = psb_geamax(bv,desc_a,info) + + call psb_amx(ctxt,t2) + eps = maxval(abs(x1(1:nr)-x2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on GPU',eps + if (dump) then + write(fname,'(a,i3.3,a,i3.3,a)')'XCPU-out-',iam,'-',np,'.mtx' + call mm_array_write(x1(1:nr),'Local part CPU',info,filename=fname) + write(fname,'(a,i3.3,a,i3.3,a)')'XGPU-out-',iam,'-',np,'.mtx' + call mm_array_write(x2(1:nr),'Local part GPU',info,filename=fname) + end if + +#endif + annz = a%get_nzeros() + amatsize = a%sizeof() + descsize = psb_sizeof(desc_a) + call psb_sum(ctxt,nr) + call psb_sum(ctxt,annz) + call psb_sum(ctxt,amatsize) + call psb_sum(ctxt,descsize) + + if (iam == psb_root_) then + write(psb_out_unit,& + & '("Matrix: ell1 ",i0)') idim + write(psb_out_unit,& + &'("Test on : ",i20," processors")') np + write(psb_out_unit,& + &'("Size of matrix : ",i20," ")') nr + write(psb_out_unit,& + &'("Number of nonzeros : ",i20," ")') annz + write(psb_out_unit,& + &'("Memory occupation : ",i20," ")') amatsize + flops = ntests*(2.d0*annz) + tflops = flops + gflops = flops * ngpu + write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() +#ifdef HAVE_CUDA + write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() + write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& + & tcnvc1 + write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& + & tcnvcsr + write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& + & tcnvcsr/ncnv + write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& + & tcnvg1 + write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& + & tcnvgpu + write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& + & tcnvgpu/ncnv + +#endif + write(psb_out_unit,& + & '("Number of flops (",i0," prod) : ",F20.0," ")') & + & ntests,flops + + flops = flops / (t2) + tflops = tflops / (tt2) + gflops = gflops / (gt2) + + write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& + & ntests,t2 + write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& + & t2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& + & flops/1.d6 +#ifdef HAVE_CUDA + write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& + & ntests, tt2 + write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& + & tt2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& + & tflops/1.d6 + + write(psb_out_unit,'("Time for ",i6," products (s) (GPU.) : ",F20.3)')& + & ngpu*ntests,gt2 + write(psb_out_unit,'("Time per product (ms) (GPU.) : ",F20.3)')& + & gt2*1.d3/(1.d0*ntests*ngpu) + write(psb_out_unit,'("MFLOPS (GPU.) : ",F20.3)')& + & gflops/1.d6 +#endif + ! + ! This computation assumes the data movement associated with CSR: + ! it is minimal in terms of coefficients. Other formats may either move + ! more data (padding etc.) or less data (if they can save on the indices). + ! + nbytes = nr*(2*psb_sizeof_dp + psb_sizeof_ip)+& + & annz*(psb_sizeof_dp + psb_sizeof_ip) + bdwdth = ntests*nbytes/(t2*1.d6) + write(psb_out_unit,*) + write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (CPU) : ",F20.3)') bdwdth +#ifdef HAVE_CUDA + bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) + write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth + bdwdth = psb_cuda_MemoryPeakBandwidth() + write(psb_out_unit,'("MBYTES/S peak bandwidth (GPU) : ",F20.3)') bdwdth +#endif + write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() + write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize + + end if + call psb_print_timers(ctxt) + + ! + ! cleanup storage and exit + ! + call psb_gefree(bv,desc_a,info) + call psb_gefree(xv,desc_a,info) + call psb_spfree(a,desc_a,info) + call psb_cdfree(desc_a,info) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='free routine' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if +#ifdef HAVE_CUDA + call psb_cuda_exit() +#endif + call psb_exit(ctxt) + stop + +9999 continue + call psb_error(ctxt) + +contains + ! + ! get iteration parameters from standard input + ! + subroutine get_parms(ctxt,acfmt,agfmt,idim,tnd) + type(psb_ctxt_type) :: ctxt + character(len=*) :: agfmt, acfmt + integer :: idim + logical :: tnd + integer :: np, iam + integer :: intbuf(10), ip + + call psb_info(ctxt, iam, np) + + if (iam == 0) then + write(*,*) 'CPU side format?' + read(psb_inp_unit,*) acfmt + write(*,*) 'CUDA side format?' + read(psb_inp_unit,*) agfmt + write(*,*) 'Size of discretization cube?' + read(psb_inp_unit,*) idim + write(*,*) 'Try comm/comp overlap?' + read(psb_inp_unit,*) tnd + endif + call psb_bcast(ctxt,acfmt) + call psb_bcast(ctxt,agfmt) + call psb_bcast(ctxt,idim) + call psb_bcast(ctxt,tnd) + + if (iam == 0) then + write(psb_out_unit,'("Testing matrix : ell1")') + write(psb_out_unit,'("Grid dimensions : ",i4,"x",i4,"x",i4)')idim,idim,idim + write(psb_out_unit,'("Number of processors : ",i0)')np + write(psb_out_unit,'("Data distribution : BLOCK")') + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Storage formats ",a)') acfmt,' ',agfmt + write(psb_out_unit,'("Testing overlap ND ",l8)') tnd + end if + return + + end subroutine get_parms + +end program pdgenmv diff --git a/test/cudakern/s_file_spmv.F90 b/test/cudakern/s_file_spmv.F90 new file mode 100644 index 00000000..37a52717 --- /dev/null +++ b/test/cudakern/s_file_spmv.F90 @@ -0,0 +1,496 @@ +! +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +program s_file_spmv + use psb_base_mod + use psb_util_mod + use psb_ext_mod +#ifdef HAVE_GPU + use psb_gpu_mod +#endif + use data_input + implicit none + + ! input parameters + character(len=200) :: mtrx_file + + ! sparse matrices + type(psb_sspmat_type) :: a, aux_a, agpu + + ! dense matrices + real(psb_spk_), allocatable, target :: aux_b(:,:), d(:) + real(psb_spk_), allocatable , save :: x_col_glob(:), r_col_glob(:) + real(psb_spk_), pointer :: b_col_glob(:) + type(psb_s_vect_type) :: b_col, x_col, r_col + type(psb_s_vect_type) :: xg, bg, xv, bv +#ifdef HAVE_GPU + type(psb_s_vect_gpu) :: vmold +#endif + real(psb_spk_), allocatable :: xc1(:),xc2(:) + ! communications data structure + type(psb_desc_type):: desc_a + + type(psb_ctxt_type) :: ctxt + integer :: iam, np + integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes + real(psb_spk_) :: err, eps, samatsize, sgmatsize + + character(len=5) :: acfmt, agfmt + character(len=20) :: name + character(len=2) :: filefmt + integer, parameter :: iunit=12 + integer, parameter :: times=2000 + integer, parameter :: ntests=200, ngpu=50, ncnv=20 + + type(psb_s_coo_sparse_mat), target :: acoo + type(psb_s_csr_sparse_mat), target :: acsr + type(psb_s_ell_sparse_mat), target :: aell + type(psb_s_hll_sparse_mat), target :: ahll + type(psb_s_hdia_sparse_mat), target :: ahdia +#ifdef HAVE_GPU + type(psb_s_elg_sparse_mat), target :: aelg + type(psb_s_csrg_sparse_mat), target :: acsrg + type(psb_s_hybg_sparse_mat), target :: ahybg + type(psb_s_hlg_sparse_mat), target :: ahlg + type(psb_s_hdiag_sparse_mat), target :: ahdiag +#endif + class(psb_s_base_sparse_mat), pointer :: acmold, agmold + ! other variables + integer :: i,info,j,nrt, ns, nr, ipart, ig, nrg + integer :: internal, m,ii,nnzero + real(psb_dpk_) :: t0,t1, t2, tprec, flops + real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,& + & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 + integer :: nrhs, nrow, n_row, dim, nv, ne + integer, allocatable :: ivg(:), ipv(:) + + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) +#ifdef HAVE_GPU + call psb_gpu_init(ctxt) +#endif + if (iam < 0) then + ! This should not happen, but just in case + call psb_exit(ctxt) + stop + endif + + + name='file_spmv' + if(psb_get_errstatus() /= 0) goto 9999 + info=psb_success_ + call psb_set_errverbosity(2) + if (iam == psb_root_) then + write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ + write(*,*) 'This is the ',trim(name),' sample program' + end if +#ifdef HAVE_GPU + write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) +#endif + + if (iam == 0) then + write(*,*) 'Matrix? ' + call read_data(mtrx_file,psb_inp_unit) + write(*,*) 'file format' + call read_data(filefmt,psb_inp_unit) + write(*,*) 'CPU format' + call read_data(acfmt,psb_inp_unit) + write(*,*) 'GPU format' + call read_data(agfmt,psb_inp_unit) + write(*,*) 'distribution ' + call read_data(ipart,psb_inp_unit) + write(*,*) 'Read all data, going on' + end if + call psb_bcast(ctxt,mtrx_file) + call psb_bcast(ctxt,filefmt) + call psb_bcast(ctxt,acfmt) + call psb_bcast(ctxt,agfmt) + call psb_bcast(ctxt,ipart) + call psb_barrier(ctxt) + t0 = psb_wtime() + ! read the input matrix to be processed and (possibly) the rhs + nrhs = 1 + + if (iam==psb_root_) then + select case(psb_toupper(filefmt)) + case('MM') + ! For Matrix Market we have an input file for the matrix + ! and an (optional) second file for the RHS. + call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file) + + case ('HB') + ! For Harwell-Boeing we have a single file which may or may not + ! contain an RHS. + call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file) + + case default + info = -1 + write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt + end select + if (info /= 0) then + write(psb_err_unit,*) 'Error while reading input matrix ' + call psb_abort(ctxt) + end if + + ! + ! Always get nnz from original matrix. + ! Some formats add fill-in and do not keep track + ! of how many were added. So if the original matrix + ! contained some extra zeros, the count of entries + ! is not recoverable exactly. + ! + nrt = aux_a%get_nrows() + annz = aux_a%get_nzeros() + call psb_bcast(ctxt,annz) + call psb_bcast(ctxt,nrt) + + write(psb_out_unit,'("Generating an rhs...")') + write(psb_out_unit,'(" ")') + call psb_realloc(nrt,1,aux_b,info) + if (info /= 0) then + call psb_errpush(4000,name) + goto 9999 + endif + + b_col_glob => aux_b(:,1) + do i=1, nrt + b_col_glob(i) = 1.d0 + enddo + + else + + call psb_bcast(ctxt,annz) + call psb_bcast(ctxt,nrt) + + end if + + + select case(psb_toupper(acfmt)) + case('COO') + acmold => acoo + case('CSR') + acmold => acsr + case('ELL') + acmold => aell + case('HLL') + acmold => ahll + case('HDIA') + acmold => ahdia + case default + write(*,*) 'Unknown format defaulting to CSR' + acmold => acsr + end select + +#ifdef HAVE_GPU + select case(psb_toupper(agfmt)) + case('ELG') + agmold => aelg + case('HLG') + agmold => ahlg + case('CSRG') + agmold => acsrg + case('HYBG') + agmold => ahybg + case('HDIAG') + agmold => ahdiag + case default + write(*,*) 'Unknown format defaulting to HLG' + agmold => ahlg + end select +#endif + + + ! switch over different partition types + if (ipart == 0) then + call psb_barrier(ctxt) + if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")') + allocate(ivg(nrt),ipv(np)) + do i=1,nrt + call part_block(i,nrt,np,ipv,nv) + ivg(i) = ipv(1) + enddo + call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) + else if (ipart == 2) then + if (iam==psb_root_) then + write(psb_out_unit,'("Partition type: graph")') + write(psb_out_unit,'(" ")') + ! write(psb_err_unit,'("Build type: graph")') + call build_mtpart(aux_a,np) + endif + call psb_barrier(ctxt) + call distr_mtpart(psb_root_,ctxt) + call getv_mtpart(ivg) + call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) + else + if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")') + call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block) + end if + + call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_) + + t2 = psb_wtime() - t0 + + call psb_amx(ctxt, t2) + + if (iam==psb_root_) then + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2 + write(psb_out_unit,'(" ")') + end if + call a%cscnv(aux_a,info,mold=acoo) + tcnvcsr = 0 + tcnvgpu = 0 + nr = desc_a%get_local_rows() + nrg = desc_a%get_global_rows() + call psb_geall(x_col,desc_a,info) + do i=1, nr + call desc_a%l2g(i,ig,info) + call psb_geins(ione,(/ig/),(/(sone + (sone*ig)/nrg)/),x_col,desc_a,info) + end do + call psb_geasb(x_col,desc_a,info) + do j=1, ncnv + call aux_a%cscnv(a,info,mold=acoo) + call psb_barrier(ctxt) + t1 = psb_wtime() + call a%cscnv(info,mold=acmold) + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + tcnvcsr = tcnvcsr + t2 + if (j==1) tcnvc1 = t2 + xc1 = x_col%get_vect() + call xv%bld(xc1) + call psb_geasb(bv,desc_a,info,scratch=.true.) + +#ifdef HAVE_GPU + + call aux_a%cscnv(agpu,info,mold=acoo) + call xg%bld(xc1,mold=vmold) + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_barrier(ctxt) + t1 = psb_wtime() + call agpu%cscnv(info,mold=agmold) + call psb_gpu_DeviceSync() + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + if (j==1) tcnvg1 = t2 + tcnvgpu = tcnvgpu + t2 +#endif + end do + + call psb_barrier(ctxt) + t1 = psb_wtime() + do i=1,ntests + call psb_spmm(sone,a,xv,szero,bv,desc_a,info) + end do + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + call psb_amx(ctxt,t2) + +#ifdef HAVE_GPU + ! FIXME: cache flush needed here + call psb_barrier(ctxt) + tt1 = psb_wtime() + do i=1,ntests + call psb_spmm(sone,agpu,xv,szero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 1 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + tt2 = psb_wtime() - tt1 + call psb_amx(ctxt,tt2) + xc1 = bv%get_vect() + xc2 = bg%get_vect() + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on xGPU',eps + + call xg%sync() + ! FIXME: cache flush needed here + + call psb_barrier(ctxt) + gt1 = psb_wtime() + do i=1,ntests*ngpu + call psb_spmm(sone,agpu,xg,szero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 2 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + ! For timing purposes we need to make sure all threads + ! in the device are done. + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + gt2 = psb_wtime() - gt1 + call psb_amx(ctxt,gt2) + call bg%sync() + xc1 = bv%get_vect() + xc2 = bg%get_vect() + call psb_geaxpby(-sone,bg,+sone,bv,desc_a,info) + eps = psb_geamax(bv,desc_a,info) + + call psb_amx(ctxt,t2) + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on GPU',eps +#endif + + + amatsize = a%sizeof() + agmatsize = agpu%sizeof() + samatsize = amatsize + samatsize = samatsize/(1024*1024) + sgmatsize = agmatsize + sgmatsize = sgmatsize/(1024*1024) + descsize = psb_sizeof(desc_a) + call psb_sum(ctxt,samatsize) + call psb_sum(ctxt,sgmatsize) + call psb_sum(ctxt,descsize) + + if (iam == psb_root_) then + write(psb_out_unit,'("Matrix: ",a)') mtrx_file + write(psb_out_unit,& + &'("Test on : ",i20," processors")') np + write(psb_out_unit,& + &'("Size of matrix : ",i20," ")') nrt + write(psb_out_unit,& + &'("Number of nonzeros : ",i20," ")') annz + write(psb_out_unit,& + &'("Memory occupation CPU (MBytes) : ",f20.2," ")') samatsize + write(psb_out_unit,& + &'("Memory occupation GPU (MBytes) : ",f20.2," ")') sgmatsize + write(psb_out_unit,& + &'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize + write(psb_out_unit,& + &'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize + flops = ntests*(2.d0*annz) + tflops = flops + gflops = flops * ngpu + write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() +#ifdef HAVE_GPU + write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() + write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& + & tcnvc1 + write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& + & tcnvcsr + write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& + & tcnvcsr/ncnv + write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& + & tcnvg1 + write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& + & tcnvgpu + write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& + & tcnvgpu/ncnv + +#endif + write(psb_out_unit,& + & '("Number of flops (",i0," prod) : ",F20.0," ")') & + & ntests,flops + + flops = flops / (t2) + tflops = tflops / (tt2) + gflops = gflops / (gt2) + write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& + & ntests,t2 + write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& + & t2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& + & flops/1.d6 +#ifdef HAVE_GPU + + write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& + & ntests, tt2 + write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& + & tt2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& + & tflops/1.d6 + + write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')& + & ngpu*ntests,gt2 + write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')& + & gt2*1.d3/(1.d0*ntests*ngpu) + write(psb_out_unit,'("MFLOPS (GPU) : ",F20.3)')& + & gflops/1.d6 +#endif + ! + ! This computation assumes the data movement associated with CSR: + ! it is minimal in terms of coefficients. Other formats may either move + ! more data (padding etc.) or less data (if they can save on the indices). + ! + nbytes = nr*(2*psb_sizeof_sp + psb_sizeof_ip)+& + & annz*(psb_sizeof_sp + psb_sizeof_ip) + bdwdth = ntests*nbytes/(t2*1.d6) + write(psb_out_unit,*) + write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth +#ifdef HAVE_GPU + bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) + write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth +#endif + write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() + write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize + + end if + + call psb_gefree(b_col, desc_a,info) + call psb_gefree(x_col, desc_a,info) + call psb_gefree(xv, desc_a,info) + call psb_gefree(bv, desc_a,info) + call psb_spfree(a, desc_a,info) +#ifdef HAVE_GPU + call psb_gefree(xg, desc_a,info) + call psb_gefree(bg, desc_a,info) + call psb_spfree(agpu,desc_a,info) + call psb_gpu_exit() +#endif + call psb_cdfree(desc_a,info) + + call psb_exit(ctxt) + stop + +9999 continue + call psb_error(ctxt) + +end program s_file_spmv + + + + + diff --git a/test/cudakern/spdegenmv.F90 b/test/cudakern/spdegenmv.F90 new file mode 100644 index 00000000..f953e163 --- /dev/null +++ b/test/cudakern/spdegenmv.F90 @@ -0,0 +1,992 @@ +! +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: spdegenmv.f90 +! +! Program: pdegenmv +! This sample program measures the performance of the matrix-vector product. +! The matrix is generated in the same way as for the pdegen test case of +! the main PSBLAS library. +! +! +module psb_s_pde3d_mod + + + use psb_base_mod, only : psb_spk_, psb_ipk_, psb_lpk_, psb_desc_type,& + & psb_sspmat_type, psb_s_vect_type, szero,& + & psb_s_base_sparse_mat, psb_s_base_vect_type, & + & psb_i_base_vect_type, psb_l_base_vect_type + + interface + function s_func_3d(x,y,z) result(val) + import :: psb_spk_ + real(psb_spk_), intent(in) :: x,y,z + real(psb_spk_) :: val + end function s_func_3d + end interface + + interface psb_gen_pde3d + module procedure psb_s_gen_pde3d + end interface psb_gen_pde3d + +contains + + function s_null_func_3d(x,y,z) result(val) + + real(psb_spk_), intent(in) :: x,y,z + real(psb_spk_) :: val + + val = szero + + end function s_null_func_3d + ! + ! functions parametrizing the differential equation + ! + function b1(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: b1 + real(psb_spk_), intent(in) :: x,y,z + b1=sone/sqrt((3*sone)) + end function b1 + function b2(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: b2 + real(psb_spk_), intent(in) :: x,y,z + b2=sone/sqrt((3*sone)) + end function b2 + function b3(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: b3 + real(psb_spk_), intent(in) :: x,y,z + b3=sone/sqrt((3*sone)) + end function b3 + function c(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: c + real(psb_spk_), intent(in) :: x,y,z + c=szero + end function c + function a1(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: a1 + real(psb_spk_), intent(in) :: x,y,z + a1=sone/80 + end function a1 + function a2(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: a2 + real(psb_spk_), intent(in) :: x,y,z + a2=sone/80 + end function a2 + function a3(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: a3 + real(psb_spk_), intent(in) :: x,y,z + a3=sone/80 + end function a3 + function g(x,y,z) + use psb_base_mod, only : psb_spk_, sone, szero + implicit none + real(psb_spk_) :: g + real(psb_spk_), intent(in) :: x,y,z + g = szero + if (x == sone) then + g = sone + else if (x == szero) then + g = exp(y**2-z**2) + end if + end function g + + + ! + ! subroutine to allocate and fill in the coefficient matrix and + ! the rhs. + ! + subroutine psb_s_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,& + & f,amold,vmold,imold,partition,nrl,iv) + use psb_base_mod + use psb_util_mod + ! + ! Discretizes the partial differential equation + ! + ! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u) + ! - ------ - ------ - ------ + ----- + ------ + ------ + c u = f + ! dxdx dydy dzdz dx dy dz + ! + ! with Dirichlet boundary conditions + ! u = g + ! + ! on the unit cube 0<=x,y,z<=1. + ! + ! + ! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation. + ! + implicit none + integer(psb_ipk_) :: idim + type(psb_sspmat_type) :: a + type(psb_s_vect_type) :: xv,bv + type(psb_desc_type) :: desc_a + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: info + character(len=*) :: afmt + procedure(s_func_3d), optional :: f + class(psb_s_base_sparse_mat), optional :: amold + class(psb_s_base_vect_type), optional :: vmold + class(psb_i_base_vect_type), optional :: imold + integer(psb_ipk_), optional :: partition, nrl,iv(:) + + ! Local variables. + + integer(psb_ipk_), parameter :: nb=20 + type(psb_s_csc_sparse_mat) :: acsc + type(psb_s_coo_sparse_mat) :: acoo + type(psb_s_csr_sparse_mat) :: acsr + real(psb_spk_) :: zt(nb),x,y,z + integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_ + integer(psb_lpk_) :: m,n,glob_row,nt + integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner + ! For 3D partition + ! Note: integer control variables going directly into an MPI call + ! must be 4 bytes, i.e. psb_mpk_ + integer(psb_mpk_) :: npdims(3), npp, minfo + integer(psb_ipk_) :: npx,npy,npz, iamx,iamy,iamz,mynx,myny,mynz + integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:) + ! Process grid + integer(psb_ipk_) :: np, iam + integer(psb_ipk_) :: icoeff + integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:) + real(psb_spk_), allocatable :: val(:) + ! deltah dimension of each grid cell + ! deltat discretization time + real(psb_spk_) :: deltah, sqdeltah, deltah2 + real(psb_spk_), parameter :: rhs=szero,one=sone,zero=szero + real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb + integer(psb_ipk_) :: err_act + procedure(s_func_3d), pointer :: f_ + character(len=20) :: name, ch_err,tmpfmt + + info = psb_success_ + name = 'create_matrix' + call psb_erractionsave(err_act) + + call psb_info(ctxt, iam, np) + + + if (present(f)) then + f_ => f + else + f_ => s_null_func_3d + end if + + deltah = sone/(idim+2) + sqdeltah = deltah*deltah + deltah2 = (2*sone)* deltah + + if (present(partition)) then + if ((1<= partition).and.(partition <= 3)) then + partition_ = partition + else + write(*,*) 'Invalid partition choice ',partition,' defaulting to 3' + partition_ = 3 + end if + else + partition_ = 3 + end if + + ! initialize array descriptor and sparse matrix storage. provide an + ! estimate of the number of non zeroes + + m = (1_psb_lpk_*idim)*idim*idim + n = m + nnz = ((n*7)/(np)) + if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n + t0 = psb_wtime() + select case(partition_) + case(1) + ! A BLOCK partition + if (present(nrl)) then + nr = nrl + else + ! + ! Using a simple BLOCK distribution. + ! + nt = (m+np-1)/np + nr = max(0,min(nt,m-(iam*nt))) + end if + + nt = nr + call psb_sum(ctxt,nt) + if (nt /= m) then + write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + + ! + ! First example of use of CDALL: specify for each process a number of + ! contiguous rows + ! + call psb_cdall(ctxt,desc_a,info,nl=nr) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(2) + ! A partition defined by the user through IV + + if (present(iv)) then + if (size(iv) /= m) then + write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + else + write(psb_err_unit,*) iam, 'Initialization error: IV not present' + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + + ! + ! Second example of use of CDALL: specify for each row the + ! process that owns it + ! + call psb_cdall(ctxt,desc_a,info,vg=iv) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(3) + ! A 3-dimensional partition + + ! A nifty MPI function will split the process list + npdims = 0 + call mpi_dims_create(np,3,npdims,info) + npx = npdims(1) + npy = npdims(2) + npz = npdims(3) + + allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz)) + ! We can reuse idx2ijk for process indices as well. + call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0) + ! Now let's split the 3D cube in hexahedra + call dist1Didx(bndx,idim,npx) + mynx = bndx(iamx+1)-bndx(iamx) + call dist1Didx(bndy,idim,npy) + myny = bndy(iamy+1)-bndy(iamy) + call dist1Didx(bndz,idim,npz) + mynz = bndz(iamz+1)-bndz(iamz) + + ! How many indices do I own? + nlr = mynx*myny*mynz + allocate(myidx(nlr)) + ! Now, let's generate the list of indices I own + nr = 0 + do i=bndx(iamx),bndx(iamx+1)-1 + do j=bndy(iamy),bndy(iamy+1)-1 + do k=bndz(iamz),bndz(iamz+1)-1 + nr = nr + 1 + call ijk2idx(myidx(nr),i,j,k,idim,idim,idim) + end do + end do + end do + if (nr /= nlr) then + write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',& + & nr,nlr,mynx,myny,mynz + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + end if + + ! + ! Third example of use of CDALL: specify for each process + ! the set of global indices it owns. + ! + call psb_cdall(ctxt,desc_a,info,vl=myidx) + + case default + write(psb_err_unit,*) iam, 'Initialization error: should not get here' + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end select + + + if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz,& + & dupl=psb_dupl_err_) + ! define rhs from boundary conditions; also build initial guess + if (info == psb_success_) call psb_geall(xv,desc_a,info) + if (info == psb_success_) call psb_geall(bv,desc_a,info) + + call psb_barrier(ctxt) + talc = psb_wtime()-t0 + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='allocation rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + ! we build an auxiliary matrix consisting of one row at a + ! time; just a small matrix. might be extended to generate + ! a bunch of rows per call. + ! + allocate(val(20*nb),irow(20*nb),& + &icol(20*nb),stat=info) + if (info /= psb_success_ ) then + info=psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + endif + + + ! loop over rows belonging to current process in a block + ! distribution. + + call psb_barrier(ctxt) + t1 = psb_wtime() + do ii=1, nlr,nb + ib = min(nb,nlr-ii+1) + icoeff = 1 + do k=1,ib + i=ii+k-1 + ! local matrix pointer + glob_row=myidx(i) + ! compute gridpoint coordinates + call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim) + ! x, y, z coordinates + x = (ix-1)*deltah + y = (iy-1)*deltah + z = (iz-1)*deltah + zt(k) = f_(x,y,z) + ! internal point: build discretization + ! + ! term depending on (x-1,y,z) + ! + val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2 + if (ix == 1) then + zt(k) = g(szero,y,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y-1,z) + val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2 + if (iy == 1) then + zt(k) = g(x,szero,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y,z-1) + val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2 + if (iz == 1) then + zt(k) = g(x,y,szero)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + + ! term depending on (x,y,z) + val(icoeff)=(2*sone)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah & + & + c(x,y,z) + call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + ! term depending on (x,y,z+1) + val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2 + if (iz == idim) then + zt(k) = g(x,y,sone)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y+1,z) + val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2 + if (iy == idim) then + zt(k) = g(x,sone,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x+1,y,z) + val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2 + if (ix==idim) then + zt(k) = g(sone,y,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + + end do + call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info) + if(info /= psb_success_) exit + call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info) + if(info /= psb_success_) exit + zt(:)=szero + call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info) + if(info /= psb_success_) exit + end do + + tgen = psb_wtime()-t1 + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='insert rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + deallocate(val,irow,icol) + + call psb_barrier(ctxt) + t1 = psb_wtime() + call psb_cdasb(desc_a,info,mold=imold) + tcdasb = psb_wtime()-t1 + call psb_barrier(ctxt) + t1 = psb_wtime() + if (info == psb_success_) then + if (present(amold)) then + call psb_spasb(a,desc_a,info,mold=amold) + else + call psb_spasb(a,desc_a,info,afmt=afmt) + end if + end if + call psb_barrier(ctxt) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (info == psb_success_) call psb_geasb(xv,desc_a,info,mold=vmold) + if (info == psb_success_) call psb_geasb(bv,desc_a,info,mold=vmold) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + tasb = psb_wtime()-t1 + call psb_barrier(ctxt) + ttot = psb_wtime() - t0 + + call psb_amx(ctxt,talc) + call psb_amx(ctxt,tgen) + call psb_amx(ctxt,tasb) + call psb_amx(ctxt,ttot) + if(iam == psb_root_) then + tmpfmt = a%get_fmt() + write(psb_out_unit,'("The matrix has been generated and assembled in ",a3," format.")')& + & tmpfmt + write(psb_out_unit,'("-allocation time : ",es12.5)') talc + write(psb_out_unit,'("-coeff. gen. time : ",es12.5)') tgen + write(psb_out_unit,'("-desc asbly time : ",es12.5)') tcdasb + write(psb_out_unit,'("- mat asbly time : ",es12.5)') tasb + write(psb_out_unit,'("-total time : ",es12.5)') ttot + + end if + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + end subroutine psb_s_gen_pde3d + + +end module psb_s_pde3d_mod + + +program pdgenmv + use psb_base_mod + use psb_util_mod + use psb_ext_mod +#ifdef HAVE_CUDA + use psb_cuda_mod +#endif + use psb_s_pde3d_mod + implicit none + + ! input parameters + character(len=5) :: acfmt, agfmt + integer :: idim + + ! miscellaneous + real(psb_spk_), parameter :: one = 1.e0 + real(psb_dpk_) :: t1, t2, tprec, flops, tflops,& + & tt1, tt2, gt1, gt2, gflops, bdwdth,& + & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 + + ! sparse matrix and preconditioner + type(psb_sspmat_type) :: a, agpu, aux_a + ! descriptor + type(psb_desc_type) :: desc_a + ! dense matrices + type(psb_s_vect_type), target :: xv,bv, xg, bg +#ifdef HAVE_CUDA + type(psb_s_vect_cuda) :: vmold + type(psb_i_vect_cuda) :: imold +#endif + real(psb_spk_), allocatable :: x1(:), x2(:), x0(:) + ! blacs parameters + type(psb_ctxt_type) :: ctxt + integer :: iam, np + + ! solver parameters + integer(psb_epk_) :: amatsize, precsize, descsize, annz, nbytes + real(psb_spk_) :: err, eps + integer, parameter :: ntests=200, ngpu=50, ncnv=20 + type(psb_s_coo_sparse_mat), target :: acoo + type(psb_s_csr_sparse_mat), target :: acsr + type(psb_s_ell_sparse_mat), target :: aell + type(psb_s_hll_sparse_mat), target :: ahll + type(psb_s_dia_sparse_mat), target :: adia + type(psb_s_hdia_sparse_mat), target :: ahdia +#ifdef HAVE_CUDA + type(psb_s_cuda_elg_sparse_mat), target :: aelg + type(psb_s_cuda_csrg_sparse_mat), target :: acsrg +#if CUDA_SHORT_VERSION <= 10 + type(psb_s_cuda_hybg_sparse_mat), target :: ahybg +#endif + type(psb_s_cuda_hlg_sparse_mat), target :: ahlg + type(psb_s_cuda_hdiag_sparse_mat), target :: ahdiag + type(psb_s_cuda_dnsg_sparse_mat), target :: adnsg +#endif + class(psb_s_base_sparse_mat), pointer :: agmold, acmold + ! other variables + logical, parameter :: dump=.false. + integer(psb_ipk_) :: info, i, j, nr, nrg + integer(psb_lpk_) :: ig + character(len=20) :: name,ch_err + character(len=40) :: fname + + info=psb_success_ + + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) + +#ifdef HAVE_CUDA + call psb_cuda_init(ctxt) +#endif +#ifdef HAVE_RSB + call psb_rsb_init() +#endif + + if (iam < 0) then + ! This should not happen, but just in case + call psb_exit(ctxt) + stop + endif + if(psb_get_errstatus() /= 0) goto 9999 + name='pdegenmv-cuda' + ! + ! Hello world + ! + if (iam == psb_root_) then + write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ + write(*,*) 'This is the ',trim(name),' sample program' + end if +#ifdef HAVE_CUDA + write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_cuda_DeviceName()) +#endif + ! + ! get parameters + ! + call get_parms(ctxt,acfmt,agfmt,idim) + + ! + ! allocate and fill in the coefficient matrix and initial vectors + ! + call psb_barrier(ctxt) + t1 = psb_wtime() + call psb_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,'CSR ',info,partition=3) + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='create_matrix' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (iam == psb_root_) write(psb_out_unit,'("Overall matrix creation time : ",es12.5)')t2 + if (iam == psb_root_) write(psb_out_unit,'(" ")') + + if (dump) then + write(fname,'(a,i3.3,a,i3.3,a,i3.3,a)') 'pde',idim,'-',iam,'-',np,'.mtx' + call a%print(fname,head='PDEGEN test matrix') + end if + + select case(psb_toupper(acfmt)) + case('ELL') + acmold => aell + case('HLL') + acmold => ahll + case('DIA') + acmold => adia + case('HDIA') + acmold => ahdia + case('CSR') + acmold => acsr + case('COO') + acmold => acoo +#ifdef HAVE_RSB + case('RSB') + acmold => arsb +#endif + case default + write(*,*) 'Unknown format defaulting to HLL' + acmold => ahll + end select + call a%cscnv(info,mold=acmold) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From cscnv ',info + call psb_error() + stop + end if + +#ifdef HAVE_CUDA + select case(psb_toupper(agfmt)) + case('ELG') + agmold => aelg + case('HLG') + agmold => ahlg + case('HDIAG') + agmold => ahdiag + case('CSRG') + agmold => acsrg + case('DNSG') + agmold => adnsg +#if CUDA_SHORT_VERSION <= 10 + case('HYBG') + agmold => ahybg +#endif + case default + write(*,*) 'Unknown format defaulting to HLG' + agmold => ahlg + end select + call a%cscnv(agpu,info,mold=agmold) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From cscnv ',info + call psb_error() + stop + end if + call desc_a%cnv(mold=imold) + + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_geasb(xg,desc_a,info,scratch=.true.,mold=vmold) +#endif + nr = desc_a%get_local_rows() + nrg = desc_a%get_global_rows() + call psb_geall(x0,desc_a,info) + do i=1, nr + call desc_a%l2g(i,ig,info) + x0(i) = 1.0 + (1.0*ig)/nrg + end do + call a%cscnv(aux_a,info,mold=acoo) + tcnvcsr = 0 + tcnvgpu = 0 + call psb_geall(x1,desc_a,info) + do j=1, ncnv + call aux_a%cscnv(a,info,mold=acoo) + call psb_barrier(ctxt) + t1 = psb_wtime() + call a%cscnv(info,mold=acmold) + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + tcnvcsr = tcnvcsr + t2 + if (j==1) tcnvc1 = t2 + call psb_geasb(x1,desc_a,info) + call xv%bld(x0) + call psb_geasb(bv,desc_a,info,scratch=.true.) + +#ifdef HAVE_CUDA + + call aux_a%cscnv(agpu,info,mold=acoo) + call xg%bld(x0,mold=vmold) + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_barrier(ctxt) + t1 = psb_wtime() + call agpu%cscnv(info,mold=agmold) + call psb_cuda_DeviceSync() + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + if (j==1) tcnvg1 = t2 + tcnvgpu = tcnvgpu + t2 +#endif + end do + + + call xv%set(x0) + call psb_barrier(ctxt) + t1 = psb_wtime() + do i=1,ntests + call psb_spmm(sone,a,xv,szero,bv,desc_a,info) + end do + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + call psb_amx(ctxt,t2) + +#ifdef HAVE_CUDA + call xg%set(x0) + + ! FIXME: cache flush needed here + x1 = bv%get_vect() + x2 = bg%get_vect() + + call psb_barrier(ctxt) + tt1 = psb_wtime() + do i=1,ntests + call psb_spmm(sone,agpu,xv,szero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 1 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_cuda_DeviceSync() + call psb_barrier(ctxt) + tt2 = psb_wtime() - tt1 + call psb_amx(ctxt,tt2) + x1 = bv%get_vect() + x2 = bg%get_vect() + nr = desc_a%get_local_rows() + eps = maxval(abs(x1(1:nr)-x2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on xGPU',eps + + + ! FIXME: cache flush needed here + call xg%set(x0) + call xg%sync() + call psb_barrier(ctxt) + gt1 = psb_wtime() + do i=1,ntests*ngpu + call psb_spmm(sone,agpu,xg,szero,bg,desc_a,info) + ! For timing purposes we need to make sure all threads + ! in the device are done. + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 2 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_cuda_DeviceSync() + call psb_barrier(ctxt) + gt2 = psb_wtime() - gt1 + call psb_amx(ctxt,gt2) + call bg%sync() + x1 = bv%get_vect() + x2 = bg%get_vect() + call psb_geaxpby(-sone,bg,+sone,bv,desc_a,info) + eps = psb_geamax(bv,desc_a,info) + + call psb_amx(ctxt,t2) + eps = maxval(abs(x1(1:nr)-x2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on GPU',eps + if (dump) then + write(fname,'(a,i3.3,a,i3.3,a)')'XCPU-out-',iam,'-',np,'.mtx' + call mm_array_write(x1(1:nr),'Local part CPU',info,filename=fname) + write(fname,'(a,i3.3,a,i3.3,a)')'XGPU-out-',iam,'-',np,'.mtx' + call mm_array_write(x2(1:nr),'Local part GPU',info,filename=fname) + end if +#endif + annz = a%get_nzeros() + amatsize = a%sizeof() + descsize = psb_sizeof(desc_a) + call psb_sum(ctxt,nr) + call psb_sum(ctxt,annz) + call psb_sum(ctxt,amatsize) + call psb_sum(ctxt,descsize) + + if (iam == psb_root_) then + write(psb_out_unit,& + & '("Matrix: ell1 ",i0)') idim + write(psb_out_unit,& + &'("Test on : ",i20," processors")') np + write(psb_out_unit,& + &'("Size of matrix : ",i20," ")') nr + write(psb_out_unit,& + &'("Number of nonzeros : ",i20," ")') annz + write(psb_out_unit,& + &'("Memory occupation : ",i20," ")') amatsize + flops = ntests*(2.d0*annz) + tflops = flops + gflops = flops * ngpu + write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() +#ifdef HAVE_CUDA + write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() + write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& + & tcnvc1 + write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& + & tcnvcsr + write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& + & tcnvcsr/ncnv + write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& + & tcnvg1 + write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& + & tcnvgpu + write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& + & tcnvgpu/ncnv + +#endif + write(psb_out_unit,& + & '("Number of flops (",i0," prod) : ",F20.0," ")') & + & ntests,flops + + flops = flops / (t2) + tflops = tflops / (tt2) + gflops = gflops / (gt2) + + write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& + & ntests,t2 + write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& + & t2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& + & flops/1.d6 +#ifdef HAVE_CUDA + write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& + & ntests, tt2 + write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& + & tt2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& + & tflops/1.d6 + + write(psb_out_unit,'("Time for ",i6," products (s) (GPU.) : ",F20.3)')& + & ngpu*ntests,gt2 + write(psb_out_unit,'("Time per product (ms) (GPU.) : ",F20.3)')& + & gt2*1.d3/(1.d0*ntests*ngpu) + write(psb_out_unit,'("MFLOPS (GPU.) : ",F20.3)')& + & gflops/1.d6 +#endif + ! + ! This computation assumes the data movement associated with CSR: + ! it is minimal in terms of coefficients. Other formats may either move + ! more data (padding etc.) or less data (if they can save on the indices). + ! + nbytes = nr*(2*psb_sizeof_sp + psb_sizeof_ip)+& + & annz*(psb_sizeof_sp + psb_sizeof_ip) + bdwdth = ntests*nbytes/(t2*1.d6) + write(psb_out_unit,*) + write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (CPU) : ",F20.3)') bdwdth +#ifdef HAVE_CUDA + bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) + write(psb_out_unit,'("MBYTES/S sust. effective bandwidth (GPU) : ",F20.3)') bdwdth + bdwdth = psb_cuda_MemoryPeakBandwidth() + write(psb_out_unit,'("MBYTES/S peak bandwidth (GPU) : ",F20.3)') bdwdth +#endif + write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() + write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize + + end if + + ! + ! cleanup storage and exit + ! + call psb_gefree(bv,desc_a,info) + call psb_gefree(xv,desc_a,info) + call psb_spfree(a,desc_a,info) + call psb_cdfree(desc_a,info) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='free routine' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if +#ifdef HAVE_CUDA + call psb_cuda_exit() +#endif + call psb_exit(ctxt) + stop + +9999 continue + call psb_error(ctxt) + +contains + ! + ! get iteration parameters from standard input + ! + subroutine get_parms(ctxt,acfmt,agfmt,idim) + type(psb_ctxt_type) :: ctxt + character(len=*) :: agfmt, acfmt + integer :: idim + integer :: np, iam + integer :: intbuf(10), ip + + call psb_info(ctxt, iam, np) + + if (iam == 0) then + write(*,*) 'CPU side format?' + read(psb_inp_unit,*) acfmt + write(*,*) 'CUDA side format?' + read(psb_inp_unit,*) agfmt + write(*,*) 'Size of discretization cube?' + read(psb_inp_unit,*) idim + endif + call psb_bcast(ctxt,acfmt) + call psb_bcast(ctxt,agfmt) + call psb_bcast(ctxt,idim) + + if (iam == 0) then + write(psb_out_unit,'("Testing matrix : ell1")') + write(psb_out_unit,'("Grid dimensions : ",i4,"x",i4,"x",i4)')idim,idim,idim + write(psb_out_unit,'("Number of processors : ",i0)')np + write(psb_out_unit,'("Data distribution : BLOCK")') + write(psb_out_unit,'(" ")') + end if + return + + end subroutine get_parms + + +end program pdgenmv diff --git a/test/cudakern/z_file_spmv.F90 b/test/cudakern/z_file_spmv.F90 new file mode 100644 index 00000000..153dd5e1 --- /dev/null +++ b/test/cudakern/z_file_spmv.F90 @@ -0,0 +1,491 @@ +! +! Parallel Sparse BLAS GPU plugin +! (C) Copyright 2013 +! Salvatore Filippone +! Alessandro Fanfarillo +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +program z_file_spmv + use psb_base_mod + use psb_util_mod + use psb_ext_mod +#ifdef HAVE_GPU + use psb_gpu_mod +#endif + use data_input + implicit none + + ! input parameters + character(len=200) :: mtrx_file + + ! sparse matrices + type(psb_zspmat_type) :: a, aux_a, agpu + + ! dense matrices + complex(psb_dpk_), allocatable, target :: aux_b(:,:), d(:) + complex(psb_dpk_), allocatable , save :: x_col_glob(:), r_col_glob(:) + complex(psb_dpk_), pointer :: b_col_glob(:) + type(psb_z_vect_type) :: b_col, x_col, r_col + type(psb_z_vect_type) :: xg, bg, xv, bv +#ifdef HAVE_GPU + type(psb_z_vect_gpu) :: vmold +#endif + complex(psb_dpk_), allocatable :: xc1(:),xc2(:) + ! communications data structure + type(psb_desc_type):: desc_a + + type(psb_ctxt_type) :: ctxt + integer :: iam, np + integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes + real(psb_dpk_) :: damatsize, dgmatsize + complex(psb_dpk_) :: err, eps + + character(len=5) :: acfmt, agfmt + character(len=20) :: name + character(len=2) :: filefmt + integer, parameter :: iunit=12 + integer, parameter :: times=2000 + integer, parameter :: ntests=200, ngpu=50, ncnv=20 + + type(psb_z_coo_sparse_mat), target :: acoo + type(psb_z_csr_sparse_mat), target :: acsr + type(psb_z_ell_sparse_mat), target :: aell + type(psb_z_hll_sparse_mat), target :: ahll +#ifdef HAVE_GPU + type(psb_z_elg_sparse_mat), target :: aelg + type(psb_z_csrg_sparse_mat), target :: acsrg + type(psb_z_hybg_sparse_mat), target :: ahybg + type(psb_z_hlg_sparse_mat), target :: ahlg +#endif + class(psb_z_base_sparse_mat), pointer :: acmold, agmold + ! other variables + integer :: i,info,j,nrt, ns, nr, ipart, ig, nrg + integer :: internal, m,ii,nnzero + real(psb_dpk_) :: t0,t1, t2, tprec, flops + real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,& + & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1 + integer :: nrhs, nrow, n_row, dim, nv, ne + integer, allocatable :: ivg(:), ipv(:) + + + call psb_init(ctxt) + call psb_info(ctxt,iam,np) +#ifdef HAVE_GPU + call psb_gpu_init(ctxt) +#endif + if (iam < 0) then + ! This should not happen, but just in case + call psb_exit(ctxt) + stop + endif + + + name='file_spmv' + if(psb_get_errstatus() /= 0) goto 9999 + info=psb_success_ + call psb_set_errverbosity(2) + if (iam == psb_root_) then + write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_ + write(*,*) 'This is the ',trim(name),' sample program' + end if +#ifdef HAVE_GPU + write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount() + write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName()) +#endif + + if (iam == 0) then + write(*,*) 'Matrix? ' + call read_data(mtrx_file,psb_inp_unit) + write(*,*) 'file format' + call read_data(filefmt,psb_inp_unit) + write(*,*) 'CPU format' + call read_data(acfmt,psb_inp_unit) + write(*,*) 'GPU format' + call read_data(agfmt,psb_inp_unit) + write(*,*) 'distribution ' + call read_data(ipart,psb_inp_unit) + write(*,*) 'Read all data, going on' + end if + call psb_bcast(ctxt,mtrx_file) + call psb_bcast(ctxt,filefmt) + call psb_bcast(ctxt,acfmt) + call psb_bcast(ctxt,agfmt) + call psb_bcast(ctxt,ipart) + call psb_barrier(ctxt) + t0 = psb_wtime() + ! read the input matrix to be processed and (possibly) the rhs + nrhs = 1 + + if (iam==psb_root_) then + select case(psb_toupper(filefmt)) + case('MM') + ! For Matrix Market we have an input file for the matrix + ! and an (optional) second file for the RHS. + call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file) + + case ('HB') + ! For Harwell-Boeing we have a single file which may or may not + ! contain an RHS. + call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file) + + case default + info = -1 + write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt + end select + if (info /= 0) then + write(psb_err_unit,*) 'Error while reading input matrix ' + call psb_abort(ctxt) + end if + + ! + ! Always get nnz from original matrix. + ! Some formats add fill-in and do not keep track + ! of how many were added. So if the original matrix + ! contained some extra zeros, the count of entries + ! is not recoverable exactly. + ! + nrt = aux_a%get_nrows() + annz = aux_a%get_nzeros() + call psb_bcast(ctxt,annz) + call psb_bcast(ctxt,nrt) + + write(psb_out_unit,'("Generating an rhs...")') + write(psb_out_unit,'(" ")') + call psb_realloc(nrt,1,aux_b,info) + if (info /= 0) then + call psb_errpush(4000,name) + goto 9999 + endif + + b_col_glob => aux_b(:,1) + do i=1, nrt + b_col_glob(i) = 1.d0 + enddo + + else + + call psb_bcast(ctxt,annz) + call psb_bcast(ctxt,nrt) + + end if + + + select case(psb_toupper(acfmt)) + case('COO') + acmold => acoo + case('CSR') + acmold => acsr + case('ELL') + acmold => aell + case('HLL') + acmold => ahll + case default + write(*,*) 'Unknown format defaulting to CSR' + acmold => acsr + end select + +#ifdef HAVE_GPU + select case(psb_toupper(agfmt)) + case('ELG') + agmold => aelg + case('HLG') + agmold => ahlg + case('CSRG') + agmold => acsrg + case('HYBG') + agmold => ahybg + case default + write(*,*) 'Unknown format defaulting to HLG' + agmold => ahlg + end select +#endif + + + ! switch over different partition types + if (ipart == 0) then + call psb_barrier(ctxt) + if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")') + allocate(ivg(nrt),ipv(np)) + do i=1,nrt + call part_block(i,nrt,np,ipv,nv) + ivg(i) = ipv(1) + enddo + call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) + else if (ipart == 2) then + if (iam==psb_root_) then + write(psb_out_unit,'("Partition type: graph")') + write(psb_out_unit,'(" ")') + ! write(psb_err_unit,'("Build type: graph")') + call build_mtpart(aux_a,np) + endif + call psb_barrier(ctxt) + call distr_mtpart(psb_root_,ctxt) + call getv_mtpart(ivg) + call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg) + else + if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")') + call psb_matdist(aux_a, a, ctxt,desc_a,info,parts=part_block) + end if + + call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_) + + t2 = psb_wtime() - t0 + + call psb_amx(ctxt, t2) + + if (iam==psb_root_) then + write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2 + write(psb_out_unit,'(" ")') + end if + call a%cscnv(aux_a,info,mold=acoo) + tcnvcsr = 0 + tcnvgpu = 0 + nr = desc_a%get_local_rows() + nrg = desc_a%get_global_rows() + call psb_geall(x_col,desc_a,info) + do i=1, nr + call desc_a%l2g(i,ig,info) + call psb_geins(ione,(/ig/),(/(zone + (zone*ig)/nrg)/),x_col,desc_a,info) + end do + call psb_geasb(x_col,desc_a,info) + do j=1, ncnv + call aux_a%cscnv(a,info,mold=acoo) + call psb_barrier(ctxt) + t1 = psb_wtime() + call a%cscnv(info,mold=acmold) + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + tcnvcsr = tcnvcsr + t2 + if (j==1) tcnvc1 = t2 + xc1 = x_col%get_vect() + call xv%bld(xc1) + call psb_geasb(bv,desc_a,info,scratch=.true.) + +#ifdef HAVE_GPU + + call aux_a%cscnv(agpu,info,mold=acoo) + call xg%bld(xc1,mold=vmold) + call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold) + call psb_barrier(ctxt) + t1 = psb_wtime() + call agpu%cscnv(info,mold=agmold) + call psb_gpu_DeviceSync() + t2 = psb_Wtime() -t1 + call psb_amx(ctxt,t2) + if (j==1) tcnvg1 = t2 + tcnvgpu = tcnvgpu + t2 +#endif + end do + + call psb_barrier(ctxt) + t1 = psb_wtime() + do i=1,ntests + call psb_spmm(zone,a,xv,zzero,bv,desc_a,info) + end do + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + call psb_amx(ctxt,t2) + +#ifdef HAVE_GPU + ! FIXME: cache flush needed here + call psb_barrier(ctxt) + tt1 = psb_wtime() + do i=1,ntests + call psb_spmm(zone,agpu,xv,zzero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 1 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + tt2 = psb_wtime() - tt1 + call psb_amx(ctxt,tt2) + xc1 = bv%get_vect() + xc2 = bg%get_vect() + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on xGPU',eps + + call xg%sync() + ! FIXME: cache flush needed here + + call psb_barrier(ctxt) + gt1 = psb_wtime() + do i=1,ntests*ngpu + call psb_spmm(zone,agpu,xg,zzero,bg,desc_a,info) + if ((info /= 0).or.(psb_get_errstatus()/=0)) then + write(0,*) 'From 2 spmm',info,i,ntests + call psb_error() + stop + end if + + end do + ! For timing purposes we need to make sure all threads + ! in the device are done. + call psb_gpu_DeviceSync() + call psb_barrier(ctxt) + gt2 = psb_wtime() - gt1 + call psb_amx(ctxt,gt2) + call bg%sync() + xc1 = bv%get_vect() + xc2 = bg%get_vect() + call psb_geaxpby(-zone,bg,+zone,bv,desc_a,info) + eps = psb_geamax(bv,desc_a,info) + + call psb_amx(ctxt,t2) + nr = desc_a%get_local_rows() + eps = maxval(abs(xc1(1:nr)-xc2(1:nr))) + call psb_amx(ctxt,eps) + if (iam==0) write(*,*) 'Max diff on GPU',eps +#endif + + + amatsize = a%sizeof() + agmatsize = agpu%sizeof() + damatsize = amatsize + damatsize = damatsize/(1024*1024) + dgmatsize = agmatsize + dgmatsize = dgmatsize/(1024*1024) + descsize = psb_sizeof(desc_a) + call psb_sum(ctxt,damatsize) + call psb_sum(ctxt,dgmatsize) + call psb_sum(ctxt,descsize) + + if (iam == psb_root_) then + write(psb_out_unit,'("Matrix: ",a)') mtrx_file + write(psb_out_unit,& + &'("Test on : ",i20," processors")') np + write(psb_out_unit,& + &'("Size of matrix : ",i20," ")') nrt + write(psb_out_unit,& + &'("Number of nonzeros : ",i20," ")') annz + write(psb_out_unit,& + &'("Memory occupation CPU (MBytes) : ",f20.2," ")') damatsize + write(psb_out_unit,& + &'("Memory occupation GPU (MBytes) : ",f20.2," ")') dgmatsize + write(psb_out_unit,& + &'("Memory occupation CPU (Bytes) : ",i24," ")') amatsize + write(psb_out_unit,& + &'("Memory occupation GPU (Bytes) : ",i24," ")') agmatsize + flops = ntests*(2.d0*annz) + tflops = flops + gflops = flops * ngpu + write(psb_out_unit,'("Storage type for A: ",a)') a%get_fmt() +#ifdef HAVE_GPU + write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt() + write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')& + & tcnvc1 + write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')& + & tcnvcsr + write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')& + & tcnvcsr/ncnv + write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')& + & tcnvg1 + write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')& + & tcnvgpu + write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')& + & tcnvgpu/ncnv + +#endif + write(psb_out_unit,& + & '("Number of flops (",i0," prod) : ",F20.0," ")') & + & ntests,flops + + flops = flops / (t2) + tflops = tflops / (tt2) + gflops = gflops / (gt2) + write(psb_out_unit,'("Time for ",i6," products (s) (CPU) : ",F20.3)')& + & ntests,t2 + write(psb_out_unit,'("Time per product (ms) (CPU) : ",F20.3)')& + & t2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (CPU) : ",F20.3)')& + & flops/1.d6 +#ifdef HAVE_GPU + + write(psb_out_unit,'("Time for ",i6," products (s) (xGPU) : ",F20.3)')& + & ntests, tt2 + write(psb_out_unit,'("Time per product (ms) (xGPU) : ",F20.3)')& + & tt2*1.d3/(1.d0*ntests) + write(psb_out_unit,'("MFLOPS (xGPU) : ",F20.3)')& + & tflops/1.d6 + + write(psb_out_unit,'("Time for ",i6," products (s) (GPU) : ",F20.3)')& + & ngpu*ntests,gt2 + write(psb_out_unit,'("Time per product (ms) (GPU) : ",F20.3)')& + & gt2*1.d3/(1.d0*ntests*ngpu) + write(psb_out_unit,'("MFLOPS (GPU) : ",F20.3)')& + & gflops/1.d6 +#endif + ! + ! This computation assumes the data movement associated with CSR: + ! it is minimal in terms of coefficients. Other formats may either move + ! more data (padding etc.) or less data (if they can save on the indices). + ! + nbytes = nr*(2*2*psb_sizeof_dp + psb_sizeof_ip)+& + & annz*(2*psb_sizeof_dp + psb_sizeof_ip) + bdwdth = ntests*nbytes/(t2*1.d6) + write(psb_out_unit,*) + write(psb_out_unit,'("MBYTES/S (CPU) : ",F20.3)') bdwdth +#ifdef HAVE_GPU + bdwdth = ngpu*ntests*nbytes/(gt2*1.d6) + write(psb_out_unit,'("MBYTES/S (GPU) : ",F20.3)') bdwdth +#endif + write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt() + write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize + + end if + + call psb_gefree(b_col, desc_a,info) + call psb_gefree(x_col, desc_a,info) + call psb_gefree(xv, desc_a,info) + call psb_gefree(bv, desc_a,info) + call psb_spfree(a, desc_a,info) +#ifdef HAVE_GPU + call psb_gefree(xg, desc_a,info) + call psb_gefree(bg, desc_a,info) + call psb_spfree(agpu,desc_a,info) + call psb_gpu_exit() +#endif + call psb_cdfree(desc_a,info) + + call psb_exit(ctxt) + stop + +9999 continue + call psb_error(ctxt) + +end program z_file_spmv + + + + + diff --git a/test/hello/Makefile b/test/hello/Makefile index a6811ea7..f16ff75e 100644 --- a/test/hello/Makefile +++ b/test/hello/Makefile @@ -16,7 +16,7 @@ FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG). EXEDIR=./runs -all: runsd hello pingpong +all: runsd hello pingpong tsum tsum1 runsd: (if test ! -d runs ; then mkdir runs; fi) @@ -28,11 +28,17 @@ hello: hello.o pingpong: pingpong.o $(FLINK) pingpong.o -o pingpong $(PSBLAS_LIB) $(LDLIBS) /bin/mv pingpong $(EXEDIR) +tsum: tsum.o + $(FLINK) tsum.o -o tsum $(PSBLAS_LIB) $(LDLIBS) + /bin/mv tsum $(EXEDIR) +tsum1: tsum1.o + $(FLINK) tsum1.o -o tsum1 $(PSBLAS_LIB) $(LDLIBS) + /bin/mv tsum1 $(EXEDIR) clean: - /bin/rm -f hello.o pingpong.o + /bin/rm -f hello.o pingpong.o tsum.o tsum1.o $(EXEDIR)/hello verycleanlib: (cd ../..; make veryclean) diff --git a/test/kernel/pdgenspmv.f90 b/test/kernel/pdgenspmv.f90 index d5fd9ba4..b7204edd 100644 --- a/test/kernel/pdgenspmv.f90 +++ b/test/kernel/pdgenspmv.f90 @@ -142,7 +142,7 @@ contains ! the rhs. ! subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,& - & f,amold,vmold,imold,partition,nrl,iv) + & f,amold,vmold,imold,partition,nrl,iv,tnd) use psb_base_mod use psb_util_mod ! @@ -173,7 +173,7 @@ contains class(psb_d_base_vect_type), optional :: vmold class(psb_i_base_vect_type), optional :: imold integer(psb_ipk_), optional :: partition, nrl,iv(:) - + logical, optional :: tnd ! Local variables. integer(psb_ipk_), parameter :: nb=20 @@ -202,6 +202,7 @@ contains real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb integer(psb_ipk_) :: err_act procedure(d_func_3d), pointer :: f_ + logical :: tnd_ character(len=20) :: name, ch_err,tmpfmt info = psb_success_ @@ -495,9 +496,9 @@ contains t1 = psb_wtime() if (info == psb_success_) then if (present(amold)) then - call psb_spasb(a,desc_a,info,mold=amold) + call psb_spasb(a,desc_a,info,mold=amold,bld_and=tnd) else - call psb_spasb(a,desc_a,info,afmt=afmt) + call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=tnd) end if end if call psb_barrier(ctxt) @@ -549,13 +550,14 @@ program pdgenspmv use psb_base_mod use psb_util_mod use psb_d_pde3d_mod + implicit none ! input parameters character(len=20) :: kmethd, ptype character(len=5) :: afmt integer(psb_ipk_) :: idim - + logical :: tnd ! miscellaneous real(psb_dpk_), parameter :: one = done real(psb_dpk_) :: t1, t2, tprec, flops, tflops, tt1, tt2, bdwdth @@ -606,14 +608,14 @@ program pdgenspmv ! ! get parameters ! - call get_parms(ctxt,afmt,idim) - + call get_parms(ctxt,afmt,idim,tnd) + call psb_init_timers() ! ! allocate and fill in the coefficient matrix, rhs and initial guess ! call psb_barrier(ctxt) t1 = psb_wtime() - call psb_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info) + call psb_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,tnd=tnd) call psb_barrier(ctxt) t2 = psb_wtime() - t1 if(info /= psb_success_) then @@ -694,7 +696,7 @@ program pdgenspmv write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize end if - + call psb_print_timers(ctxt) ! ! cleanup storage and exit @@ -721,10 +723,11 @@ contains ! ! get iteration parameters from standard input ! - subroutine get_parms(ctxt,afmt,idim) + subroutine get_parms(ctxt,afmt,idim,tnd) type(psb_ctxt_type) :: ctxt character(len=*) :: afmt integer(psb_ipk_) :: idim + logical :: tnd integer(psb_ipk_) :: np, iam integer(psb_ipk_) :: intbuf(10), ip @@ -733,9 +736,11 @@ contains if (iam == 0) then read(psb_inp_unit,*) afmt read(psb_inp_unit,*) idim + read(psb_inp_unit,*) tnd endif call psb_bcast(ctxt,afmt) call psb_bcast(ctxt,idim) + call psb_bcast(ctxt,tnd) if (iam == 0) then write(psb_out_unit,'("Testing matrix : ell1")') @@ -743,6 +748,8 @@ contains write(psb_out_unit,'("Number of processors : ",i0)')np write(psb_out_unit,'("Data distribution : BLOCK")') write(psb_out_unit,'(" ")') + write(psb_out_unit,'("Storage format ",a)') afmt + write(psb_out_unit,'("Testing overlap ND ",l8)') tnd end if return diff --git a/test/pargen/runs/ppde.inp b/test/pargen/runs/ppde.inp index 40e3358d..470bcf58 100644 --- a/test/pargen/runs/ppde.inp +++ b/test/pargen/runs/ppde.inp @@ -2,11 +2,11 @@ BICGSTAB Iterative method BICGSTAB CGS BICG BICGSTABL RGMRES FCG CGR BJAC Preconditioner NONE DIAG BJAC CSR Storage format for matrix A: CSR COO -140 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) +200 Domain size (acutal system is this**3 (pde3d) or **2 (pde2d) ) 3 Partition: 1 BLOCK 3 3D 2 Stopping criterion 1 2 0200 MAXIT -05 ITRACE +10 ITRACE 002 IRST restart for RGMRES and BiCGSTABL INVK Block Solver ILU,ILUT,INVK,AINVT,AORTH NONE If ILU : MILU or NONE othewise ignored