Merge branch 'development' of github.com:sfilippone/psblas3 into development

development
Salvatore Filippone 3 months ago
commit db558cace3

@ -67,6 +67,26 @@ UTILMODNAME=@UTILMODNAME@
CBINDLIBNAME=libpsb_cbind.a CBINDLIBNAME=libpsb_cbind.a
CUDAD=@CUDAD@
CUDALD=@CUDALD@
LCUDA=@LCUDA@
SPGPU_LIBS=@SPGPU_LIBS@
CUDA_DIR=@CUDA_DIR@
CUDA_DEFINES=@CUDA_DEFINES@
CUDA_INCLUDES=@CUDA_INCLUDES@
CUDA_LIBS=@CUDA_LIBS@
CUDA_VERSION=@CUDA_VERSION@
CUDA_SHORT_VERSION=@CUDA_SHORT_VERSION@
NVCC=@CUDA_NVCC@
CUDEFINES=@CUDEFINES@
.SUFFIXES: .cu
.cu.o:
$(NVCC) $(CINCLUDES) $(CDEFINES) $(CUDEFINES) -c $<
@PSBLASRULES@ @PSBLASRULES@
PSBGPULDLIBS=$(LCUDA) $(SPGPU_LIBS) $(CUDA_LIBS) $(PSBLDLIBS) $(LIBS)

@ -1,6 +1,6 @@
include Make.inc include Make.inc
all: dirs based precd kryld utild cbindd libd all: dirs based precd kryld utild cbindd extd $(CUDAD) libd
@echo "=====================================" @echo "====================================="
@echo "PSBLAS libraries Compilation Successful." @echo "PSBLAS libraries Compilation Successful."
@ -12,15 +12,20 @@ dirs:
precd: based precd: based
utild: based utild: based
kryld: precd kryld: precd
extd: based
cudad: extd
cbindd: based precd kryld utild cbindd: based precd kryld utild
libd: based precd kryld utild cbindd libd: based precd kryld utild cbindd extd $(CUDALD)
$(MAKE) -C base lib $(MAKE) -C base lib
$(MAKE) -C prec lib $(MAKE) -C prec lib
$(MAKE) -C krylov lib $(MAKE) -C krylov lib
$(MAKE) -C util lib $(MAKE) -C util lib
$(MAKE) -C cbind lib $(MAKE) -C cbind lib
$(MAKE) -C ext lib
cudald: cudad
$(MAKE) -C cuda lib
based: based:
$(MAKE) -C base objs $(MAKE) -C base objs
@ -32,6 +37,10 @@ utild:
$(MAKE) -C util objs $(MAKE) -C util objs
cbindd: cbindd:
$(MAKE) -C cbind objs $(MAKE) -C cbind objs
extd: based
$(MAKE) -C ext objs
cudad: based extd
$(MAKE) -C cuda objs
install: all install: all
@ -56,6 +65,8 @@ clean:
$(MAKE) -C krylov clean $(MAKE) -C krylov clean
$(MAKE) -C util clean $(MAKE) -C util clean
$(MAKE) -C cbind clean $(MAKE) -C cbind clean
$(MAKE) -C ext clean
$(MAKE) -C cuda clean
check: all check: all
make check -C test/serial make check -C test/serial
@ -71,6 +82,8 @@ veryclean: cleanlib
cd krylov && $(MAKE) veryclean cd krylov && $(MAKE) veryclean
cd util && $(MAKE) veryclean cd util && $(MAKE) veryclean
cd cbind && $(MAKE) veryclean cd cbind && $(MAKE) veryclean
cd ext && $(MAKE) veryclean
cd cuda && $(MAKE) veryclean
cd test/fileread && $(MAKE) clean cd test/fileread && $(MAKE) clean
cd test/pargen && $(MAKE) clean cd test/pargen && $(MAKE) clean
cd test/util && $(MAKE) clean cd test/util && $(MAKE) clean

@ -1,4 +1,4 @@
PSBLAS library, version 3.8 PSBLAS library, version 3.9
=========================== ===========================
The architecture of the Fortran 2003 sparse BLAS is described in: The architecture of the Fortran 2003 sparse BLAS is described in:
@ -25,7 +25,7 @@ Harwell-Boeing and MatrixMarket file formats.
DOCUMENTATION DOCUMENTATION
------------- -------------
See docs/psblas-3.8.pdf; an HTML version of the same document is See docs/psblas-3.9.pdf; an HTML version of the same document is
available in docs/html. Please consult the sample programs, especially available in docs/html. Please consult the sample programs, especially
test/pargen/psb_[sd]_pde[23]d.f90 test/pargen/psb_[sd]_pde[23]d.f90
@ -40,6 +40,15 @@ The main reference for the serial sparse BLAS is:
>linear algebra subprograms for sparse matrices: a user level interface, >linear algebra subprograms for sparse matrices: a user level interface,
>ACM Trans. Math. Softw., 23(3), 379-401, 1997. >ACM Trans. Math. Softw., 23(3), 379-401, 1997.
CUDA and GPU support
--------------------
This version of PSBLAS incorporates into a single package three
entities that were previouslty separated:
1. PSBLAS -- the base library
2. PSBLAS-EXT -- a library providing additional storage formats
3. SPGPU -- a package of kernels for NVIDIA GPUs originally
written by Davide Barbieri and Salvatore Filippone;
see the license file cuda/License-spgpu.md
INSTALLING INSTALLING
---------- ----------
@ -61,6 +70,11 @@ prerequisites (see also SERIAL below):
specify `--with-amd` (see `./configure --help` for more details). specify `--with-amd` (see `./configure --help` for more details).
We use the C interface to AMD. We use the C interface to AMD.
5. If you have CUDA available, use
--with-cuda=<path> to specify the CUDA toolkit location
--with-cudacc=XX,YY,ZZ to specify a list of target CCs (compute
capabilities) to compile the CUDA code for.
The configure script will generate a Make.inc file suitable for building The configure script will generate a Make.inc file suitable for building
the library. The script is capable of recognizing the needed libraries the library. The script is capable of recognizing the needed libraries
with their default names; if they are in unusual places consider adding with their default names; if they are in unusual places consider adding

@ -191,7 +191,9 @@ subroutine psi_cswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_
@ -676,7 +678,9 @@ subroutine psi_cswapidxv(ctxt,icomm,flag,beta,y,idx, &
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_

@ -195,7 +195,9 @@ subroutine psi_ctranidxm(ctxt,icomm,flag,n,beta,y,idx,&
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_
@ -688,7 +690,9 @@ subroutine psi_ctranidxv(ctxt,icomm,flag,beta,y,idx,&
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_

@ -191,7 +191,9 @@ subroutine psi_dswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_
@ -676,7 +678,9 @@ subroutine psi_dswapidxv(ctxt,icomm,flag,beta,y,idx, &
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_

@ -195,7 +195,9 @@ subroutine psi_dtranidxm(ctxt,icomm,flag,n,beta,y,idx,&
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_
@ -688,7 +690,9 @@ subroutine psi_dtranidxv(ctxt,icomm,flag,beta,y,idx,&
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_

@ -191,7 +191,9 @@ subroutine psi_eswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_
@ -676,7 +678,9 @@ subroutine psi_eswapidxv(ctxt,icomm,flag,beta,y,idx, &
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_

@ -195,7 +195,9 @@ subroutine psi_etranidxm(ctxt,icomm,flag,n,beta,y,idx,&
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_
@ -688,7 +690,9 @@ subroutine psi_etranidxv(ctxt,icomm,flag,beta,y,idx,&
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_

@ -191,7 +191,9 @@ subroutine psi_i2swapidxm(ctxt,icomm,flag,n,beta,y,idx, &
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_
@ -676,7 +678,9 @@ subroutine psi_i2swapidxv(ctxt,icomm,flag,beta,y,idx, &
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_

@ -195,7 +195,9 @@ subroutine psi_i2tranidxm(ctxt,icomm,flag,n,beta,y,idx,&
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_
@ -688,7 +690,9 @@ subroutine psi_i2tranidxv(ctxt,icomm,flag,beta,y,idx,&
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_

@ -191,7 +191,9 @@ subroutine psi_mswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_
@ -676,7 +678,9 @@ subroutine psi_mswapidxv(ctxt,icomm,flag,beta,y,idx, &
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_

@ -195,7 +195,9 @@ subroutine psi_mtranidxm(ctxt,icomm,flag,n,beta,y,idx,&
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_
@ -688,7 +690,9 @@ subroutine psi_mtranidxv(ctxt,icomm,flag,beta,y,idx,&
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_

@ -191,7 +191,9 @@ subroutine psi_sswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_
@ -676,7 +678,9 @@ subroutine psi_sswapidxv(ctxt,icomm,flag,beta,y,idx, &
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_

@ -195,7 +195,9 @@ subroutine psi_stranidxm(ctxt,icomm,flag,n,beta,y,idx,&
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_
@ -688,7 +690,9 @@ subroutine psi_stranidxv(ctxt,icomm,flag,beta,y,idx,&
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_

@ -191,7 +191,9 @@ subroutine psi_zswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_
@ -676,7 +678,9 @@ subroutine psi_zswapidxv(ctxt,icomm,flag,beta,y,idx, &
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_

@ -195,7 +195,9 @@ subroutine psi_ztranidxm(ctxt,icomm,flag,n,beta,y,idx,&
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_
@ -688,7 +690,9 @@ subroutine psi_ztranidxv(ctxt,icomm,flag,beta,y,idx,&
logical, parameter :: usersend=.false. logical, parameter :: usersend=.false.
complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name character(len=20) :: name
info=psb_success_ info=psb_success_

@ -87,7 +87,7 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info)
integer(psb_lpk_) :: mglob, ih integer(psb_lpk_) :: mglob, ih
type(psb_ctxt_type) :: ctxt type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np,me integer(psb_ipk_) :: np,me
logical, parameter :: gettime=.true., debug=.false. logical, parameter :: debug=.false.
integer(psb_mpk_) :: xchg_alg integer(psb_mpk_) :: xchg_alg
logical, parameter :: do_timings=.false. logical, parameter :: do_timings=.false.
integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1 integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1
@ -132,10 +132,6 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info)
goto 9999 goto 9999
end if end if
if (gettime) then
t0 = psb_wtime()
end if
nadj = size(adj) nadj = size(adj)
nidx = size(idx) nidx = size(idx)
call psb_realloc(nidx,iprc,info) call psb_realloc(nidx,iprc,info)

@ -137,7 +137,7 @@ subroutine psi_i_desc_index(desc,index_in,dep_list,&
& idxr, idxs, iszs, iszr, nesd, nerv, ixp, idx & idxr, idxs, iszs, iszr, nesd, nerv, ixp, idx
integer(psb_mpk_) :: icomm, minfo integer(psb_mpk_) :: icomm, minfo
logical, parameter :: do_timings=.true., oldstyle=.false., debug=.false. logical, parameter :: do_timings=.false., oldstyle=.false., debug=.false.
integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1, idx_phase4=-1 integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1, idx_phase4=-1
logical, parameter :: usempi=.false. logical, parameter :: usempi=.false.
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit

@ -99,6 +99,33 @@ module psi_c_serial_mod
end subroutine psi_caxpbyv2 end subroutine psi_caxpbyv2
end interface psb_geaxpby end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_spk_), intent (in) :: x(:)
complex(psb_spk_), intent (inout) :: y(:)
complex(psb_spk_), intent (inout) :: z(:)
complex(psb_spk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_cabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_spk_), intent (in) :: x(:)
complex(psb_spk_), intent (inout) :: y(:)
complex(psb_spk_), intent (inout) :: z(:)
complex(psb_spk_), intent (inout) :: w(:)
complex(psb_spk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
end subroutine psi_cxyzw
end interface psi_xyzw
interface psi_gth interface psi_gth
subroutine psi_cgthmv(n,k,idx,alpha,x,beta,y) subroutine psi_cgthmv(n,k,idx,alpha,x,beta,y)
import :: psb_ipk_, psb_spk_ import :: psb_ipk_, psb_spk_

@ -99,6 +99,33 @@ module psi_d_serial_mod
end subroutine psi_daxpbyv2 end subroutine psi_daxpbyv2
end interface psb_geaxpby end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_dpk_), intent (in) :: x(:)
real(psb_dpk_), intent (inout) :: y(:)
real(psb_dpk_), intent (inout) :: z(:)
real(psb_dpk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_dabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_dpk_), intent (in) :: x(:)
real(psb_dpk_), intent (inout) :: y(:)
real(psb_dpk_), intent (inout) :: z(:)
real(psb_dpk_), intent (inout) :: w(:)
real(psb_dpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
end subroutine psi_dxyzw
end interface psi_xyzw
interface psi_gth interface psi_gth
subroutine psi_dgthmv(n,k,idx,alpha,x,beta,y) subroutine psi_dgthmv(n,k,idx,alpha,x,beta,y)
import :: psb_ipk_, psb_dpk_ import :: psb_ipk_, psb_dpk_

@ -99,6 +99,33 @@ module psi_e_serial_mod
end subroutine psi_eaxpbyv2 end subroutine psi_eaxpbyv2
end interface psb_geaxpby end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_epk_), intent (in) :: x(:)
integer(psb_epk_), intent (inout) :: y(:)
integer(psb_epk_), intent (inout) :: z(:)
integer(psb_epk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_eabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_epk_), intent (in) :: x(:)
integer(psb_epk_), intent (inout) :: y(:)
integer(psb_epk_), intent (inout) :: z(:)
integer(psb_epk_), intent (inout) :: w(:)
integer(psb_epk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
end subroutine psi_exyzw
end interface psi_xyzw
interface psi_gth interface psi_gth
subroutine psi_egthmv(n,k,idx,alpha,x,beta,y) subroutine psi_egthmv(n,k,idx,alpha,x,beta,y)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_

@ -99,6 +99,33 @@ module psi_i2_serial_mod
end subroutine psi_i2axpbyv2 end subroutine psi_i2axpbyv2
end interface psb_geaxpby end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_i2pk_), intent (in) :: x(:)
integer(psb_i2pk_), intent (inout) :: y(:)
integer(psb_i2pk_), intent (inout) :: z(:)
integer(psb_i2pk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_i2abgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_i2pk_), intent (in) :: x(:)
integer(psb_i2pk_), intent (inout) :: y(:)
integer(psb_i2pk_), intent (inout) :: z(:)
integer(psb_i2pk_), intent (inout) :: w(:)
integer(psb_i2pk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
end subroutine psi_i2xyzw
end interface psi_xyzw
interface psi_gth interface psi_gth
subroutine psi_i2gthmv(n,k,idx,alpha,x,beta,y) subroutine psi_i2gthmv(n,k,idx,alpha,x,beta,y)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_

@ -99,6 +99,33 @@ module psi_m_serial_mod
end subroutine psi_maxpbyv2 end subroutine psi_maxpbyv2
end interface psb_geaxpby end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_mpk_), intent (in) :: x(:)
integer(psb_mpk_), intent (inout) :: y(:)
integer(psb_mpk_), intent (inout) :: z(:)
integer(psb_mpk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_mabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_mpk_), intent (in) :: x(:)
integer(psb_mpk_), intent (inout) :: y(:)
integer(psb_mpk_), intent (inout) :: z(:)
integer(psb_mpk_), intent (inout) :: w(:)
integer(psb_mpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
end subroutine psi_mxyzw
end interface psi_xyzw
interface psi_gth interface psi_gth
subroutine psi_mgthmv(n,k,idx,alpha,x,beta,y) subroutine psi_mgthmv(n,k,idx,alpha,x,beta,y)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_

@ -99,6 +99,33 @@ module psi_s_serial_mod
end subroutine psi_saxpbyv2 end subroutine psi_saxpbyv2
end interface psb_geaxpby end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_spk_), intent (in) :: x(:)
real(psb_spk_), intent (inout) :: y(:)
real(psb_spk_), intent (inout) :: z(:)
real(psb_spk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_sabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_spk_), intent (in) :: x(:)
real(psb_spk_), intent (inout) :: y(:)
real(psb_spk_), intent (inout) :: z(:)
real(psb_spk_), intent (inout) :: w(:)
real(psb_spk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
end subroutine psi_sxyzw
end interface psi_xyzw
interface psi_gth interface psi_gth
subroutine psi_sgthmv(n,k,idx,alpha,x,beta,y) subroutine psi_sgthmv(n,k,idx,alpha,x,beta,y)
import :: psb_ipk_, psb_spk_ import :: psb_ipk_, psb_spk_

@ -99,6 +99,33 @@ module psi_z_serial_mod
end subroutine psi_zaxpbyv2 end subroutine psi_zaxpbyv2
end interface psb_geaxpby end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_dpk_), intent (in) :: x(:)
complex(psb_dpk_), intent (inout) :: y(:)
complex(psb_dpk_), intent (inout) :: z(:)
complex(psb_dpk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_zabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_dpk_), intent (in) :: x(:)
complex(psb_dpk_), intent (inout) :: y(:)
complex(psb_dpk_), intent (inout) :: z(:)
complex(psb_dpk_), intent (inout) :: w(:)
complex(psb_dpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
end subroutine psi_zxyzw
end interface psi_xyzw
interface psi_gth interface psi_gth
subroutine psi_zgthmv(n,k,idx,alpha,x,beta,y) subroutine psi_zgthmv(n,k,idx,alpha,x,beta,y)
import :: psb_ipk_, psb_dpk_ import :: psb_ipk_, psb_dpk_

@ -409,7 +409,7 @@ contains
! !
! Since the hashed lists take up (somewhat) more than 2*N_COL integers, ! Since the hashed lists take up (somewhat) more than 2*N_COL integers,
! it makes no sense to use them if you don't have at least ! it makes no sense to use them if you don't have at least
! 3 processes, no matter what the size of the process. ! 3 processes, no matter what the size of the index space.
! !
val = psb_cd_is_large_size(m) .and. (np > 2) val = psb_cd_is_large_size(m) .and. (np > 2)
end function psb_cd_choose_large_state end function psb_cd_choose_large_state

@ -215,7 +215,9 @@ contains
end if end if
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap,owned_,info) &
!$omp private(i)
do i=1, size(idx) do i=1, size(idx)
if (mask(i)) then if (mask(i)) then
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
@ -229,9 +231,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (.not.present(mask)) then else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap,owned_,info) &
!$omp private(i)
do i=1, size(idx) do i=1, size(idx)
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
idx(i) = idxmap%min_glob_row + idx(i) - 1 idx(i) = idxmap%min_glob_row + idx(i) - 1
@ -243,7 +247,7 @@ contains
info = -1 info = -1
end if end if
end do end do
!$omp end parallel do
end if end if
end subroutine block_ll2gv1 end subroutine block_ll2gv1
@ -277,7 +281,9 @@ contains
end if end if
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,idxmap,owned_,info,im) &
!$omp private(i)
do i=1, im do i=1, im
if (mask(i)) then if (mask(i)) then
if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then
@ -291,9 +297,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (.not.present(mask)) then else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,idxmap,owned_,info,im) &
!$omp private(i)
do i=1, im do i=1, im
if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then
idxout(i) = idxmap%min_glob_row + idxin(i) - 1 idxout(i) = idxmap%min_glob_row + idxin(i) - 1
@ -305,7 +313,7 @@ contains
info = -1 info = -1
end if end if
end do end do
!$omp end parallel do
end if end if
if (is > im) then if (is > im) then
@ -392,6 +400,9 @@ contains
if (present(mask)) then if (present(mask)) then
if (idxmap%is_asb()) then if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idx,idxmap,owned_) &
!$omp private(i,nv,tidx)
do i=1, is do i=1, is
if (mask(i)) then if (mask(i)) then
if ((idxmap%min_glob_row <= idx(i)).and. & if ((idxmap%min_glob_row <= idx(i)).and. &
@ -408,7 +419,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idx,idxmap,owned_) &
!$omp private(i,ip,lip,tidx,info)
do i=1,is do i=1,is
if (mask(i)) then if (mask(i)) then
if ((idxmap%min_glob_row <= idx(i)).and.& if ((idxmap%min_glob_row <= idx(i)).and.&
@ -424,8 +439,8 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else else
!!$ write(0,*) 'Block status: invalid ',idxmap%get_state()
idx(1:is) = -1 idx(1:is) = -1
info = -1 info = -1
end if end if
@ -433,6 +448,9 @@ contains
else if (.not.present(mask)) then else if (.not.present(mask)) then
if (idxmap%is_asb()) then if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idx,idxmap,owned_) &
!$omp private(i,nv,tidx)
do i=1, is do i=1, is
if ((idxmap%min_glob_row <= idx(i)).and.& if ((idxmap%min_glob_row <= idx(i)).and.&
& (idx(i) <= idxmap%max_glob_row)) then & (idx(i) <= idxmap%max_glob_row)) then
@ -447,8 +465,11 @@ contains
idx(i) = -1 idx(i) = -1
end if end if
end do end do
!$omp end parallel do
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idx,idxmap,owned_) &
!$omp private(i,ip,lip,tidx,info)
do i=1,is do i=1,is
if ((idxmap%min_glob_row <= idx(i)).and.& if ((idxmap%min_glob_row <= idx(i)).and.&
& (idx(i) <= idxmap%max_glob_row)) then & (idx(i) <= idxmap%max_glob_row)) then
@ -462,6 +483,7 @@ contains
idx(i) = -1 idx(i) = -1
end if end if
end do end do
!$omp end parallel do
else else
idx(1:is) = -1 idx(1:is) = -1
info = -1 info = -1
@ -953,7 +975,9 @@ contains
end if end if
info = psb_success_ info = psb_success_
else else
info = -5 write(0,*) 'From has_search_ins:',info,ip,lip,nxt,&
& idxmap%min_glob_row,idxmap%max_glob_row
info = -6
return return
end if end if
idxout(i) = lip + idxmap%local_rows idxout(i) = lip + idxmap%local_rows

@ -221,6 +221,9 @@ contains
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap,owned_) &
!$omp private(i)
do i=1, size(idx) do i=1, size(idx)
if (mask(i)) then if (mask(i)) then
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
@ -233,9 +236,12 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (.not.present(mask)) then else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap,owned_) &
!$omp private(i)
do i=1, size(idx) do i=1, size(idx)
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
idx(i) = idxmap%loc_to_glob(idx(i)) idx(i) = idxmap%loc_to_glob(idx(i))
@ -246,7 +252,7 @@ contains
idx(i) = -1 idx(i) = -1
end if end if
end do end do
!$omp end parallel do
end if end if
end subroutine hash_l2gv1 end subroutine hash_l2gv1
@ -363,6 +369,9 @@ contains
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idx,mglob,idxmap,nrm,ncol,nrow,owned_) &
!$omp private(i,ip,lip,tlip,info)
do i = 1, is do i = 1, is
if (mask(i)) then if (mask(i)) then
ip = idx(i) ip = idx(i)
@ -388,7 +397,7 @@ contains
endif endif
end if end if
enddo enddo
!$omp end parallel do
else else
write(0,*) 'Hash status: invalid ',idxmap%get_state() write(0,*) 'Hash status: invalid ',idxmap%get_state()
idx(1:is) = -1 idx(1:is) = -1
@ -404,6 +413,9 @@ contains
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idx,mglob,idxmap,nrm,ncol,nrow,owned_) &
!$omp private(i,ip,lip,tlip,info)
do i = 1, is do i = 1, is
ip = idx(i) ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then if ((ip < 1 ).or.(ip>mglob)) then
@ -427,14 +439,12 @@ contains
idx(i) = lip idx(i) = lip
endif endif
enddo enddo
!$omp end parallel do
else else
write(0,*) 'Hash status: invalid ',idxmap%get_state() write(0,*) 'Hash status: invalid ',idxmap%get_state()
idx(1:is) = -1 idx(1:is) = -1
info = -1 info = -1
end if end if
end if end if
end subroutine hash_g2lv1 end subroutine hash_g2lv1
@ -493,6 +503,9 @@ contains
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idxin,idxout,mglob,idxmap,nrm,ncol,nrow,owned_) &
!$omp private(i,ip,lip,tlip,info)
do i = 1, is do i = 1, is
if (mask(i)) then if (mask(i)) then
ip = idxin(i) ip = idxin(i)
@ -518,6 +531,7 @@ contains
endif endif
end if end if
enddo enddo
!$omp end parallel do
else else
write(0,*) 'Hash status: invalid ',idxmap%get_state() write(0,*) 'Hash status: invalid ',idxmap%get_state()
idxout(1:is) = -1 idxout(1:is) = -1
@ -533,6 +547,9 @@ contains
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idxin,idxout,mglob,idxmap,nrm,ncol,nrow,owned_) &
!$omp private(i,ip,lip,tlip,info)
do i = 1, is do i = 1, is
ip = idxin(i) ip = idxin(i)
if ((ip < 1 ).or.(ip>mglob)) then if ((ip < 1 ).or.(ip>mglob)) then
@ -556,14 +573,12 @@ contains
idxout(i) = lip idxout(i) = lip
endif endif
enddo enddo
!$omp end parallel do
else else
write(0,*) 'Hash status: invalid ',idxmap%get_state() write(0,*) 'Hash status: invalid ',idxmap%get_state()
idxout(1:is) = -1 idxout(1:is) = -1
info = -1 info = -1
end if end if
end if end if
end subroutine hash_g2lv2 end subroutine hash_g2lv2
@ -649,7 +664,7 @@ contains
& err_act & err_act
integer(psb_lpk_) :: mglob, ip, nxt, tlip integer(psb_lpk_) :: mglob, ip, nxt, tlip
type(psb_ctxt_type) :: ctxt type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: me, np integer(psb_ipk_) :: me, np,ith
character(len=20) :: name,ch_err character(len=20) :: name,ch_err
logical, allocatable :: mask_(:) logical, allocatable :: mask_(:)
!!$ logical :: use_openmp = .true. !!$ logical :: use_openmp = .true.
@ -683,370 +698,249 @@ contains
mglob = idxmap%get_gr() mglob = idxmap%get_gr()
nrow = idxmap%get_lr() nrow = idxmap%get_lr()
!write(0,*) me,name,' before loop ',psb_errstatus_fatal() !write(0,*) me,name,' before loop ',psb_errstatus_fatal()
#if 0 && defined(OPENMP) #if defined(OPENMP)
!call OMP_init_lock(ins_lck)
if (idxmap%is_bld()) then
isLoopValid = .true. isLoopValid = .true.
ncol = idxmap%get_lc() if (idxmap%is_bld()) then
if (present(mask)) then
mask_ = mask
else
allocate(mask_(size(idx)))
mask_ = .true.
end if
if (present(lidx)) then if (present(lidx)) then
if (present(mask)) then if (present(mask)) then
!$omp critical(hash_g2l_ins) !$omp parallel do default(none) schedule(dynamic) &
!$omp shared(lidx,mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) &
! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) & !$omp private(i,ip,lip,tlip,nxt,info) &
! $ OMP shared(name,me,is,idx,ins_lck,mask,mglob,idxmap,ncol,nrow,laddsz,lidx) & !$omp reduction(.AND.:isLoopValid)
! $ OMP private(i,ip,lip,tlip,nxt,info) &
! $ OMP reduction(.AND.:isLoopValid)
do i = 1, is do i = 1, is
info = 0
if (.not. isLoopValid) cycle
if (mask(i)) then if (mask(i)) then
ip = idx(i) ip = idx(i)
if ((ip < 1 ).or.(ip>mglob) ) then if ((ip < 1 ).or.(ip>mglob) ) then
idx(i) = -1 idx(i) = -1
cycle cycle
endif endif
call OMP_set_lock(ins_lck)
ncol = idxmap%get_lc() ncol = idxmap%get_lc()
call OMP_unset_lock(ins_lck)
! At first, we check the index presence in 'idxmap'. Usually
! the index is found. If it is not found, we repeat the checking,
! but inside a critical region.
call hash_inner_cnv(ip,lip,idxmap%hashvmask,& call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol) & idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then if (lip > 0) then
call OMP_set_lock(ins_lck) idx(i) = lip
info = psb_success_
! We check again if the index is already in 'idxmap', this else
! time inside a critical region (we assume that the index !$omp critical(hash_g2l_ins)
! is often already existing). tlip = lip
ncol = idxmap%get_lc()
nxt = lidx(i) nxt = lidx(i)
call hash_inner_cnv(ip,lip,idxmap%hashvmask,& if (nxt <= nrow) then
& idxmap%hashv,idxmap%glb_lc,ncol) idx(i) = -1
else
call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
& idxmap%glb_lc,ncol)
if (lip > 0) then if (lip > 0) then
idx(i) = lip idx(i) = lip
else if (lip < 0) then else
! Index not found
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip lip = tlip
if (info /= 0) write(0,*) ' inskey 1 info:',info
if (info >=0) then if (info >=0) then
! 'nxt' is not equal to 'tlip' when the key is already inside if (nxt == lip) then
! the hash map. In that case 'tlip' is the value corresponding call psb_ensure_size(lip,idxmap%loc_to_glob,info,&
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz) & pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then if (info /= psb_success_) then
!write(0,*) 'Error spot 1' info=1
call psb_errpush(psb_err_from_subroutine_ai_,name,& call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_ensure_size',i_err=(/info/)) & a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false. isLoopValid = .false.
idx(i) = -1
else
idx(i) = lip
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol)
end if end if
idxmap%loc_to_glob(nxt) = ip
nxt = max(ncol,nxt)
call idxmap%set_lc(nxt)
endif endif
if (isLoopValid) info = 0 idx(i) = lip
info = psb_success_
else else
idx(i) = -1 call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
isLoopValid = .false.
end if end if
call OMP_unset_lock(ins_lck)
end if end if
else endif
idx(i) = lip !$omp end critical(hash_g2l_ins)
end if end if
else else
idx(i) = -1 idx(i) = -1
end if end if
enddo enddo
! $ OMP END PARALLEL DO !$omp end parallel do
!$omp end critical(hash_g2l_ins)
if (.not. isLoopValid) then else if (.not.present(mask)) then
goto 9999
end if
else
!$omp critical(hash_g2l_ins)
! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) & !$omp parallel do default(none) schedule(dynamic) &
! $ OMP shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz,lidx) & !$omp shared(lidx,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) &
! $ OMP private(i,ip,lip,tlip,nxt,info) & !$omp private(i,ip,lip,tlip,nxt,info) &
! $ OMP reduction(.AND.:isLoopValid) !$omp reduction(.AND.:isLoopValid)
do i = 1, is do i = 1, is
info = 0
if (.not. isLoopValid) cycle
ip = idx(i) ip = idx(i)
if ((ip < 1 ).or.(ip>mglob) ) then if ((ip < 1 ).or.(ip>mglob) ) then
idx(i) = -1 idx(i) = -1
cycle cycle
endif endif
call OMP_set_lock(ins_lck)
ncol = idxmap%get_lc() ncol = idxmap%get_lc()
call OMP_unset_lock(ins_lck)
! At first, we check the index presence in 'idxmap'. Usually
! the index is found. If it is not found, we repeat the checking,
! but inside a critical region.
call hash_inner_cnv(ip,lip,idxmap%hashvmask,& call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol) & idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then if (lip > 0) then
call OMP_set_lock(ins_lck) idx(i) = lip
! We check again if the index is already in 'idxmap', this info = psb_success_
! time inside a critical region (we assume that the index else
! is often already existing). !$omp critical(hash_g2l_ins)
ncol = idxmap%get_lc() tlip = lip
nxt = lidx(i) nxt = lidx(i)
call hash_inner_cnv(ip,lip,idxmap%hashvmask,& if (nxt <= nrow) then
& idxmap%hashv,idxmap%glb_lc,ncol) idx(i) = -1
else
call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
& idxmap%glb_lc,ncol)
if (lip > 0) then if (lip > 0) then
idx(i) = lip idx(i) = lip
else if (lip < 0) then else
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip lip = tlip
!!$ if (info /= 0) write(0,*) ' inskey 2 info:',info
if (info >=0) then if (info >=0) then
! 'nxt' is not equal to 'tlip' when the key is already inside if (nxt == lip) then
! the hash map. In that case 'tlip' is the value corresponding call psb_ensure_size(lip,idxmap%loc_to_glob,info,&
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz) & pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then if (info /= psb_success_) then
!write(0,*) 'Error spot 2' info=1
call psb_errpush(psb_err_from_subroutine_ai_,name,& call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_ensure_size',i_err=(/info/)) & a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false. isLoopValid = .false.
idx(i) = -1
else
idx(i) = lip
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol)
end if end if
idxmap%loc_to_glob(nxt) = ip
nxt = max(ncol,nxt)
call idxmap%set_lc(nxt)
endif endif
if (isLoopValid) info = 0 idx(i) = lip
info = psb_success_
else else
idx(i) = -1 call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
isLoopValid = .false.
end if end if
call OMP_unset_lock(ins_lck)
end if end if
else
idx(i) = lip
endif endif
end do
! $ OMP END PARALLEL DO
!$omp end critical(hash_g2l_ins) !$omp end critical(hash_g2l_ins)
if (.not. isLoopValid) then
goto 9999
end if end if
enddo
!$omp end parallel do
end if end if
else if (.not.present(lidx)) then else if (.not.present(lidx)) then
if(present(mask)) then
! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) &
! $ OMP shared(name,me,is,idx,ins_lck,mask,mglob,idxmap,ncol,nrow,laddsz) &
! $ OMP private(i,ip,lip,tlip,nxt,info) &
! $ OMP reduction(.AND.:isLoopValid)
!$omp critical(hash_g2l_ins) if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) &
!$omp private(i,ip,lip,tlip,nxt,info) &
!$omp reduction(.AND.:isLoopValid)
do i = 1, is do i = 1, is
info = 0
if (.not. isLoopValid) cycle
if (mask(i)) then if (mask(i)) then
ip = idx(i) ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1 idx(i) = -1
cycle cycle
endif endif
!call OMP_set_lock(ins_lck)
ncol = idxmap%get_lc() ncol = idxmap%get_lc()
!call OMP_unset_lock(ins_lck)
! At first, we check the index presence in 'idxmap'. Usually
! the index is found. If it is not found, we repeat the checking,
! but inside a critical region.
!write(0,*) me,name,' b hic 1 ',psb_errstatus_fatal()
call hash_inner_cnv(ip,lip,idxmap%hashvmask,& call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol) & idxmap%hashv,idxmap%glb_lc,ncol)
!write(0,*) me,name,' a hic 1 ',psb_errstatus_fatal() if (lip > 0) then
if (lip < 0) then idx(i) = lip
!call OMP_set_lock(ins_lck) info = psb_success_
! We check again if the index is already in 'idxmap', this else
! time inside a critical region (we assume that the index !$omp critical(hash_g2l_ins)
! is often already existing, so this lock is relatively rare).
ncol = idxmap%get_lc() ncol = idxmap%get_lc()
nxt = ncol + 1 nxt = ncol + 1
!write(0,*) me,name,' b hic 2 ',psb_errstatus_fatal() call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
call hash_inner_cnv(ip,lip,idxmap%hashvmask,& & idxmap%glb_lc,ncol)
& idxmap%hashv,idxmap%glb_lc,ncol)
!write(0,*) me,name,' a hic 2 ',psb_errstatus_fatal()
if (lip > 0) then if (lip > 0) then
idx(i) = lip idx(i) = lip
else if (lip < 0) then else
! Index not found
!write(0,*) me,name,' b hsik ',psb_errstatus_fatal()
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
!!$ if (info /= 0) write(0,*) ' inskey 3 info:',info
if (psb_errstatus_fatal()) write(0,*) me,name,' a hsik ',info,omp_get_thread_num()
!write(0,*) me,name,' a hsik ',psb_errstatus_fatal()
lip = tlip lip = tlip
if (info >=0) then if (info >=0) then
!write(0,*) 'Error before spot 3', info if (nxt == lip) then
! 'nxt' is not equal to 'tlip' when the key is already inside call psb_ensure_size(lip,idxmap%loc_to_glob,info,&
! the hash map. In that case 'tlip' is the value corresponding
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz) & pad=-1_psb_lpk_,addsz=laddsz)
if (psb_errstatus_fatal()) write(0,*) me,name,' a esz ',info,omp_get_thread_num()
if (info /= psb_success_) then if (info /= psb_success_) then
write(0,*) 'Error spot 3', info info=1
call psb_errpush(psb_err_from_subroutine_ai_,name,& call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_ensure_size',i_err=(/info/)) & a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false. isLoopValid = .false.
idx(i) = -1
else
idx(i) = lip
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol)
end if end if
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(nxt)
endif endif
if (isLoopValid) info = 0 idx(i) = lip
info = psb_success_
else else
idx(i) = -1 call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
isLoopValid = .false.
end if end if
!!$ if (info /= 0) write(0,*) ' inskey 3.5 info:',info, isLoopValid
!call OMP_unset_lock(ins_lck)
end if end if
else !$omp end critical(hash_g2l_ins)
idx(i) = lip
end if end if
else else
idx(i) = -1 idx(i) = -1
end if end if
enddo enddo
! $ OMP END PARALLEL DO !$omp end parallel do
!$omp end critical(hash_g2l_ins)
if (.not. isLoopValid) then else if (.not.present(mask)) then
goto 9999
end if !$omp parallel do default(none) schedule(dynamic) &
else !$omp shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) &
! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) & !$omp private(i,ip,lip,tlip,nxt,info) &
! $ OMP shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) & !$omp reduction(.AND.:isLoopValid)
! $ OMP private(i,ip,lip,tlip,nxt,info) &
! $ OMP reduction(.AND.:isLoopValid)
!$omp critical(hash_g2l_ins)
do i = 1, is do i = 1, is
info = 0
if (.not. isLoopValid) cycle
ip = idx(i) ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1 idx(i) = -1
cycle cycle
endif endif
!call OMP_set_lock(ins_lck)
ncol = idxmap%get_lc() ncol = idxmap%get_lc()
!call OMP_unset_lock(ins_lck)
! At first, we check the index presence in 'idxmap'. Usually
! the index is found. If it is not found, we repeat the checking,
! but inside a critical region.
call hash_inner_cnv(ip,lip,idxmap%hashvmask,& call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol) & idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then if (lip > 0) then
!call OMP_set_lock(ins_lck) idx(i) = lip
! We check again if the index is already in 'idxmap', this info = psb_success_
! time inside a critical region (we assume that the index else
! is often already existing). !$omp critical(hash_g2l_ins)
ncol = idxmap%get_lc() ncol = idxmap%get_lc()
nxt = ncol + 1 nxt = ncol + 1
call hash_inner_cnv(ip,lip,idxmap%hashvmask,& call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
& idxmap%hashv,idxmap%glb_lc,ncol) & idxmap%glb_lc,ncol)
if (lip > 0) then if (lip > 0) then
idx(i) = lip idx(i) = lip
else if (lip < 0) then else
! Index not found
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip lip = tlip
!!$ if (info /= 0) write(0,*) ' inskey 4 info:',info
if (info >=0) then if (info >=0) then
! 'nxt' is not equal to 'tlip' when the key is already inside if (nxt == lip) then
! the hash map. In that case 'tlip' is the value corresponding call psb_ensure_size(lip,idxmap%loc_to_glob,info,&
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz) & pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then if (info /= psb_success_) then
!write(0,*) 'Error spot 4' info=1
call psb_errpush(psb_err_from_subroutine_ai_,name,& call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_ensure_size',i_err=(/info/)) & a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false. isLoopValid = .false.
idx(i) = -1
else
idx(i) = lip
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol)
end if end if
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(nxt)
endif endif
if (isLoopValid) info = 0 idx(i) = lip
info = psb_success_
else else
idx(i) = -1 call psb_errpush(psb_err_from_subroutine_ai_,name,&
end if & a_err='SearchInsKeyVal',i_err=(/info/))
!call OMP_unset_lock(ins_lck) isLoopValid = .false.
end if end if
else
idx(i) = lip
end if end if
end do
! $ OMP END PARALLEL DO
!$omp end critical(hash_g2l_ins) !$omp end critical(hash_g2l_ins)
if (.not. isLoopValid) then
goto 9999
end if end if
enddo
!$omp end parallel do
end if end if
end if end if
else else
@ -1054,7 +948,7 @@ contains
idx = -1 idx = -1
info = -1 info = -1
end if end if
!call OMP_destroy_lock(ins_lck) if (.not. isLoopValid) goto 9999
#else #else
!!$ else if (.not.use_openmp) then !!$ else if (.not.use_openmp) then
isLoopValid = .true. isLoopValid = .true.
@ -1073,13 +967,13 @@ contains
call hash_inner_cnv(ip,lip,idxmap%hashvmask,& call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol) & idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then if (lip < 0) then
tlip = lip
nxt = lidx(i) nxt = lidx(i)
if (nxt <= nrow) then if (nxt <= nrow) then
idx(i) = -1 idx(i) = -1
cycle cycle
endif endif
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info) call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip
if (info >=0) then if (info >=0) then
if (nxt == tlip) then if (nxt == tlip) then
ncol = max(ncol,nxt) ncol = max(ncol,nxt)
@ -1754,6 +1648,9 @@ contains
! for a width of psb_hash_bits ! for a width of psb_hash_bits
! !
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(n,hashv,hashmask,x,glb_lc,nrm,mask) &
!$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm)
do i=1, n do i=1, n
if (mask(i)) then if (mask(i)) then
key = x(i) key = x(i)
@ -1791,7 +1688,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else else
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(n,hashv,hashmask,x,glb_lc,nrm) &
!$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm)
do i=1, n do i=1, n
key = x(i) key = x(i)
ih = iand(key,hashmask) ih = iand(key,hashmask)
@ -1827,6 +1728,7 @@ contains
x(i) = tmp x(i) = tmp
end if end if
end do end do
!$omp end parallel do
end if end if
end subroutine hash_inner_cnv1 end subroutine hash_inner_cnv1
@ -1849,6 +1751,9 @@ contains
! for a width of psb_hash_bits ! for a width of psb_hash_bits
! !
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(n,hashv,hashmask,x,y,glb_lc,nrm,mask,psb_err_unit) &
!$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm)
do i=1, n do i=1, n
if (mask(i)) then if (mask(i)) then
key = x(i) key = x(i)
@ -1889,9 +1794,12 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else else
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(n,hashv,hashmask,x,y,glb_lc,nrm,psb_err_unit) &
!$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm)
do i=1, n do i=1, n
key = x(i) key = x(i)
ih = iand(key,hashmask) ih = iand(key,hashmask)
@ -1930,6 +1838,7 @@ contains
y(i) = tmp y(i) = tmp
end if end if
end do end do
!$omp end parallel do
end if end if
end subroutine hash_inner_cnv2 end subroutine hash_inner_cnv2

@ -383,12 +383,12 @@ contains
integer(psb_lpk_), intent(out) :: val integer(psb_lpk_), intent(out) :: val
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: hsize,hmask, hk, hd integer(psb_ipk_) :: hsize,hmask, hk, hd, i
logical :: redo
info = HashOK info = HashOK
hsize = hash%hsize hsize = hash%hsize
hmask = hash%hmask hmask = hash%hmask
val = -1
hk = iand(psb_hashval(key),hmask) hk = iand(psb_hashval(key),hmask)
if (hk == 0) then if (hk == 0) then
hd = 1 hd = 1
@ -400,21 +400,21 @@ contains
info = HashOutOfMemory info = HashOutOfMemory
return return
end if end if
val = -1
!$omp atomic
hash%nsrch = hash%nsrch + 1 hash%nsrch = hash%nsrch + 1
!$omp end atomic
do do
!$omp atomic
hash%nacc = hash%nacc + 1 hash%nacc = hash%nacc + 1
!$omp end atomic
if (hash%table(hk,1) == key) then if (hash%table(hk,1) == key) then
val = hash%table(hk,2) val = hash%table(hk,2)
info = HashDuplicate info = HashDuplicate
!write(0,*) 'In searchinskey 1 : ', info, HashDuplicate
return return
end if end if
redo = .false.
!$omp critical(hashsearchins) !$omp critical(hashsearchins)
if (hash%table(hk,1) == key) then
val = hash%table(hk,2)
info = HashDuplicate
else
if (hash%table(hk,1) == HashFreeEntry) then if (hash%table(hk,1) == HashFreeEntry) then
if (hash%nk == hash%hsize -1) then if (hash%nk == hash%hsize -1) then
! !
@ -426,30 +426,31 @@ contains
call psb_hash_realloc(hash,info) call psb_hash_realloc(hash,info)
if (info /= HashOk) then if (info /= HashOk) then
info = HashOutOfMemory info = HashOutOfMemory
!return
else else
call psb_hash_searchinskey(key,val,nextval,hash,info) redo = .true.
!return
end if end if
else else
hash%nk = hash%nk + 1 hash%nk = hash%nk + 1
hash%table(hk,1) = key hash%table(hk,1) = key
hash%table(hk,2) = nextval hash%table(hk,2) = nextval
val = nextval val = nextval
!return info = HashOk
end if
end if end if
else if (hash%table(hk,1) == key) then
val = hash%table(hk,2)
info = HashDuplicate
else
info = HashNotFound
end if end if
!$omp end critical(hashsearchins) !$omp end critical(hashsearchins)
if (info /= HashOk) then if (redo) then
write(0,*) 'In searchinskey 2: ', info call psb_hash_searchinskey(key,val,nextval,hash,info)
return return
end if end if
if (val > 0) return if (val > 0) exit
hk = hk - hd hk = hk - hd
if (hk < 0) hk = hk + hsize if (hk < 0) hk = hk + hsize
end do end do
!write(0,*) 'In searchinskey 3: ', info
end subroutine psb_hash_lsearchinskey end subroutine psb_hash_lsearchinskey
recursive subroutine psb_hash_isearchinskey(key,val,nextval,hash,info) recursive subroutine psb_hash_isearchinskey(key,val,nextval,hash,info)
@ -459,6 +460,7 @@ contains
integer(psb_ipk_) :: hsize,hmask, hk, hd integer(psb_ipk_) :: hsize,hmask, hk, hd
logical :: redo logical :: redo
info = HashOK info = HashOK
hsize = hash%hsize hsize = hash%hsize
hmask = hash%hmask hmask = hash%hmask
@ -475,16 +477,21 @@ contains
return return
end if end if
val = -1 val = -1
val = -1
!$omp atomic
hash%nsrch = hash%nsrch + 1 hash%nsrch = hash%nsrch + 1
!$omp end atomic
do do
!$omp atomic
hash%nacc = hash%nacc + 1 hash%nacc = hash%nacc + 1
!$omp end atomic
if (hash%table(hk,1) == key) then if (hash%table(hk,1) == key) then
val = hash%table(hk,2) val = hash%table(hk,2)
info = HashDuplicate info = HashDuplicate
return return
end if end if
redo = .false. redo = .false.
!$OMP CRITICAL !$omp critical(hashsearchins)
if (hash%table(hk,1) == HashFreeEntry) then if (hash%table(hk,1) == HashFreeEntry) then
if (hash%nk == hash%hsize -1) then if (hash%nk == hash%hsize -1) then
! !
@ -496,24 +503,28 @@ contains
call psb_hash_realloc(hash,info) call psb_hash_realloc(hash,info)
if (info /= HashOk) then if (info /= HashOk) then
info = HashOutOfMemory info = HashOutOfMemory
!return
else else
redo = .true. redo = .true.
!!$ call psb_hash_searchinskey(key,val,nextval,hash,info)
!!$ return
end if end if
else else
hash%nk = hash%nk + 1 hash%nk = hash%nk + 1
hash%table(hk,1) = key hash%table(hk,1) = key
hash%table(hk,2) = nextval hash%table(hk,2) = nextval
val = nextval val = nextval
!return info = HashOk
end if end if
else if (hash%table(hk,1) == key) then
val = hash%table(hk,2)
info = HashDuplicate
else
info = HashNotFound
end if
!$omp end critical(hashsearchins)
if (redo) then
call psb_hash_searchinskey(key,val,nextval,hash,info)
return
end if end if
!$OMP END CRITICAL if (val > 0) exit
if (redo) call psb_hash_searchinskey(key,val,nextval,hash,info)
if (info /= HashOk) return
if (val > 0) return
hk = hk - hd hk = hk - hd
if (hk < 0) hk = hk + hsize if (hk < 0) hk = hk + hsize
end do end do
@ -551,7 +562,7 @@ contains
end if end if
if (hash%table(hk,1) == HashFreeEntry) then if (hash%table(hk,1) == HashFreeEntry) then
val = HashFreeEntry val = HashFreeEntry
! !$ info = HashNotFound info = HashNotFound
return return
end if end if
hk = hk - hd hk = hk - hd
@ -591,7 +602,7 @@ contains
end if end if
if (hash%table(hk,1) == HashFreeEntry) then if (hash%table(hk,1) == HashFreeEntry) then
val = HashFreeEntry val = HashFreeEntry
! !$ info = HashNotFound info = HashNotFound
return return
end if end if
hk = hk - hd hk = hk - hd

@ -153,11 +153,25 @@ module psb_indx_map_mod
procedure, pass(idxmap) :: set_gci => base_set_gci procedure, pass(idxmap) :: set_gci => base_set_gci
procedure, pass(idxmap) :: set_grl => base_set_grl procedure, pass(idxmap) :: set_grl => base_set_grl
procedure, pass(idxmap) :: set_gcl => base_set_gcl procedure, pass(idxmap) :: set_gcl => base_set_gcl
#if defined(IPK4) && defined(LPK8)
generic, public :: set_gr => set_grl, set_gri
generic, public :: set_gc => set_gcl, set_gci
#else
generic, public :: set_gr => set_grl generic, public :: set_gr => set_grl
generic, public :: set_gc => set_gcl generic, public :: set_gc => set_gcl
#endif
procedure, pass(idxmap) :: set_lr => base_set_lr procedure, pass(idxmap) :: set_lri => base_set_lri
procedure, pass(idxmap) :: set_lc => base_set_lc procedure, pass(idxmap) :: set_lrl => base_set_lrl
procedure, pass(idxmap) :: set_lci => base_set_lci
procedure, pass(idxmap) :: set_lcl => base_set_lcl
procedure, pass(idxmap) :: inc_lc => base_inc_lc
#if defined(IPK4) && defined(LPK8)
generic, public :: set_lr => set_lrl, set_lri
generic, public :: set_lc => set_lcl, set_lci
#else
generic, public :: set_lr => set_lri
generic, public :: set_lc => set_lci
#endif
procedure, pass(idxmap) :: set_p_adjcncy => base_set_p_adjcncy procedure, pass(idxmap) :: set_p_adjcncy => base_set_p_adjcncy
procedure, pass(idxmap) :: xtnd_p_adjcncy => base_xtnd_p_adjcncy procedure, pass(idxmap) :: xtnd_p_adjcncy => base_xtnd_p_adjcncy
@ -235,7 +249,8 @@ module psb_indx_map_mod
& base_get_gr, base_get_gc, base_get_lr, base_get_lc, base_get_ctxt,& & base_get_gr, base_get_gc, base_get_lr, base_get_lc, base_get_ctxt,&
& base_get_mpic, base_sizeof, base_set_null, & & base_get_mpic, base_sizeof, base_set_null, &
& base_set_grl, base_set_gcl, & & base_set_grl, base_set_gcl, &
& base_set_lr, base_set_lc, base_set_ctxt,& & base_set_lri, base_set_lci, base_set_lrl, base_set_lcl, &
& base_inc_lc, base_set_ctxt,&
& base_set_mpic, base_get_fmt, base_asb, base_free,& & base_set_mpic, base_get_fmt, base_asb, base_free,&
& base_l2gs1, base_l2gs2, base_l2gv1, base_l2gv2,& & base_l2gs1, base_l2gs2, base_l2gv1, base_l2gv2,&
& base_g2ls1, base_g2ls2, base_g2lv1, base_g2lv2,& & base_g2ls1, base_g2ls2, base_g2lv1, base_g2lv2,&
@ -557,21 +572,47 @@ contains
idxmap%global_cols = val idxmap%global_cols = val
end subroutine base_set_gcl end subroutine base_set_gcl
subroutine base_set_lr(idxmap,val) subroutine base_set_lri(idxmap,val)
implicit none implicit none
class(psb_indx_map), intent(inout) :: idxmap class(psb_indx_map), intent(inout) :: idxmap
integer(psb_ipk_), intent(in) :: val integer(psb_ipk_), intent(in) :: val
idxmap%local_rows = val idxmap%local_rows = val
end subroutine base_set_lr end subroutine base_set_lri
subroutine base_set_lc(idxmap,val) subroutine base_set_lci(idxmap,val)
implicit none implicit none
class(psb_indx_map), intent(inout) :: idxmap class(psb_indx_map), intent(inout) :: idxmap
integer(psb_ipk_), intent(in) :: val integer(psb_ipk_), intent(in) :: val
!$omp critical
idxmap%local_cols = val
!$omp end critical
end subroutine base_set_lci
subroutine base_set_lrl(idxmap,val)
implicit none
class(psb_indx_map), intent(inout) :: idxmap
integer(psb_lpk_), intent(in) :: val
idxmap%local_rows = val
end subroutine base_set_lrl
subroutine base_set_lcl(idxmap,val)
implicit none
class(psb_indx_map), intent(inout) :: idxmap
integer(psb_lpk_), intent(in) :: val
!$omp critical
idxmap%local_cols = val idxmap%local_cols = val
end subroutine base_set_lc !$omp end critical
end subroutine base_set_lcl
subroutine base_inc_lc(idxmap)
implicit none
class(psb_indx_map), intent(inout) :: idxmap
!$omp atomic
idxmap%local_cols = idxmap%local_cols + 1
!$omp end atomic
end subroutine base_inc_lc
subroutine base_set_p_adjcncy(idxmap,val) subroutine base_set_p_adjcncy(idxmap,val)
use psb_realloc_mod use psb_realloc_mod

@ -179,6 +179,9 @@ contains
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap,owned_) &
!$omp private(i)
do i=1, size(idx) do i=1, size(idx)
if (mask(i)) then if (mask(i)) then
if ((1<=idx(i)).and.(idx(i) <= idxmap%get_lr())) then if ((1<=idx(i)).and.(idx(i) <= idxmap%get_lr())) then
@ -191,9 +194,12 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (.not.present(mask)) then else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap,owned_) &
!$omp private(i)
do i=1, size(idx) do i=1, size(idx)
if ((1<=idx(i)).and.(idx(i) <= idxmap%get_lr())) then if ((1<=idx(i)).and.(idx(i) <= idxmap%get_lr())) then
idx(i) = idxmap%loc_to_glob(idx(i)) idx(i) = idxmap%loc_to_glob(idx(i))
@ -204,6 +210,7 @@ contains
idx(i) = -1 idx(i) = -1
end if end if
end do end do
!$omp end parallel do
end if end if
@ -298,6 +305,9 @@ contains
if (present(mask)) then if (present(mask)) then
if (idxmap%is_valid()) then if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idx,idxmap,owned_) &
!$omp private(i,ix)
do i=1,is do i=1,is
if (mask(i)) then if (mask(i)) then
if ((1 <= idx(i)).and.(idx(i) <= idxmap%global_rows)) then if ((1 <= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -309,6 +319,7 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else else
idx(1:is) = -1 idx(1:is) = -1
info = -1 info = -1
@ -317,6 +328,9 @@ contains
else if (.not.present(mask)) then else if (.not.present(mask)) then
if (idxmap%is_valid()) then if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idx,idxmap,owned_) &
!$omp private(i,ix)
do i=1, is do i=1, is
if ((1 <= idx(i)).and.(idx(i) <= idxmap%global_rows)) then if ((1 <= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
ix = idxmap%glob_to_loc(idx(i)) ix = idxmap%glob_to_loc(idx(i))
@ -326,6 +340,7 @@ contains
idx(i) = -1 idx(i) = -1
end if end if
end do end do
!$omp end parallel do
else else
idx(1:is) = -1 idx(1:is) = -1
info = -1 info = -1
@ -365,6 +380,9 @@ contains
if (present(mask)) then if (present(mask)) then
if (idxmap%is_valid()) then if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idxin,idxout,idxmap,owned_) &
!$omp private(i,ix)
do i=1,is do i=1,is
if (mask(i)) then if (mask(i)) then
if ((1 <= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then if ((1 <= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
@ -376,6 +394,7 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else else
idxout(1:is) = -1 idxout(1:is) = -1
info = -1 info = -1
@ -384,6 +403,9 @@ contains
else if (.not.present(mask)) then else if (.not.present(mask)) then
if (idxmap%is_valid()) then if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idxin,idxout,idxmap,owned_) &
!$omp private(i,ix)
do i=1, is do i=1, is
if ((1 <= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then if ((1 <= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
ix = idxmap%glob_to_loc(idxin(i)) ix = idxmap%glob_to_loc(idxin(i))
@ -393,6 +415,7 @@ contains
idxout(i) = -1 idxout(i) = -1
end if end if
end do end do
!$omp end parallel do
else else
idxout(1:is) = -1 idxout(1:is) = -1
info = -1 info = -1
@ -541,6 +564,10 @@ contains
else if (.not.present(mask)) then else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idx,idxmap,laddsz,lidx) &
!$omp private(i,ix,info)
! $ o m p reduction(.AND.:isLoopValid)
do i=1, is do i=1, is
if (info /= 0) cycle if (info /= 0) cycle
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -579,8 +606,8 @@ contains
idx(i) = -1 idx(i) = -1
end if end if
end do end do
!$omp end parallel do
end if end if
else if (.not.present(lidx)) then else if (.not.present(lidx)) then
if (present(mask)) then if (present(mask)) then

@ -169,7 +169,9 @@ contains
end if end if
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap) &
!$omp private(i)
do i=1, size(idx) do i=1, size(idx)
if (mask(i)) then if (mask(i)) then
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
@ -179,9 +181,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (.not.present(mask)) then else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap) &
!$omp private(i)
do i=1, size(idx) do i=1, size(idx)
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
! do nothing ! do nothing
@ -189,7 +193,7 @@ contains
idx(i) = -1 idx(i) = -1
end if end if
end do end do
!$omp end parallel do
end if end if
end subroutine repl_l2gv1 end subroutine repl_l2gv1
@ -223,7 +227,9 @@ contains
end if end if
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,idxmap,im) &
!$omp private(i)
do i=1, im do i=1, im
if (mask(i)) then if (mask(i)) then
if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then
@ -233,9 +239,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (.not.present(mask)) then else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,idxmap,im) &
!$omp private(i)
do i=1, im do i=1, im
if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then
idxout(i) = idxin(i) idxout(i) = idxin(i)
@ -243,7 +251,7 @@ contains
idxout(i) = -1 idxout(i) = -1
end if end if
end do end do
!$omp end parallel do
end if end if
if (is > im) info = -3 if (is > im) info = -3
@ -324,6 +332,9 @@ contains
if (present(mask)) then if (present(mask)) then
if (idxmap%is_asb()) then if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap,is) &
!$omp private(i)
do i=1, is do i=1, is
if (mask(i)) then if (mask(i)) then
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -333,7 +344,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap,is) &
!$omp private(i)
do i=1,is do i=1,is
if (mask(i)) then if (mask(i)) then
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -344,6 +359,7 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else else
idx(1:is) = -1 idx(1:is) = -1
info = -1 info = -1
@ -352,6 +368,9 @@ contains
else if (.not.present(mask)) then else if (.not.present(mask)) then
if (idxmap%is_asb()) then if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap,is) &
!$omp private(i)
do i=1, is do i=1, is
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
! do nothing ! do nothing
@ -359,7 +378,11 @@ contains
idx(i) = -1 idx(i) = -1
end if end if
end do end do
!$omp end parallel do
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap,is) &
!$omp private(i)
do i=1,is do i=1,is
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
! do nothing ! do nothing
@ -367,6 +390,7 @@ contains
idx(i) = -1 idx(i) = -1
end if end if
end do end do
!$omp end parallel do
else else
idx(1:is) = -1 idx(1:is) = -1
info = -1 info = -1
@ -409,6 +433,9 @@ contains
if (present(mask)) then if (present(mask)) then
if (idxmap%is_asb()) then if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,idxmap,im) &
!$omp private(i)
do i=1, im do i=1, im
if (mask(i)) then if (mask(i)) then
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
@ -418,7 +445,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,idxmap,im) &
!$omp private(i)
do i=1,im do i=1,im
if (mask(i)) then if (mask(i)) then
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
@ -428,6 +459,7 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else else
idxout(1:im) = -1 idxout(1:im) = -1
info = -1 info = -1
@ -436,6 +468,9 @@ contains
else if (.not.present(mask)) then else if (.not.present(mask)) then
if (idxmap%is_asb()) then if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,idxmap,im) &
!$omp private(i)
do i=1, im do i=1, im
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
idxout(i) = idxin(i) idxout(i) = idxin(i)
@ -443,7 +478,11 @@ contains
idxout(i) = -1 idxout(i) = -1
end if end if
end do end do
!$omp end parallel do
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,idxmap,im) &
!$omp private(i)
do i=1,im do i=1,im
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
idxout(i) = idxin(i) idxout(i) = idxin(i)
@ -451,6 +490,7 @@ contains
idxout(i) = -1 idxout(i) = -1
end if end if
end do end do
!$omp end parallel do
else else
idxout(1:im) = -1 idxout(1:im) = -1
info = -1 info = -1
@ -557,6 +597,9 @@ contains
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
if (present(lidx)) then if (present(lidx)) then
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,lidx,is,idxmap) &
!$omp private(i)
do i=1, is do i=1, is
if (mask(i)) then if (mask(i)) then
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -566,9 +609,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (.not.present(mask)) then else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,lidx,is,idxmap) &
!$omp private(i)
do i=1, is do i=1, is
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
! do nothing ! do nothing
@ -576,9 +621,13 @@ contains
idx(i) = -1 idx(i) = -1
end if end if
end do end do
!$omp end parallel do
end if end if
else if (.not.present(lidx)) then else if (.not.present(lidx)) then
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,is,idxmap) &
!$omp private(i)
do i=1, is do i=1, is
if (mask(i)) then if (mask(i)) then
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -588,8 +637,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (.not.present(mask)) then else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,is,idxmap) &
!$omp private(i)
do i=1, is do i=1, is
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
! do nothing ! do nothing
@ -597,6 +649,7 @@ contains
idx(i) = -1 idx(i) = -1
end if end if
end do end do
!$omp end parallel do
end if end if
end if end if
else else
@ -644,6 +697,9 @@ contains
else if (idxmap%is_valid()) then else if (idxmap%is_valid()) then
if (present(lidx)) then if (present(lidx)) then
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,im,idxmap) &
!$omp private(i)
do i=1, im do i=1, im
if (mask(i)) then if (mask(i)) then
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
@ -653,9 +709,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (.not.present(mask)) then else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,im,idxmap) &
!$omp private(i)
do i=1, im do i=1, im
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
idxout(i) = idxin(i) idxout(i) = idxin(i)
@ -663,9 +721,13 @@ contains
idxout(i) = -1 idxout(i) = -1
end if end if
end do end do
!$omp end parallel do
end if end if
else if (.not.present(lidx)) then else if (.not.present(lidx)) then
if (present(mask)) then if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,im,idxmap) &
!$omp private(i)
do i=1, im do i=1, im
if (mask(i)) then if (mask(i)) then
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
@ -675,8 +737,11 @@ contains
end if end if
end if end if
end do end do
!$omp end parallel do
else if (.not.present(mask)) then else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,im,idxmap) &
!$omp private(i)
do i=1, im do i=1, im
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
idxout(i) = idxin(i) idxout(i) = idxin(i)
@ -684,6 +749,7 @@ contains
idxout(i) = -1 idxout(i) = -1
end if end if
end do end do
!$omp end parallel do
end if end if
end if end if
else else

@ -136,9 +136,9 @@ module psb_const_mod
! !
! Version ! Version
! !
character(len=*), parameter :: psb_version_string_ = "3.8.0" character(len=*), parameter :: psb_version_string_ = "3.9.0"
integer(psb_ipk_), parameter :: psb_version_major_ = 3 integer(psb_ipk_), parameter :: psb_version_major_ = 3
integer(psb_ipk_), parameter :: psb_version_minor_ = 8 integer(psb_ipk_), parameter :: psb_version_minor_ = 9
integer(psb_ipk_), parameter :: psb_patchlevel_ = 0 integer(psb_ipk_), parameter :: psb_patchlevel_ = 0
! !

@ -143,6 +143,20 @@ module psb_c_psblas_mod
end subroutine psb_caxpby end subroutine psb_caxpby
end interface end interface
interface psb_abgdxyz
subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_spk_, psb_ipk_, &
& psb_c_vect_type, psb_cspmat_type
type(psb_c_vect_type), intent (inout) :: x
type(psb_c_vect_type), intent (inout) :: y
type(psb_c_vect_type), intent (inout) :: z
complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_cabgdxyz_vect
end interface psb_abgdxyz
interface psb_geamax interface psb_geamax
function psb_camax(x, desc_a, info, jx,global) function psb_camax(x, desc_a, info, jx,global)
import :: psb_desc_type, psb_spk_, psb_ipk_, & import :: psb_desc_type, psb_spk_, psb_ipk_, &

@ -143,6 +143,20 @@ module psb_d_psblas_mod
end subroutine psb_daxpby end subroutine psb_daxpby
end interface end interface
interface psb_abgdxyz
subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_dpk_, psb_ipk_, &
& psb_d_vect_type, psb_dspmat_type
type(psb_d_vect_type), intent (inout) :: x
type(psb_d_vect_type), intent (inout) :: y
type(psb_d_vect_type), intent (inout) :: z
real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_dabgdxyz_vect
end interface psb_abgdxyz
interface psb_geamax interface psb_geamax
function psb_damax(x, desc_a, info, jx,global) function psb_damax(x, desc_a, info, jx,global)
import :: psb_desc_type, psb_dpk_, psb_ipk_, & import :: psb_desc_type, psb_dpk_, psb_ipk_, &

@ -143,6 +143,20 @@ module psb_s_psblas_mod
end subroutine psb_saxpby end subroutine psb_saxpby
end interface end interface
interface psb_abgdxyz
subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_spk_, psb_ipk_, &
& psb_s_vect_type, psb_sspmat_type
type(psb_s_vect_type), intent (inout) :: x
type(psb_s_vect_type), intent (inout) :: y
type(psb_s_vect_type), intent (inout) :: z
real(psb_spk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_sabgdxyz_vect
end interface psb_abgdxyz
interface psb_geamax interface psb_geamax
function psb_samax(x, desc_a, info, jx,global) function psb_samax(x, desc_a, info, jx,global)
import :: psb_desc_type, psb_spk_, psb_ipk_, & import :: psb_desc_type, psb_spk_, psb_ipk_, &

@ -143,6 +143,20 @@ module psb_z_psblas_mod
end subroutine psb_zaxpby end subroutine psb_zaxpby
end interface end interface
interface psb_abgdxyz
subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_dpk_, psb_ipk_, &
& psb_z_vect_type, psb_zspmat_type
type(psb_z_vect_type), intent (inout) :: x
type(psb_z_vect_type), intent (inout) :: y
type(psb_z_vect_type), intent (inout) :: z
complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_zabgdxyz_vect
end interface psb_abgdxyz
interface psb_geamax interface psb_geamax
function psb_zamax(x, desc_a, info, jx,global) function psb_zamax(x, desc_a, info, jx,global)
import :: psb_desc_type, psb_dpk_, psb_ipk_, & import :: psb_desc_type, psb_dpk_, psb_ipk_, &

@ -155,6 +155,9 @@ module psb_c_base_vect_mod
procedure, pass(z) :: axpby_v2 => c_base_axpby_v2 procedure, pass(z) :: axpby_v2 => c_base_axpby_v2
procedure, pass(z) :: axpby_a2 => c_base_axpby_a2 procedure, pass(z) :: axpby_a2 => c_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => c_base_abgdxyz
procedure, pass(w) :: xyzw => c_base_xyzw
! !
! Vector by vector multiplication. Need all variants ! Vector by vector multiplication. Need all variants
! to handle multiple requirements from preconditioners ! to handle multiple requirements from preconditioners
@ -1018,7 +1021,7 @@ contains
!! \param m Number of entries to be considered !! \param m Number of entries to be considered
!! \param alpha scalar alpha !! \param alpha scalar alpha
!! \param x The class(base_vect) to be added !! \param x The class(base_vect) to be added
!! \param beta scalar alpha !! \param beta scalar beta
!! \param info return code !! \param info return code
!! !!
subroutine c_base_axpby_v(m,alpha, x, beta, y, info) subroutine c_base_axpby_v(m,alpha, x, beta, y, info)
@ -1047,7 +1050,7 @@ contains
!! \param m Number of entries to be considered !! \param m Number of entries to be considered
!! \param alpha scalar alpha !! \param alpha scalar alpha
!! \param x The class(base_vect) to be added !! \param x The class(base_vect) to be added
!! \param beta scalar alpha !! \param beta scalar beta
!! \param y The class(base_vect) to be added !! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be returned !! \param z The class(base_vect) to be returned
!! \param info return code !! \param info return code
@ -1078,7 +1081,7 @@ contains
!! \param m Number of entries to be considered !! \param m Number of entries to be considered
!! \param alpha scalar alpha !! \param alpha scalar alpha
!! \param x(:) The array to be added !! \param x(:) The array to be added
!! \param beta scalar alpha !! \param beta scalar beta
!! \param info return code !! \param info return code
!! !!
subroutine c_base_axpby_a(m,alpha, x, beta, y, info) subroutine c_base_axpby_a(m,alpha, x, beta, y, info)
@ -1126,6 +1129,64 @@ contains
end subroutine c_base_axpby_a2 end subroutine c_base_axpby_a2
!
! ABGDXYZ is invoked via Z, hence the structure below.
!
!
!> Function base_abgdxyz
!! \memberof psb_c_base_vect_type
!! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
!! \param gamma scalar gamma
!! \param delta scalar delta
!! \param x The class(base_vect) to be added
!! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be added
!! \param info return code
!!
subroutine c_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_c_base_vect_type), intent(inout) :: x
class(psb_c_base_vect_type), intent(inout) :: y
class(psb_c_base_vect_type), intent(inout) :: z
complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(alpha/=czero)) call x%sync()
if (y%is_dev().and.(beta/=czero)) call y%sync()
if (z%is_dev().and.(delta/=czero)) call z%sync()
call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
end subroutine c_base_abgdxyz
subroutine c_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_c_base_vect_type), intent(inout) :: x
class(psb_c_base_vect_type), intent(inout) :: y
class(psb_c_base_vect_type), intent(inout) :: z
class(psb_c_base_vect_type), intent(inout) :: w
complex(psb_spk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(a/=czero)) call x%sync()
if (y%is_dev().and.(b/=czero)) call y%sync()
if (z%is_dev().and.(d/=czero)) call z%sync()
if (w%is_dev().and.(f/=czero)) call w%sync()
call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info)
call y%set_host()
call z%set_host()
call w%set_host()
end subroutine c_base_xyzw
! !
! Multiple variants of two operations: ! Multiple variants of two operations:

@ -579,6 +579,110 @@ module psb_c_csr_mat_mod
end subroutine psb_c_csr_scals end subroutine psb_c_csr_scals
end interface end interface
type, extends(psb_c_csr_sparse_mat) :: psb_c_ecsr_sparse_mat
!> Number of non-empty rows
integer(psb_ipk_) :: nnerws
!> Indices of non-empty rows
integer(psb_ipk_), allocatable :: nerwp(:)
contains
procedure, nopass :: get_fmt => c_ecsr_get_fmt
! procedure, pass(a) :: csmm => psb_c_ecsr_csmm
procedure, pass(a) :: csmv => psb_c_ecsr_csmv
procedure, pass(a) :: cp_from_coo => psb_c_cp_ecsr_from_coo
procedure, pass(a) :: cp_from_fmt => psb_c_cp_ecsr_from_fmt
procedure, pass(a) :: mv_from_coo => psb_c_mv_ecsr_from_coo
procedure, pass(a) :: mv_from_fmt => psb_c_mv_ecsr_from_fmt
procedure, pass(a) :: cmp_nerwp => psb_c_ecsr_cmp_nerwp
procedure, pass(a) :: free => c_ecsr_free
procedure, pass(a) :: mold => psb_c_ecsr_mold
end type psb_c_ecsr_sparse_mat
!> \memberof psb_c_ecsr_sparse_mat
!! \see psb_c_base_mat_mod::psb_c_base_csmv
interface
subroutine psb_c_ecsr_csmv(alpha,a,x,beta,y,info,trans)
import
class(psb_c_ecsr_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta, x(:)
complex(psb_spk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_c_ecsr_csmv
end interface
!> \memberof psb_c_ecsr_sparse_mat
!! \see psb_c_base_mat_mod::psb_c_base_cp_from_coo
interface
subroutine psb_c_ecsr_cmp_nerwp(a,info)
import
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_ecsr_cmp_nerwp
end interface
!> \memberof psb_c_ecsr_sparse_mat
!! \see psb_c_base_mat_mod::psb_c_base_cp_from_coo
interface
subroutine psb_c_cp_ecsr_from_coo(a,b,info)
import
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_cp_ecsr_from_coo
end interface
!> \memberof psb_c_ecsr_sparse_mat
!! \see psb_c_base_mat_mod::psb_c_base_cp_from_fmt
interface
subroutine psb_c_cp_ecsr_from_fmt(a,b,info)
import
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_cp_ecsr_from_fmt
end interface
!> \memberof psb_c_ecsr_sparse_mat
!! \see psb_c_base_mat_mod::psb_c_base_mv_from_coo
interface
subroutine psb_c_mv_ecsr_from_coo(a,b,info)
import
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_mv_ecsr_from_coo
end interface
!> \memberof psb_c_ecsr_sparse_mat
!! \see psb_c_base_mat_mod::psb_c_base_mv_from_fmt
interface
subroutine psb_c_mv_ecsr_from_fmt(a,b,info)
import
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_mv_ecsr_from_fmt
end interface
!> \memberof psb_c_ecsr_sparse_mat
!| \see psb_base_mat_mod::psb_base_mold
interface
subroutine psb_c_ecsr_mold(a,b,info)
import
class(psb_c_ecsr_sparse_mat), intent(in) :: a
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_ecsr_mold
end interface
!> \namespace psb_base_mod \class psb_lc_csr_sparse_mat !> \namespace psb_base_mod \class psb_lc_csr_sparse_mat
!! \extends psb_lc_base_mat_mod::psb_lc_base_sparse_mat !! \extends psb_lc_base_mat_mod::psb_lc_base_sparse_mat
!! !!
@ -1178,6 +1282,26 @@ contains
function c_ecsr_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'ECSR'
end function c_ecsr_get_fmt
subroutine c_ecsr_free(a)
implicit none
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
if (allocated(a%nerwp)) deallocate(a%nerwp)
a%nnerws = 0
call a%psb_c_csr_sparse_mat%free()
return
end subroutine c_ecsr_free
! == =================================== ! == ===================================
! !
! !

@ -79,12 +79,14 @@
module psb_c_mat_mod module psb_c_mat_mod
use psb_c_base_mat_mod use psb_c_base_mat_mod
use psb_c_csr_mat_mod, only : psb_c_csr_sparse_mat, psb_lc_csr_sparse_mat use psb_c_csr_mat_mod, only : psb_c_csr_sparse_mat, psb_lc_csr_sparse_mat,&
& psb_c_ecsr_sparse_mat
use psb_c_csc_mat_mod, only : psb_c_csc_sparse_mat, psb_lc_csc_sparse_mat use psb_c_csc_mat_mod, only : psb_c_csc_sparse_mat, psb_lc_csc_sparse_mat
type :: psb_cspmat_type type :: psb_cspmat_type
class(psb_c_base_sparse_mat), allocatable :: a class(psb_c_base_sparse_mat), allocatable :: a
class(psb_c_base_sparse_mat), allocatable :: ad, and
integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ integer(psb_ipk_) :: remote_build=psb_matbld_noremote_
type(psb_lc_coo_sparse_mat), allocatable :: rmta type(psb_lc_coo_sparse_mat), allocatable :: rmta
@ -202,6 +204,8 @@ module psb_c_mat_mod
procedure, pass(a) :: cscnv_ip => psb_c_cscnv_ip procedure, pass(a) :: cscnv_ip => psb_c_cscnv_ip
procedure, pass(a) :: cscnv_base => psb_c_cscnv_base procedure, pass(a) :: cscnv_base => psb_c_cscnv_base
generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base
procedure, pass(a) :: split_nd => psb_c_split_nd
procedure, pass(a) :: merge_nd => psb_c_merge_nd
procedure, pass(a) :: clone => psb_cspmat_clone procedure, pass(a) :: clone => psb_cspmat_clone
procedure, pass(a) :: move_alloc => psb_cspmat_type_move procedure, pass(a) :: move_alloc => psb_cspmat_type_move
! !
@ -840,6 +844,24 @@ module psb_c_mat_mod
! !
! !
interface
subroutine psb_c_split_nd(a,n_rows,n_cols,info)
import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat
class(psb_cspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_split_nd
end interface
interface
subroutine psb_c_merge_nd(a,n_rows,n_cols,info)
import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat
class(psb_cspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_merge_nd
end interface
! !
! CSCNV: switches to a different internal derived type. ! CSCNV: switches to a different internal derived type.
! 3 versions: copying to target ! 3 versions: copying to target
@ -859,7 +881,6 @@ module psb_c_mat_mod
end subroutine psb_c_cscnv end subroutine psb_c_cscnv
end interface end interface
interface interface
subroutine psb_c_cscnv_ip(a,iinfo,type,mold,dupl) subroutine psb_c_cscnv_ip(a,iinfo,type,mold,dupl)
import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat
@ -871,7 +892,6 @@ module psb_c_mat_mod
end subroutine psb_c_cscnv_ip end subroutine psb_c_cscnv_ip
end interface end interface
interface interface
subroutine psb_c_cscnv_base(a,b,info,dupl) subroutine psb_c_cscnv_base(a,b,info,dupl)
import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat

@ -102,6 +102,9 @@ module psb_c_vect_mod
procedure, pass(z) :: axpby_v2 => c_vect_axpby_v2 procedure, pass(z) :: axpby_v2 => c_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => c_vect_axpby_a2 procedure, pass(z) :: axpby_a2 => c_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => c_vect_abgdxyz
procedure, pass(z) :: xyzw => c_vect_xyzw
procedure, pass(y) :: mlt_v => c_vect_mlt_v procedure, pass(y) :: mlt_v => c_vect_mlt_v
procedure, pass(y) :: mlt_a => c_vect_mlt_a procedure, pass(y) :: mlt_a => c_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => c_vect_mlt_a_2 procedure, pass(z) :: mlt_a_2 => c_vect_mlt_a_2
@ -771,6 +774,38 @@ contains
end subroutine c_vect_axpby_a2 end subroutine c_vect_axpby_a2
subroutine c_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_c_vect_type), intent(inout) :: x
class(psb_c_vect_type), intent(inout) :: y
class(psb_c_vect_type), intent(inout) :: z
complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
end subroutine c_vect_abgdxyz
subroutine c_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_c_vect_type), intent(inout) :: x
class(psb_c_vect_type), intent(inout) :: y
class(psb_c_vect_type), intent(inout) :: z
class(psb_c_vect_type), intent(inout) :: w
complex(psb_spk_), intent (in) :: a, b, c, d, e, f
integer(psb_ipk_), intent(out) :: info
if (allocated(w%v)) &
call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info)
end subroutine c_vect_xyzw
subroutine c_vect_mlt_v(x, y, info) subroutine c_vect_mlt_v(x, y, info)
use psi_serial_mod use psi_serial_mod
implicit none implicit none

@ -155,6 +155,9 @@ module psb_d_base_vect_mod
procedure, pass(z) :: axpby_v2 => d_base_axpby_v2 procedure, pass(z) :: axpby_v2 => d_base_axpby_v2
procedure, pass(z) :: axpby_a2 => d_base_axpby_a2 procedure, pass(z) :: axpby_a2 => d_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => d_base_abgdxyz
procedure, pass(w) :: xyzw => d_base_xyzw
! !
! Vector by vector multiplication. Need all variants ! Vector by vector multiplication. Need all variants
! to handle multiple requirements from preconditioners ! to handle multiple requirements from preconditioners
@ -1025,7 +1028,7 @@ contains
!! \param m Number of entries to be considered !! \param m Number of entries to be considered
!! \param alpha scalar alpha !! \param alpha scalar alpha
!! \param x The class(base_vect) to be added !! \param x The class(base_vect) to be added
!! \param beta scalar alpha !! \param beta scalar beta
!! \param info return code !! \param info return code
!! !!
subroutine d_base_axpby_v(m,alpha, x, beta, y, info) subroutine d_base_axpby_v(m,alpha, x, beta, y, info)
@ -1054,7 +1057,7 @@ contains
!! \param m Number of entries to be considered !! \param m Number of entries to be considered
!! \param alpha scalar alpha !! \param alpha scalar alpha
!! \param x The class(base_vect) to be added !! \param x The class(base_vect) to be added
!! \param beta scalar alpha !! \param beta scalar beta
!! \param y The class(base_vect) to be added !! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be returned !! \param z The class(base_vect) to be returned
!! \param info return code !! \param info return code
@ -1085,7 +1088,7 @@ contains
!! \param m Number of entries to be considered !! \param m Number of entries to be considered
!! \param alpha scalar alpha !! \param alpha scalar alpha
!! \param x(:) The array to be added !! \param x(:) The array to be added
!! \param beta scalar alpha !! \param beta scalar beta
!! \param info return code !! \param info return code
!! !!
subroutine d_base_axpby_a(m,alpha, x, beta, y, info) subroutine d_base_axpby_a(m,alpha, x, beta, y, info)
@ -1133,6 +1136,64 @@ contains
end subroutine d_base_axpby_a2 end subroutine d_base_axpby_a2
!
! ABGDXYZ is invoked via Z, hence the structure below.
!
!
!> Function base_abgdxyz
!! \memberof psb_d_base_vect_type
!! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
!! \param gamma scalar gamma
!! \param delta scalar delta
!! \param x The class(base_vect) to be added
!! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be added
!! \param info return code
!!
subroutine d_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_d_base_vect_type), intent(inout) :: x
class(psb_d_base_vect_type), intent(inout) :: y
class(psb_d_base_vect_type), intent(inout) :: z
real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(alpha/=dzero)) call x%sync()
if (y%is_dev().and.(beta/=dzero)) call y%sync()
if (z%is_dev().and.(delta/=dzero)) call z%sync()
call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
end subroutine d_base_abgdxyz
subroutine d_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_d_base_vect_type), intent(inout) :: x
class(psb_d_base_vect_type), intent(inout) :: y
class(psb_d_base_vect_type), intent(inout) :: z
class(psb_d_base_vect_type), intent(inout) :: w
real(psb_dpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(a/=dzero)) call x%sync()
if (y%is_dev().and.(b/=dzero)) call y%sync()
if (z%is_dev().and.(d/=dzero)) call z%sync()
if (w%is_dev().and.(f/=dzero)) call w%sync()
call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info)
call y%set_host()
call z%set_host()
call w%set_host()
end subroutine d_base_xyzw
! !
! Multiple variants of two operations: ! Multiple variants of two operations:

@ -579,6 +579,110 @@ module psb_d_csr_mat_mod
end subroutine psb_d_csr_scals end subroutine psb_d_csr_scals
end interface end interface
type, extends(psb_d_csr_sparse_mat) :: psb_d_ecsr_sparse_mat
!> Number of non-empty rows
integer(psb_ipk_) :: nnerws
!> Indices of non-empty rows
integer(psb_ipk_), allocatable :: nerwp(:)
contains
procedure, nopass :: get_fmt => d_ecsr_get_fmt
! procedure, pass(a) :: csmm => psb_d_ecsr_csmm
procedure, pass(a) :: csmv => psb_d_ecsr_csmv
procedure, pass(a) :: cp_from_coo => psb_d_cp_ecsr_from_coo
procedure, pass(a) :: cp_from_fmt => psb_d_cp_ecsr_from_fmt
procedure, pass(a) :: mv_from_coo => psb_d_mv_ecsr_from_coo
procedure, pass(a) :: mv_from_fmt => psb_d_mv_ecsr_from_fmt
procedure, pass(a) :: cmp_nerwp => psb_d_ecsr_cmp_nerwp
procedure, pass(a) :: free => d_ecsr_free
procedure, pass(a) :: mold => psb_d_ecsr_mold
end type psb_d_ecsr_sparse_mat
!> \memberof psb_d_ecsr_sparse_mat
!! \see psb_d_base_mat_mod::psb_d_base_csmv
interface
subroutine psb_d_ecsr_csmv(alpha,a,x,beta,y,info,trans)
import
class(psb_d_ecsr_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(in) :: alpha, beta, x(:)
real(psb_dpk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_d_ecsr_csmv
end interface
!> \memberof psb_d_ecsr_sparse_mat
!! \see psb_d_base_mat_mod::psb_d_base_cp_from_coo
interface
subroutine psb_d_ecsr_cmp_nerwp(a,info)
import
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_ecsr_cmp_nerwp
end interface
!> \memberof psb_d_ecsr_sparse_mat
!! \see psb_d_base_mat_mod::psb_d_base_cp_from_coo
interface
subroutine psb_d_cp_ecsr_from_coo(a,b,info)
import
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_cp_ecsr_from_coo
end interface
!> \memberof psb_d_ecsr_sparse_mat
!! \see psb_d_base_mat_mod::psb_d_base_cp_from_fmt
interface
subroutine psb_d_cp_ecsr_from_fmt(a,b,info)
import
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_cp_ecsr_from_fmt
end interface
!> \memberof psb_d_ecsr_sparse_mat
!! \see psb_d_base_mat_mod::psb_d_base_mv_from_coo
interface
subroutine psb_d_mv_ecsr_from_coo(a,b,info)
import
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_mv_ecsr_from_coo
end interface
!> \memberof psb_d_ecsr_sparse_mat
!! \see psb_d_base_mat_mod::psb_d_base_mv_from_fmt
interface
subroutine psb_d_mv_ecsr_from_fmt(a,b,info)
import
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_mv_ecsr_from_fmt
end interface
!> \memberof psb_d_ecsr_sparse_mat
!| \see psb_base_mat_mod::psb_base_mold
interface
subroutine psb_d_ecsr_mold(a,b,info)
import
class(psb_d_ecsr_sparse_mat), intent(in) :: a
class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_ecsr_mold
end interface
!> \namespace psb_base_mod \class psb_ld_csr_sparse_mat !> \namespace psb_base_mod \class psb_ld_csr_sparse_mat
!! \extends psb_ld_base_mat_mod::psb_ld_base_sparse_mat !! \extends psb_ld_base_mat_mod::psb_ld_base_sparse_mat
!! !!
@ -1178,6 +1282,26 @@ contains
function d_ecsr_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'ECSR'
end function d_ecsr_get_fmt
subroutine d_ecsr_free(a)
implicit none
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
if (allocated(a%nerwp)) deallocate(a%nerwp)
a%nnerws = 0
call a%psb_d_csr_sparse_mat%free()
return
end subroutine d_ecsr_free
! == =================================== ! == ===================================
! !
! !

@ -79,12 +79,14 @@
module psb_d_mat_mod module psb_d_mat_mod
use psb_d_base_mat_mod use psb_d_base_mat_mod
use psb_d_csr_mat_mod, only : psb_d_csr_sparse_mat, psb_ld_csr_sparse_mat use psb_d_csr_mat_mod, only : psb_d_csr_sparse_mat, psb_ld_csr_sparse_mat,&
& psb_d_ecsr_sparse_mat
use psb_d_csc_mat_mod, only : psb_d_csc_sparse_mat, psb_ld_csc_sparse_mat use psb_d_csc_mat_mod, only : psb_d_csc_sparse_mat, psb_ld_csc_sparse_mat
type :: psb_dspmat_type type :: psb_dspmat_type
class(psb_d_base_sparse_mat), allocatable :: a class(psb_d_base_sparse_mat), allocatable :: a
class(psb_d_base_sparse_mat), allocatable :: ad, and
integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ integer(psb_ipk_) :: remote_build=psb_matbld_noremote_
type(psb_ld_coo_sparse_mat), allocatable :: rmta type(psb_ld_coo_sparse_mat), allocatable :: rmta
@ -202,6 +204,8 @@ module psb_d_mat_mod
procedure, pass(a) :: cscnv_ip => psb_d_cscnv_ip procedure, pass(a) :: cscnv_ip => psb_d_cscnv_ip
procedure, pass(a) :: cscnv_base => psb_d_cscnv_base procedure, pass(a) :: cscnv_base => psb_d_cscnv_base
generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base
procedure, pass(a) :: split_nd => psb_d_split_nd
procedure, pass(a) :: merge_nd => psb_d_merge_nd
procedure, pass(a) :: clone => psb_dspmat_clone procedure, pass(a) :: clone => psb_dspmat_clone
procedure, pass(a) :: move_alloc => psb_dspmat_type_move procedure, pass(a) :: move_alloc => psb_dspmat_type_move
! !
@ -840,6 +844,24 @@ module psb_d_mat_mod
! !
! !
interface
subroutine psb_d_split_nd(a,n_rows,n_cols,info)
import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat
class(psb_dspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_split_nd
end interface
interface
subroutine psb_d_merge_nd(a,n_rows,n_cols,info)
import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat
class(psb_dspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_merge_nd
end interface
! !
! CSCNV: switches to a different internal derived type. ! CSCNV: switches to a different internal derived type.
! 3 versions: copying to target ! 3 versions: copying to target
@ -859,7 +881,6 @@ module psb_d_mat_mod
end subroutine psb_d_cscnv end subroutine psb_d_cscnv
end interface end interface
interface interface
subroutine psb_d_cscnv_ip(a,iinfo,type,mold,dupl) subroutine psb_d_cscnv_ip(a,iinfo,type,mold,dupl)
import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat
@ -871,7 +892,6 @@ module psb_d_mat_mod
end subroutine psb_d_cscnv_ip end subroutine psb_d_cscnv_ip
end interface end interface
interface interface
subroutine psb_d_cscnv_base(a,b,info,dupl) subroutine psb_d_cscnv_base(a,b,info,dupl)
import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat

@ -102,6 +102,9 @@ module psb_d_vect_mod
procedure, pass(z) :: axpby_v2 => d_vect_axpby_v2 procedure, pass(z) :: axpby_v2 => d_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => d_vect_axpby_a2 procedure, pass(z) :: axpby_a2 => d_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => d_vect_abgdxyz
procedure, pass(z) :: xyzw => d_vect_xyzw
procedure, pass(y) :: mlt_v => d_vect_mlt_v procedure, pass(y) :: mlt_v => d_vect_mlt_v
procedure, pass(y) :: mlt_a => d_vect_mlt_a procedure, pass(y) :: mlt_a => d_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => d_vect_mlt_a_2 procedure, pass(z) :: mlt_a_2 => d_vect_mlt_a_2
@ -778,6 +781,38 @@ contains
end subroutine d_vect_axpby_a2 end subroutine d_vect_axpby_a2
subroutine d_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_d_vect_type), intent(inout) :: x
class(psb_d_vect_type), intent(inout) :: y
class(psb_d_vect_type), intent(inout) :: z
real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
end subroutine d_vect_abgdxyz
subroutine d_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_d_vect_type), intent(inout) :: x
class(psb_d_vect_type), intent(inout) :: y
class(psb_d_vect_type), intent(inout) :: z
class(psb_d_vect_type), intent(inout) :: w
real(psb_dpk_), intent (in) :: a, b, c, d, e, f
integer(psb_ipk_), intent(out) :: info
if (allocated(w%v)) &
call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info)
end subroutine d_vect_xyzw
subroutine d_vect_mlt_v(x, y, info) subroutine d_vect_mlt_v(x, y, info)
use psi_serial_mod use psi_serial_mod
implicit none implicit none

@ -155,6 +155,9 @@ module psb_s_base_vect_mod
procedure, pass(z) :: axpby_v2 => s_base_axpby_v2 procedure, pass(z) :: axpby_v2 => s_base_axpby_v2
procedure, pass(z) :: axpby_a2 => s_base_axpby_a2 procedure, pass(z) :: axpby_a2 => s_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => s_base_abgdxyz
procedure, pass(w) :: xyzw => s_base_xyzw
! !
! Vector by vector multiplication. Need all variants ! Vector by vector multiplication. Need all variants
! to handle multiple requirements from preconditioners ! to handle multiple requirements from preconditioners
@ -1025,7 +1028,7 @@ contains
!! \param m Number of entries to be considered !! \param m Number of entries to be considered
!! \param alpha scalar alpha !! \param alpha scalar alpha
!! \param x The class(base_vect) to be added !! \param x The class(base_vect) to be added
!! \param beta scalar alpha !! \param beta scalar beta
!! \param info return code !! \param info return code
!! !!
subroutine s_base_axpby_v(m,alpha, x, beta, y, info) subroutine s_base_axpby_v(m,alpha, x, beta, y, info)
@ -1054,7 +1057,7 @@ contains
!! \param m Number of entries to be considered !! \param m Number of entries to be considered
!! \param alpha scalar alpha !! \param alpha scalar alpha
!! \param x The class(base_vect) to be added !! \param x The class(base_vect) to be added
!! \param beta scalar alpha !! \param beta scalar beta
!! \param y The class(base_vect) to be added !! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be returned !! \param z The class(base_vect) to be returned
!! \param info return code !! \param info return code
@ -1085,7 +1088,7 @@ contains
!! \param m Number of entries to be considered !! \param m Number of entries to be considered
!! \param alpha scalar alpha !! \param alpha scalar alpha
!! \param x(:) The array to be added !! \param x(:) The array to be added
!! \param beta scalar alpha !! \param beta scalar beta
!! \param info return code !! \param info return code
!! !!
subroutine s_base_axpby_a(m,alpha, x, beta, y, info) subroutine s_base_axpby_a(m,alpha, x, beta, y, info)
@ -1133,6 +1136,64 @@ contains
end subroutine s_base_axpby_a2 end subroutine s_base_axpby_a2
!
! ABGDXYZ is invoked via Z, hence the structure below.
!
!
!> Function base_abgdxyz
!! \memberof psb_s_base_vect_type
!! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
!! \param gamma scalar gamma
!! \param delta scalar delta
!! \param x The class(base_vect) to be added
!! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be added
!! \param info return code
!!
subroutine s_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_s_base_vect_type), intent(inout) :: x
class(psb_s_base_vect_type), intent(inout) :: y
class(psb_s_base_vect_type), intent(inout) :: z
real(psb_spk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(alpha/=szero)) call x%sync()
if (y%is_dev().and.(beta/=szero)) call y%sync()
if (z%is_dev().and.(delta/=szero)) call z%sync()
call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
end subroutine s_base_abgdxyz
subroutine s_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_s_base_vect_type), intent(inout) :: x
class(psb_s_base_vect_type), intent(inout) :: y
class(psb_s_base_vect_type), intent(inout) :: z
class(psb_s_base_vect_type), intent(inout) :: w
real(psb_spk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(a/=szero)) call x%sync()
if (y%is_dev().and.(b/=szero)) call y%sync()
if (z%is_dev().and.(d/=szero)) call z%sync()
if (w%is_dev().and.(f/=szero)) call w%sync()
call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info)
call y%set_host()
call z%set_host()
call w%set_host()
end subroutine s_base_xyzw
! !
! Multiple variants of two operations: ! Multiple variants of two operations:

@ -579,6 +579,110 @@ module psb_s_csr_mat_mod
end subroutine psb_s_csr_scals end subroutine psb_s_csr_scals
end interface end interface
type, extends(psb_s_csr_sparse_mat) :: psb_s_ecsr_sparse_mat
!> Number of non-empty rows
integer(psb_ipk_) :: nnerws
!> Indices of non-empty rows
integer(psb_ipk_), allocatable :: nerwp(:)
contains
procedure, nopass :: get_fmt => s_ecsr_get_fmt
! procedure, pass(a) :: csmm => psb_s_ecsr_csmm
procedure, pass(a) :: csmv => psb_s_ecsr_csmv
procedure, pass(a) :: cp_from_coo => psb_s_cp_ecsr_from_coo
procedure, pass(a) :: cp_from_fmt => psb_s_cp_ecsr_from_fmt
procedure, pass(a) :: mv_from_coo => psb_s_mv_ecsr_from_coo
procedure, pass(a) :: mv_from_fmt => psb_s_mv_ecsr_from_fmt
procedure, pass(a) :: cmp_nerwp => psb_s_ecsr_cmp_nerwp
procedure, pass(a) :: free => s_ecsr_free
procedure, pass(a) :: mold => psb_s_ecsr_mold
end type psb_s_ecsr_sparse_mat
!> \memberof psb_s_ecsr_sparse_mat
!! \see psb_s_base_mat_mod::psb_s_base_csmv
interface
subroutine psb_s_ecsr_csmv(alpha,a,x,beta,y,info,trans)
import
class(psb_s_ecsr_sparse_mat), intent(in) :: a
real(psb_spk_), intent(in) :: alpha, beta, x(:)
real(psb_spk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_s_ecsr_csmv
end interface
!> \memberof psb_s_ecsr_sparse_mat
!! \see psb_s_base_mat_mod::psb_s_base_cp_from_coo
interface
subroutine psb_s_ecsr_cmp_nerwp(a,info)
import
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_ecsr_cmp_nerwp
end interface
!> \memberof psb_s_ecsr_sparse_mat
!! \see psb_s_base_mat_mod::psb_s_base_cp_from_coo
interface
subroutine psb_s_cp_ecsr_from_coo(a,b,info)
import
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_cp_ecsr_from_coo
end interface
!> \memberof psb_s_ecsr_sparse_mat
!! \see psb_s_base_mat_mod::psb_s_base_cp_from_fmt
interface
subroutine psb_s_cp_ecsr_from_fmt(a,b,info)
import
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_cp_ecsr_from_fmt
end interface
!> \memberof psb_s_ecsr_sparse_mat
!! \see psb_s_base_mat_mod::psb_s_base_mv_from_coo
interface
subroutine psb_s_mv_ecsr_from_coo(a,b,info)
import
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_mv_ecsr_from_coo
end interface
!> \memberof psb_s_ecsr_sparse_mat
!! \see psb_s_base_mat_mod::psb_s_base_mv_from_fmt
interface
subroutine psb_s_mv_ecsr_from_fmt(a,b,info)
import
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_mv_ecsr_from_fmt
end interface
!> \memberof psb_s_ecsr_sparse_mat
!| \see psb_base_mat_mod::psb_base_mold
interface
subroutine psb_s_ecsr_mold(a,b,info)
import
class(psb_s_ecsr_sparse_mat), intent(in) :: a
class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_ecsr_mold
end interface
!> \namespace psb_base_mod \class psb_ls_csr_sparse_mat !> \namespace psb_base_mod \class psb_ls_csr_sparse_mat
!! \extends psb_ls_base_mat_mod::psb_ls_base_sparse_mat !! \extends psb_ls_base_mat_mod::psb_ls_base_sparse_mat
!! !!
@ -1178,6 +1282,26 @@ contains
function s_ecsr_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'ECSR'
end function s_ecsr_get_fmt
subroutine s_ecsr_free(a)
implicit none
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
if (allocated(a%nerwp)) deallocate(a%nerwp)
a%nnerws = 0
call a%psb_s_csr_sparse_mat%free()
return
end subroutine s_ecsr_free
! == =================================== ! == ===================================
! !
! !

@ -79,12 +79,14 @@
module psb_s_mat_mod module psb_s_mat_mod
use psb_s_base_mat_mod use psb_s_base_mat_mod
use psb_s_csr_mat_mod, only : psb_s_csr_sparse_mat, psb_ls_csr_sparse_mat use psb_s_csr_mat_mod, only : psb_s_csr_sparse_mat, psb_ls_csr_sparse_mat,&
& psb_s_ecsr_sparse_mat
use psb_s_csc_mat_mod, only : psb_s_csc_sparse_mat, psb_ls_csc_sparse_mat use psb_s_csc_mat_mod, only : psb_s_csc_sparse_mat, psb_ls_csc_sparse_mat
type :: psb_sspmat_type type :: psb_sspmat_type
class(psb_s_base_sparse_mat), allocatable :: a class(psb_s_base_sparse_mat), allocatable :: a
class(psb_s_base_sparse_mat), allocatable :: ad, and
integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ integer(psb_ipk_) :: remote_build=psb_matbld_noremote_
type(psb_ls_coo_sparse_mat), allocatable :: rmta type(psb_ls_coo_sparse_mat), allocatable :: rmta
@ -202,6 +204,8 @@ module psb_s_mat_mod
procedure, pass(a) :: cscnv_ip => psb_s_cscnv_ip procedure, pass(a) :: cscnv_ip => psb_s_cscnv_ip
procedure, pass(a) :: cscnv_base => psb_s_cscnv_base procedure, pass(a) :: cscnv_base => psb_s_cscnv_base
generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base
procedure, pass(a) :: split_nd => psb_s_split_nd
procedure, pass(a) :: merge_nd => psb_s_merge_nd
procedure, pass(a) :: clone => psb_sspmat_clone procedure, pass(a) :: clone => psb_sspmat_clone
procedure, pass(a) :: move_alloc => psb_sspmat_type_move procedure, pass(a) :: move_alloc => psb_sspmat_type_move
! !
@ -840,6 +844,24 @@ module psb_s_mat_mod
! !
! !
interface
subroutine psb_s_split_nd(a,n_rows,n_cols,info)
import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat
class(psb_sspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_split_nd
end interface
interface
subroutine psb_s_merge_nd(a,n_rows,n_cols,info)
import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat
class(psb_sspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_merge_nd
end interface
! !
! CSCNV: switches to a different internal derived type. ! CSCNV: switches to a different internal derived type.
! 3 versions: copying to target ! 3 versions: copying to target
@ -859,7 +881,6 @@ module psb_s_mat_mod
end subroutine psb_s_cscnv end subroutine psb_s_cscnv
end interface end interface
interface interface
subroutine psb_s_cscnv_ip(a,iinfo,type,mold,dupl) subroutine psb_s_cscnv_ip(a,iinfo,type,mold,dupl)
import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat
@ -871,7 +892,6 @@ module psb_s_mat_mod
end subroutine psb_s_cscnv_ip end subroutine psb_s_cscnv_ip
end interface end interface
interface interface
subroutine psb_s_cscnv_base(a,b,info,dupl) subroutine psb_s_cscnv_base(a,b,info,dupl)
import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat

@ -102,6 +102,9 @@ module psb_s_vect_mod
procedure, pass(z) :: axpby_v2 => s_vect_axpby_v2 procedure, pass(z) :: axpby_v2 => s_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => s_vect_axpby_a2 procedure, pass(z) :: axpby_a2 => s_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => s_vect_abgdxyz
procedure, pass(z) :: xyzw => s_vect_xyzw
procedure, pass(y) :: mlt_v => s_vect_mlt_v procedure, pass(y) :: mlt_v => s_vect_mlt_v
procedure, pass(y) :: mlt_a => s_vect_mlt_a procedure, pass(y) :: mlt_a => s_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => s_vect_mlt_a_2 procedure, pass(z) :: mlt_a_2 => s_vect_mlt_a_2
@ -778,6 +781,38 @@ contains
end subroutine s_vect_axpby_a2 end subroutine s_vect_axpby_a2
subroutine s_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_s_vect_type), intent(inout) :: x
class(psb_s_vect_type), intent(inout) :: y
class(psb_s_vect_type), intent(inout) :: z
real(psb_spk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
end subroutine s_vect_abgdxyz
subroutine s_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_s_vect_type), intent(inout) :: x
class(psb_s_vect_type), intent(inout) :: y
class(psb_s_vect_type), intent(inout) :: z
class(psb_s_vect_type), intent(inout) :: w
real(psb_spk_), intent (in) :: a, b, c, d, e, f
integer(psb_ipk_), intent(out) :: info
if (allocated(w%v)) &
call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info)
end subroutine s_vect_xyzw
subroutine s_vect_mlt_v(x, y, info) subroutine s_vect_mlt_v(x, y, info)
use psi_serial_mod use psi_serial_mod
implicit none implicit none

@ -36,9 +36,7 @@ module psb_serial_mod
use psb_string_mod use psb_string_mod
use psb_sort_mod use psb_sort_mod
use psi_serial_mod, & use psi_serial_mod
& psb_gth => psi_gth,&
& psb_sct => psi_sct
use psb_s_serial_mod use psb_s_serial_mod
use psb_d_serial_mod use psb_d_serial_mod

@ -155,6 +155,9 @@ module psb_z_base_vect_mod
procedure, pass(z) :: axpby_v2 => z_base_axpby_v2 procedure, pass(z) :: axpby_v2 => z_base_axpby_v2
procedure, pass(z) :: axpby_a2 => z_base_axpby_a2 procedure, pass(z) :: axpby_a2 => z_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => z_base_abgdxyz
procedure, pass(w) :: xyzw => z_base_xyzw
! !
! Vector by vector multiplication. Need all variants ! Vector by vector multiplication. Need all variants
! to handle multiple requirements from preconditioners ! to handle multiple requirements from preconditioners
@ -1018,7 +1021,7 @@ contains
!! \param m Number of entries to be considered !! \param m Number of entries to be considered
!! \param alpha scalar alpha !! \param alpha scalar alpha
!! \param x The class(base_vect) to be added !! \param x The class(base_vect) to be added
!! \param beta scalar alpha !! \param beta scalar beta
!! \param info return code !! \param info return code
!! !!
subroutine z_base_axpby_v(m,alpha, x, beta, y, info) subroutine z_base_axpby_v(m,alpha, x, beta, y, info)
@ -1047,7 +1050,7 @@ contains
!! \param m Number of entries to be considered !! \param m Number of entries to be considered
!! \param alpha scalar alpha !! \param alpha scalar alpha
!! \param x The class(base_vect) to be added !! \param x The class(base_vect) to be added
!! \param beta scalar alpha !! \param beta scalar beta
!! \param y The class(base_vect) to be added !! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be returned !! \param z The class(base_vect) to be returned
!! \param info return code !! \param info return code
@ -1078,7 +1081,7 @@ contains
!! \param m Number of entries to be considered !! \param m Number of entries to be considered
!! \param alpha scalar alpha !! \param alpha scalar alpha
!! \param x(:) The array to be added !! \param x(:) The array to be added
!! \param beta scalar alpha !! \param beta scalar beta
!! \param info return code !! \param info return code
!! !!
subroutine z_base_axpby_a(m,alpha, x, beta, y, info) subroutine z_base_axpby_a(m,alpha, x, beta, y, info)
@ -1126,6 +1129,64 @@ contains
end subroutine z_base_axpby_a2 end subroutine z_base_axpby_a2
!
! ABGDXYZ is invoked via Z, hence the structure below.
!
!
!> Function base_abgdxyz
!! \memberof psb_z_base_vect_type
!! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
!! \param gamma scalar gamma
!! \param delta scalar delta
!! \param x The class(base_vect) to be added
!! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be added
!! \param info return code
!!
subroutine z_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_z_base_vect_type), intent(inout) :: x
class(psb_z_base_vect_type), intent(inout) :: y
class(psb_z_base_vect_type), intent(inout) :: z
complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(alpha/=zzero)) call x%sync()
if (y%is_dev().and.(beta/=zzero)) call y%sync()
if (z%is_dev().and.(delta/=zzero)) call z%sync()
call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
end subroutine z_base_abgdxyz
subroutine z_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_z_base_vect_type), intent(inout) :: x
class(psb_z_base_vect_type), intent(inout) :: y
class(psb_z_base_vect_type), intent(inout) :: z
class(psb_z_base_vect_type), intent(inout) :: w
complex(psb_dpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(a/=zzero)) call x%sync()
if (y%is_dev().and.(b/=zzero)) call y%sync()
if (z%is_dev().and.(d/=zzero)) call z%sync()
if (w%is_dev().and.(f/=zzero)) call w%sync()
call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info)
call y%set_host()
call z%set_host()
call w%set_host()
end subroutine z_base_xyzw
! !
! Multiple variants of two operations: ! Multiple variants of two operations:

@ -579,6 +579,110 @@ module psb_z_csr_mat_mod
end subroutine psb_z_csr_scals end subroutine psb_z_csr_scals
end interface end interface
type, extends(psb_z_csr_sparse_mat) :: psb_z_ecsr_sparse_mat
!> Number of non-empty rows
integer(psb_ipk_) :: nnerws
!> Indices of non-empty rows
integer(psb_ipk_), allocatable :: nerwp(:)
contains
procedure, nopass :: get_fmt => z_ecsr_get_fmt
! procedure, pass(a) :: csmm => psb_z_ecsr_csmm
procedure, pass(a) :: csmv => psb_z_ecsr_csmv
procedure, pass(a) :: cp_from_coo => psb_z_cp_ecsr_from_coo
procedure, pass(a) :: cp_from_fmt => psb_z_cp_ecsr_from_fmt
procedure, pass(a) :: mv_from_coo => psb_z_mv_ecsr_from_coo
procedure, pass(a) :: mv_from_fmt => psb_z_mv_ecsr_from_fmt
procedure, pass(a) :: cmp_nerwp => psb_z_ecsr_cmp_nerwp
procedure, pass(a) :: free => z_ecsr_free
procedure, pass(a) :: mold => psb_z_ecsr_mold
end type psb_z_ecsr_sparse_mat
!> \memberof psb_z_ecsr_sparse_mat
!! \see psb_z_base_mat_mod::psb_z_base_csmv
interface
subroutine psb_z_ecsr_csmv(alpha,a,x,beta,y,info,trans)
import
class(psb_z_ecsr_sparse_mat), intent(in) :: a
complex(psb_dpk_), intent(in) :: alpha, beta, x(:)
complex(psb_dpk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_z_ecsr_csmv
end interface
!> \memberof psb_z_ecsr_sparse_mat
!! \see psb_z_base_mat_mod::psb_z_base_cp_from_coo
interface
subroutine psb_z_ecsr_cmp_nerwp(a,info)
import
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_ecsr_cmp_nerwp
end interface
!> \memberof psb_z_ecsr_sparse_mat
!! \see psb_z_base_mat_mod::psb_z_base_cp_from_coo
interface
subroutine psb_z_cp_ecsr_from_coo(a,b,info)
import
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_cp_ecsr_from_coo
end interface
!> \memberof psb_z_ecsr_sparse_mat
!! \see psb_z_base_mat_mod::psb_z_base_cp_from_fmt
interface
subroutine psb_z_cp_ecsr_from_fmt(a,b,info)
import
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_cp_ecsr_from_fmt
end interface
!> \memberof psb_z_ecsr_sparse_mat
!! \see psb_z_base_mat_mod::psb_z_base_mv_from_coo
interface
subroutine psb_z_mv_ecsr_from_coo(a,b,info)
import
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_mv_ecsr_from_coo
end interface
!> \memberof psb_z_ecsr_sparse_mat
!! \see psb_z_base_mat_mod::psb_z_base_mv_from_fmt
interface
subroutine psb_z_mv_ecsr_from_fmt(a,b,info)
import
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_mv_ecsr_from_fmt
end interface
!> \memberof psb_z_ecsr_sparse_mat
!| \see psb_base_mat_mod::psb_base_mold
interface
subroutine psb_z_ecsr_mold(a,b,info)
import
class(psb_z_ecsr_sparse_mat), intent(in) :: a
class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_ecsr_mold
end interface
!> \namespace psb_base_mod \class psb_lz_csr_sparse_mat !> \namespace psb_base_mod \class psb_lz_csr_sparse_mat
!! \extends psb_lz_base_mat_mod::psb_lz_base_sparse_mat !! \extends psb_lz_base_mat_mod::psb_lz_base_sparse_mat
!! !!
@ -1178,6 +1282,26 @@ contains
function z_ecsr_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'ECSR'
end function z_ecsr_get_fmt
subroutine z_ecsr_free(a)
implicit none
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
if (allocated(a%nerwp)) deallocate(a%nerwp)
a%nnerws = 0
call a%psb_z_csr_sparse_mat%free()
return
end subroutine z_ecsr_free
! == =================================== ! == ===================================
! !
! !

@ -79,12 +79,14 @@
module psb_z_mat_mod module psb_z_mat_mod
use psb_z_base_mat_mod use psb_z_base_mat_mod
use psb_z_csr_mat_mod, only : psb_z_csr_sparse_mat, psb_lz_csr_sparse_mat use psb_z_csr_mat_mod, only : psb_z_csr_sparse_mat, psb_lz_csr_sparse_mat,&
& psb_z_ecsr_sparse_mat
use psb_z_csc_mat_mod, only : psb_z_csc_sparse_mat, psb_lz_csc_sparse_mat use psb_z_csc_mat_mod, only : psb_z_csc_sparse_mat, psb_lz_csc_sparse_mat
type :: psb_zspmat_type type :: psb_zspmat_type
class(psb_z_base_sparse_mat), allocatable :: a class(psb_z_base_sparse_mat), allocatable :: a
class(psb_z_base_sparse_mat), allocatable :: ad, and
integer(psb_ipk_) :: remote_build=psb_matbld_noremote_ integer(psb_ipk_) :: remote_build=psb_matbld_noremote_
type(psb_lz_coo_sparse_mat), allocatable :: rmta type(psb_lz_coo_sparse_mat), allocatable :: rmta
@ -202,6 +204,8 @@ module psb_z_mat_mod
procedure, pass(a) :: cscnv_ip => psb_z_cscnv_ip procedure, pass(a) :: cscnv_ip => psb_z_cscnv_ip
procedure, pass(a) :: cscnv_base => psb_z_cscnv_base procedure, pass(a) :: cscnv_base => psb_z_cscnv_base
generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base
procedure, pass(a) :: split_nd => psb_z_split_nd
procedure, pass(a) :: merge_nd => psb_z_merge_nd
procedure, pass(a) :: clone => psb_zspmat_clone procedure, pass(a) :: clone => psb_zspmat_clone
procedure, pass(a) :: move_alloc => psb_zspmat_type_move procedure, pass(a) :: move_alloc => psb_zspmat_type_move
! !
@ -840,6 +844,24 @@ module psb_z_mat_mod
! !
! !
interface
subroutine psb_z_split_nd(a,n_rows,n_cols,info)
import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat
class(psb_zspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_split_nd
end interface
interface
subroutine psb_z_merge_nd(a,n_rows,n_cols,info)
import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat
class(psb_zspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_merge_nd
end interface
! !
! CSCNV: switches to a different internal derived type. ! CSCNV: switches to a different internal derived type.
! 3 versions: copying to target ! 3 versions: copying to target
@ -859,7 +881,6 @@ module psb_z_mat_mod
end subroutine psb_z_cscnv end subroutine psb_z_cscnv
end interface end interface
interface interface
subroutine psb_z_cscnv_ip(a,iinfo,type,mold,dupl) subroutine psb_z_cscnv_ip(a,iinfo,type,mold,dupl)
import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat
@ -871,7 +892,6 @@ module psb_z_mat_mod
end subroutine psb_z_cscnv_ip end subroutine psb_z_cscnv_ip
end interface end interface
interface interface
subroutine psb_z_cscnv_base(a,b,info,dupl) subroutine psb_z_cscnv_base(a,b,info,dupl)
import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat

@ -102,6 +102,9 @@ module psb_z_vect_mod
procedure, pass(z) :: axpby_v2 => z_vect_axpby_v2 procedure, pass(z) :: axpby_v2 => z_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => z_vect_axpby_a2 procedure, pass(z) :: axpby_a2 => z_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => z_vect_abgdxyz
procedure, pass(z) :: xyzw => z_vect_xyzw
procedure, pass(y) :: mlt_v => z_vect_mlt_v procedure, pass(y) :: mlt_v => z_vect_mlt_v
procedure, pass(y) :: mlt_a => z_vect_mlt_a procedure, pass(y) :: mlt_a => z_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => z_vect_mlt_a_2 procedure, pass(z) :: mlt_a_2 => z_vect_mlt_a_2
@ -771,6 +774,38 @@ contains
end subroutine z_vect_axpby_a2 end subroutine z_vect_axpby_a2
subroutine z_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_z_vect_type), intent(inout) :: x
class(psb_z_vect_type), intent(inout) :: y
class(psb_z_vect_type), intent(inout) :: z
complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
end subroutine z_vect_abgdxyz
subroutine z_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_z_vect_type), intent(inout) :: x
class(psb_z_vect_type), intent(inout) :: y
class(psb_z_vect_type), intent(inout) :: z
class(psb_z_vect_type), intent(inout) :: w
complex(psb_dpk_), intent (in) :: a, b, c, d, e, f
integer(psb_ipk_), intent(out) :: info
if (allocated(w%v)) &
call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info)
end subroutine z_vect_xyzw
subroutine z_vect_mlt_v(x, y, info) subroutine z_vect_mlt_v(x, y, info)
use psi_serial_mod use psi_serial_mod
implicit none implicit none

@ -250,7 +250,7 @@ Module psb_c_tools_mod
end interface end interface
interface psb_spasb interface psb_spasb
subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold, bld_and)
import import
implicit none implicit none
type(psb_cspmat_type), intent (inout) :: a type(psb_cspmat_type), intent (inout) :: a
@ -259,6 +259,7 @@ Module psb_c_tools_mod
integer(psb_ipk_),optional, intent(in) :: upd integer(psb_ipk_),optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt character(len=*), optional, intent(in) :: afmt
class(psb_c_base_sparse_mat), intent(in), optional :: mold class(psb_c_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
end subroutine psb_cspasb end subroutine psb_cspasb
end interface end interface

@ -250,7 +250,7 @@ Module psb_d_tools_mod
end interface end interface
interface psb_spasb interface psb_spasb
subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold, bld_and)
import import
implicit none implicit none
type(psb_dspmat_type), intent (inout) :: a type(psb_dspmat_type), intent (inout) :: a
@ -259,6 +259,7 @@ Module psb_d_tools_mod
integer(psb_ipk_),optional, intent(in) :: upd integer(psb_ipk_),optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt character(len=*), optional, intent(in) :: afmt
class(psb_d_base_sparse_mat), intent(in), optional :: mold class(psb_d_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
end subroutine psb_dspasb end subroutine psb_dspasb
end interface end interface

@ -250,7 +250,7 @@ Module psb_s_tools_mod
end interface end interface
interface psb_spasb interface psb_spasb
subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold, bld_and)
import import
implicit none implicit none
type(psb_sspmat_type), intent (inout) :: a type(psb_sspmat_type), intent (inout) :: a
@ -259,6 +259,7 @@ Module psb_s_tools_mod
integer(psb_ipk_),optional, intent(in) :: upd integer(psb_ipk_),optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt character(len=*), optional, intent(in) :: afmt
class(psb_s_base_sparse_mat), intent(in), optional :: mold class(psb_s_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
end subroutine psb_sspasb end subroutine psb_sspasb
end interface end interface

@ -250,7 +250,7 @@ Module psb_z_tools_mod
end interface end interface
interface psb_spasb interface psb_spasb
subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold, bld_and)
import import
implicit none implicit none
type(psb_zspmat_type), intent (inout) :: a type(psb_zspmat_type), intent (inout) :: a
@ -259,6 +259,7 @@ Module psb_z_tools_mod
integer(psb_ipk_),optional, intent(in) :: upd integer(psb_ipk_),optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt character(len=*), optional, intent(in) :: afmt
class(psb_z_base_sparse_mat), intent(in), optional :: mold class(psb_z_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
end subroutine psb_zspasb end subroutine psb_zspasb
end interface end interface

@ -741,3 +741,86 @@ subroutine psb_caddconst_vect(x,b,z,desc_a,info)
return return
end subroutine psb_caddconst_vect end subroutine psb_caddconst_vect
subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
use psb_base_mod, psb_protect_name => psb_cabgdxyz_vect
implicit none
type(psb_c_vect_type), intent (inout) :: x
type(psb_c_vect_type), intent (inout) :: y
type(psb_c_vect_type), intent (inout) :: z
complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me,&
& err_act, iix, jjx, iiy, jjy, nr
integer(psb_lpk_) :: ix, ijx, iy, ijy, m
character(len=20) :: name, ch_err
name='psb_c_addconst_vect'
if (psb_errstatus_fatal()) return
info=psb_success_
call psb_erractionsave(err_act)
ctxt=desc_a%get_context()
call psb_info(ctxt, me, np)
if (np == -ione) then
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(x%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(y%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(z%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
ix = ione
iy = ione
m = desc_a%get_global_rows()
nr = desc_a%get_local_rows()
! check vector correctness
call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 1'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 2'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if(desc_a%get_local_rows() > 0) then
call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psb_cabgdxyz_vect

@ -83,6 +83,9 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,&
character(len=20) :: name, ch_err character(len=20) :: name, ch_err
logical :: aliw, doswap_ logical :: aliw, doswap_
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
logical, parameter :: do_timings=.true.
integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1
integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1
name='psb_cspmv' name='psb_cspmv'
info=psb_success_ info=psb_success_
@ -130,6 +133,19 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,&
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
end if end if
if ((do_timings).and.(mv_phase1==-1)) &
& mv_phase1 = psb_get_timer_idx("SPMM: and send ")
if ((do_timings).and.(mv_phase2==-1)) &
& mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad")
if ((do_timings).and.(mv_phase3==-1)) &
& mv_phase3 = psb_get_timer_idx("SPMM: and rcv")
if ((do_timings).and.(mv_phase4==-1)) &
& mv_phase4 = psb_get_timer_idx("SPMM: and cmp and")
if ((do_timings).and.(mv_phase11==-1)) &
& mv_phase11 = psb_get_timer_idx("SPMM: noand exch ")
if ((do_timings).and.(mv_phase12==-1)) &
& mv_phase12 = psb_get_timer_idx("SPMM: noand cmp")
m = desc_a%get_global_rows() m = desc_a%get_global_rows()
n = desc_a%get_global_cols() n = desc_a%get_global_cols()
@ -179,12 +195,44 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,&
if (trans_ == 'N') then if (trans_ == 'N') then
! Matrix is not transposed ! Matrix is not transposed
if (allocated(a%ad)) then
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
!if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt()
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase1)
if (doswap_) call psi_swapdata(psb_swap_send_,&
& czero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase1)
if (do_timings) call psb_tic(mv_phase2)
call a%ad%spmm(alpha,x%v,beta,y%v,info)
if (do_timings) call psb_tic(mv_phase3)
if (doswap_) call psi_swapdata(psb_swap_recv_,&
& czero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase3)
if (do_timings) call psb_tic(mv_phase4)
call a%and%spmm(alpha,x%v,cone,y%v,info)
if (do_timings) call psb_toc(mv_phase4)
end block
else
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase11)
if (doswap_) then if (doswap_) then
call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),&
& czero,x%v,desc_a,iwork,info,data=psb_comm_halo_) & czero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
end if end if
if (do_timings) call psb_toc(mv_phase11)
if (do_timings) call psb_tic(mv_phase12)
call psb_csmm(alpha,a,x,beta,y,info) call psb_csmm(alpha,a,x,beta,y,info)
if (do_timings) call psb_toc(mv_phase12)
end block
end if
if(info /= psb_success_) then if(info /= psb_success_) then
info = psb_err_from_subroutine_non_ info = psb_err_from_subroutine_non_

@ -741,3 +741,86 @@ subroutine psb_daddconst_vect(x,b,z,desc_a,info)
return return
end subroutine psb_daddconst_vect end subroutine psb_daddconst_vect
subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
use psb_base_mod, psb_protect_name => psb_dabgdxyz_vect
implicit none
type(psb_d_vect_type), intent (inout) :: x
type(psb_d_vect_type), intent (inout) :: y
type(psb_d_vect_type), intent (inout) :: z
real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me,&
& err_act, iix, jjx, iiy, jjy, nr
integer(psb_lpk_) :: ix, ijx, iy, ijy, m
character(len=20) :: name, ch_err
name='psb_d_addconst_vect'
if (psb_errstatus_fatal()) return
info=psb_success_
call psb_erractionsave(err_act)
ctxt=desc_a%get_context()
call psb_info(ctxt, me, np)
if (np == -ione) then
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(x%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(y%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(z%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
ix = ione
iy = ione
m = desc_a%get_global_rows()
nr = desc_a%get_local_rows()
! check vector correctness
call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 1'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 2'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if(desc_a%get_local_rows() > 0) then
call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psb_dabgdxyz_vect

@ -83,6 +83,9 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,&
character(len=20) :: name, ch_err character(len=20) :: name, ch_err
logical :: aliw, doswap_ logical :: aliw, doswap_
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
logical, parameter :: do_timings=.true.
integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1
integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1
name='psb_dspmv' name='psb_dspmv'
info=psb_success_ info=psb_success_
@ -130,6 +133,19 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,&
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
end if end if
if ((do_timings).and.(mv_phase1==-1)) &
& mv_phase1 = psb_get_timer_idx("SPMM: and send ")
if ((do_timings).and.(mv_phase2==-1)) &
& mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad")
if ((do_timings).and.(mv_phase3==-1)) &
& mv_phase3 = psb_get_timer_idx("SPMM: and rcv")
if ((do_timings).and.(mv_phase4==-1)) &
& mv_phase4 = psb_get_timer_idx("SPMM: and cmp and")
if ((do_timings).and.(mv_phase11==-1)) &
& mv_phase11 = psb_get_timer_idx("SPMM: noand exch ")
if ((do_timings).and.(mv_phase12==-1)) &
& mv_phase12 = psb_get_timer_idx("SPMM: noand cmp")
m = desc_a%get_global_rows() m = desc_a%get_global_rows()
n = desc_a%get_global_cols() n = desc_a%get_global_cols()
@ -179,12 +195,44 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,&
if (trans_ == 'N') then if (trans_ == 'N') then
! Matrix is not transposed ! Matrix is not transposed
if (allocated(a%ad)) then
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
!if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt()
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase1)
if (doswap_) call psi_swapdata(psb_swap_send_,&
& dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase1)
if (do_timings) call psb_tic(mv_phase2)
call a%ad%spmm(alpha,x%v,beta,y%v,info)
if (do_timings) call psb_tic(mv_phase3)
if (doswap_) call psi_swapdata(psb_swap_recv_,&
& dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase3)
if (do_timings) call psb_tic(mv_phase4)
call a%and%spmm(alpha,x%v,done,y%v,info)
if (do_timings) call psb_toc(mv_phase4)
end block
else
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase11)
if (doswap_) then if (doswap_) then
call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),&
& dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) & dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
end if end if
if (do_timings) call psb_toc(mv_phase11)
if (do_timings) call psb_tic(mv_phase12)
call psb_csmm(alpha,a,x,beta,y,info) call psb_csmm(alpha,a,x,beta,y,info)
if (do_timings) call psb_toc(mv_phase12)
end block
end if
if(info /= psb_success_) then if(info /= psb_success_) then
info = psb_err_from_subroutine_non_ info = psb_err_from_subroutine_non_

@ -741,3 +741,86 @@ subroutine psb_saddconst_vect(x,b,z,desc_a,info)
return return
end subroutine psb_saddconst_vect end subroutine psb_saddconst_vect
subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
use psb_base_mod, psb_protect_name => psb_sabgdxyz_vect
implicit none
type(psb_s_vect_type), intent (inout) :: x
type(psb_s_vect_type), intent (inout) :: y
type(psb_s_vect_type), intent (inout) :: z
real(psb_spk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me,&
& err_act, iix, jjx, iiy, jjy, nr
integer(psb_lpk_) :: ix, ijx, iy, ijy, m
character(len=20) :: name, ch_err
name='psb_s_addconst_vect'
if (psb_errstatus_fatal()) return
info=psb_success_
call psb_erractionsave(err_act)
ctxt=desc_a%get_context()
call psb_info(ctxt, me, np)
if (np == -ione) then
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(x%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(y%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(z%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
ix = ione
iy = ione
m = desc_a%get_global_rows()
nr = desc_a%get_local_rows()
! check vector correctness
call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 1'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 2'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if(desc_a%get_local_rows() > 0) then
call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psb_sabgdxyz_vect

@ -83,6 +83,9 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,&
character(len=20) :: name, ch_err character(len=20) :: name, ch_err
logical :: aliw, doswap_ logical :: aliw, doswap_
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
logical, parameter :: do_timings=.true.
integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1
integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1
name='psb_sspmv' name='psb_sspmv'
info=psb_success_ info=psb_success_
@ -130,6 +133,19 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,&
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
end if end if
if ((do_timings).and.(mv_phase1==-1)) &
& mv_phase1 = psb_get_timer_idx("SPMM: and send ")
if ((do_timings).and.(mv_phase2==-1)) &
& mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad")
if ((do_timings).and.(mv_phase3==-1)) &
& mv_phase3 = psb_get_timer_idx("SPMM: and rcv")
if ((do_timings).and.(mv_phase4==-1)) &
& mv_phase4 = psb_get_timer_idx("SPMM: and cmp and")
if ((do_timings).and.(mv_phase11==-1)) &
& mv_phase11 = psb_get_timer_idx("SPMM: noand exch ")
if ((do_timings).and.(mv_phase12==-1)) &
& mv_phase12 = psb_get_timer_idx("SPMM: noand cmp")
m = desc_a%get_global_rows() m = desc_a%get_global_rows()
n = desc_a%get_global_cols() n = desc_a%get_global_cols()
@ -179,12 +195,44 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,&
if (trans_ == 'N') then if (trans_ == 'N') then
! Matrix is not transposed ! Matrix is not transposed
if (allocated(a%ad)) then
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
!if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt()
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase1)
if (doswap_) call psi_swapdata(psb_swap_send_,&
& szero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase1)
if (do_timings) call psb_tic(mv_phase2)
call a%ad%spmm(alpha,x%v,beta,y%v,info)
if (do_timings) call psb_tic(mv_phase3)
if (doswap_) call psi_swapdata(psb_swap_recv_,&
& szero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase3)
if (do_timings) call psb_tic(mv_phase4)
call a%and%spmm(alpha,x%v,sone,y%v,info)
if (do_timings) call psb_toc(mv_phase4)
end block
else
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase11)
if (doswap_) then if (doswap_) then
call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),&
& szero,x%v,desc_a,iwork,info,data=psb_comm_halo_) & szero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
end if end if
if (do_timings) call psb_toc(mv_phase11)
if (do_timings) call psb_tic(mv_phase12)
call psb_csmm(alpha,a,x,beta,y,info) call psb_csmm(alpha,a,x,beta,y,info)
if (do_timings) call psb_toc(mv_phase12)
end block
end if
if(info /= psb_success_) then if(info /= psb_success_) then
info = psb_err_from_subroutine_non_ info = psb_err_from_subroutine_non_

@ -741,3 +741,86 @@ subroutine psb_zaddconst_vect(x,b,z,desc_a,info)
return return
end subroutine psb_zaddconst_vect end subroutine psb_zaddconst_vect
subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
use psb_base_mod, psb_protect_name => psb_zabgdxyz_vect
implicit none
type(psb_z_vect_type), intent (inout) :: x
type(psb_z_vect_type), intent (inout) :: y
type(psb_z_vect_type), intent (inout) :: z
complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me,&
& err_act, iix, jjx, iiy, jjy, nr
integer(psb_lpk_) :: ix, ijx, iy, ijy, m
character(len=20) :: name, ch_err
name='psb_z_addconst_vect'
if (psb_errstatus_fatal()) return
info=psb_success_
call psb_erractionsave(err_act)
ctxt=desc_a%get_context()
call psb_info(ctxt, me, np)
if (np == -ione) then
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(x%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(y%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(z%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
ix = ione
iy = ione
m = desc_a%get_global_rows()
nr = desc_a%get_local_rows()
! check vector correctness
call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 1'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 2'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if(desc_a%get_local_rows() > 0) then
call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psb_zabgdxyz_vect

@ -83,6 +83,9 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,&
character(len=20) :: name, ch_err character(len=20) :: name, ch_err
logical :: aliw, doswap_ logical :: aliw, doswap_
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
logical, parameter :: do_timings=.true.
integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1
integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1
name='psb_zspmv' name='psb_zspmv'
info=psb_success_ info=psb_success_
@ -130,6 +133,19 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,&
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
end if end if
if ((do_timings).and.(mv_phase1==-1)) &
& mv_phase1 = psb_get_timer_idx("SPMM: and send ")
if ((do_timings).and.(mv_phase2==-1)) &
& mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad")
if ((do_timings).and.(mv_phase3==-1)) &
& mv_phase3 = psb_get_timer_idx("SPMM: and rcv")
if ((do_timings).and.(mv_phase4==-1)) &
& mv_phase4 = psb_get_timer_idx("SPMM: and cmp and")
if ((do_timings).and.(mv_phase11==-1)) &
& mv_phase11 = psb_get_timer_idx("SPMM: noand exch ")
if ((do_timings).and.(mv_phase12==-1)) &
& mv_phase12 = psb_get_timer_idx("SPMM: noand cmp")
m = desc_a%get_global_rows() m = desc_a%get_global_rows()
n = desc_a%get_global_cols() n = desc_a%get_global_cols()
@ -179,12 +195,44 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,&
if (trans_ == 'N') then if (trans_ == 'N') then
! Matrix is not transposed ! Matrix is not transposed
if (allocated(a%ad)) then
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
!if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt()
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase1)
if (doswap_) call psi_swapdata(psb_swap_send_,&
& zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase1)
if (do_timings) call psb_tic(mv_phase2)
call a%ad%spmm(alpha,x%v,beta,y%v,info)
if (do_timings) call psb_tic(mv_phase3)
if (doswap_) call psi_swapdata(psb_swap_recv_,&
& zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase3)
if (do_timings) call psb_tic(mv_phase4)
call a%and%spmm(alpha,x%v,zone,y%v,info)
if (do_timings) call psb_toc(mv_phase4)
end block
else
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase11)
if (doswap_) then if (doswap_) then
call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),& call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),&
& zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_) & zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
end if end if
if (do_timings) call psb_toc(mv_phase11)
if (do_timings) call psb_tic(mv_phase12)
call psb_csmm(alpha,a,x,beta,y,info) call psb_csmm(alpha,a,x,beta,y,info)
if (do_timings) call psb_toc(mv_phase12)
end block
end if
if(info /= psb_success_) then if(info /= psb_success_) then
info = psb_err_from_subroutine_non_ info = psb_err_from_subroutine_non_

@ -2006,8 +2006,8 @@ subroutine psb_c_base_vect_mv(alpha,a,x,beta,y,info,trans)
! For the time being we just throw everything back ! For the time being we just throw everything back
! onto the normal routines. ! onto the normal routines.
call x%sync() if (x%is_dev()) call x%sync()
call y%sync() if (y%is_dev()) call y%sync()
call a%spmm(alpha,x%v,beta,y%v,info,trans) call a%spmm(alpha,x%v,beta,y%v,info,trans)
call y%set_host() call y%set_host()
end subroutine psb_c_base_vect_mv end subroutine psb_c_base_vect_mv
@ -2060,8 +2060,8 @@ subroutine psb_c_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
goto 9999 goto 9999
end if end if
call x%sync() if (x%is_dev()) call x%sync()
call y%sync() if (y%is_dev()) call y%sync()
if (present(d)) then if (present(d)) then
call d%sync() call d%sync()
if (present(scale)) then if (present(scale)) then
@ -2082,6 +2082,7 @@ subroutine psb_c_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
if (info == psb_success_)& if (info == psb_success_)&
& call a%inner_spsm(alpha,tmpv,beta,y,info,trans) & call a%inner_spsm(alpha,tmpv,beta,y,info,trans)
call y%set_host()
if (info == psb_success_) then if (info == psb_success_) then
call tmpv%free(info) call tmpv%free(info)
if (info == psb_success_) deallocate(tmpv,stat=info) if (info == psb_success_) deallocate(tmpv,stat=info)
@ -2161,8 +2162,11 @@ subroutine psb_c_base_inner_vect_sv(alpha,a,x,beta,y,info,trans)
info = psb_success_ info = psb_success_
call psb_erractionsave(err_act) call psb_erractionsave(err_act)
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) call a%inner_spsm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_from_subroutine_ info = psb_err_from_subroutine_

@ -166,6 +166,7 @@ subroutine psb_c_coo_scals(d,a,info)
call a%make_nonunit() call a%make_nonunit()
end if end if
!$omp parallel do private(i)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d a%val(i) = a%val(i) * d
enddo enddo
@ -4174,7 +4175,6 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
#endif #endif
info = psb_success_ info = psb_success_
@ -4205,7 +4205,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
! 'iaux' has to allow the threads to have an exclusive group ! 'iaux' has to allow the threads to have an exclusive group
! of indices as work space. Since each thread handles one ! of indices as work space. Since each thread handles one
! row/column at the time, we allocate this way. ! row/column at the time, we allocate this way.
allocate(iaux(MAX((nc+2),(nr+2))*maxthreads),stat=info) allocate(iaux(MAX((nc+2),(nr+2))),stat=info)
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_alloc_dealloc_ info = psb_err_alloc_dealloc_
call psb_errpush(info,name) call psb_errpush(info,name)
@ -4214,7 +4214,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
#else #else
allocate(iaux(nzin+2),stat=info) allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info)
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_alloc_dealloc_ info = psb_err_alloc_dealloc_
call psb_errpush(info,name) call psb_errpush(info,name)
@ -4268,7 +4268,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!locals !locals
integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:)
complex(psb_spk_), allocatable :: vs(:) complex(psb_spk_), allocatable :: vs(:)
integer(psb_ipk_) :: nza, nzl,iret integer(psb_ipk_) :: nza, nzl,iret, maxnzr
integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name = 'psb_fixcoo' character(len=20) :: name = 'psb_fixcoo'
@ -4277,7 +4277,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) integer(psb_ipk_), allocatable :: kaux(:),idxaux(:)
#endif #endif
info = psb_success_ info = psb_success_
@ -4301,11 +4301,14 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
use_buffers = .false. use_buffers = .false.
end if end if
if (use_buffers) then !if (use_buffers) then
iaux(:) = 0
#if defined(OPENMP) #if defined(OPENMP)
!$omp workshare
iaux(:) = 0
!$omp end workshare
maxnzr = 0
!$OMP PARALLEL DO default(none) schedule(STATIC) & !$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP shared(nzin,ia,nr,iaux) & !$OMP shared(nzin,ia,nr,iaux,maxnzr) &
!$OMP private(i) & !$OMP private(i) &
!$OMP reduction(.and.:use_buffers) !$OMP reduction(.and.:use_buffers)
do i=1,nzin do i=1,nzin
@ -4319,7 +4322,16 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end if end if
end do end do
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
maxnzr = 0
!$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP private(i) shared(nr,iaux)&
!$OMP reduction(max:maxnzr)
do i=1,nr
maxnzr = max(maxnzr,iaux(i))
end do
!$OMP END PARALLEL DO
#else #else
iaux(:) = 0
!srt_inp = .true. !srt_inp = .true.
do i=1,nzin do i=1,nzin
if ((ia(i) < 1).or.(ia(i) > nr)) then if ((ia(i) < 1).or.(ia(i) > nr)) then
@ -4333,8 +4345,12 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) !srt_inp = srt_inp .and.(ia(i-1)<=ia(i))
end do end do
maxnzr = 0
do i=1,nr
maxnzr = max(maxnzr,iaux(i))
end do
#endif #endif
end if !end if
! Check again use_buffers. We enter here if nzin >= nr and ! Check again use_buffers. We enter here if nzin >= nr and
! all the indices are valid ! all the indices are valid
@ -4342,22 +4358,21 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
if (use_buffers) then if (use_buffers) then
#if defined(OPENMP) #if defined(OPENMP)
maxthreads = omp_get_max_threads() maxthreads = omp_get_max_threads()
allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info)
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_alloc_dealloc_ info = psb_err_alloc_dealloc_
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
end if end if
!$omp workshare
kaux(:) = 0 kaux(:) = 0
sum(:) = 0 !$omp end workshare
sum(1) = 1
err = 0 err = 0
! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting
! index for each row. We do the same on 'kaux' ! index for each row. We do the same on 'kaux'
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP shared(maxnzr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, &
!$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info)
@ -4382,36 +4397,40 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!write(0,*) 'fix_coo_inner: trying with exscan' !write(0,*) 'fix_coo_inner: trying with exscan'
call psi_exscan(nr+1,iaux,info,shift=ione) call psi_exscan(nr+1,iaux,info,shift=ione)
!$OMP BARRIER !$OMP BARRIER
!$OMP SINGLE
!t0 = omp_get_wtime()
!$OMP END SINGLE
! ------------------ Sorting and buffers ------------------- ! ------------------ Sorting and buffers -------------------
! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving
! unmodified 'iaux' ! unmodified 'iaux'
do j=idxstart,idxend
!$omp do private(j)
do j=1,nr+1
idxaux(j) = iaux(j) idxaux(j) = iaux(j)
end do end do
!$omp end do
! Here we sort data inside the auxiliary buffers ! Here we sort data inside the auxiliary buffers
!$omp do private(act_row,i,i1)
do i=1,nzin do i=1,nzin
act_row = ia(i) act_row = ia(i)
if ((act_row >= idxstart) .and. (act_row <= idxend)) then !$omp atomic capture
ias(idxaux(act_row)) = ia(i) i1 =idxaux(act_row)
jas(idxaux(act_row)) = ja(i)
vs(idxaux(act_row)) = val(i)
idxaux(act_row) = idxaux(act_row) + 1 idxaux(act_row) = idxaux(act_row) + 1
end if !$omp end atomic
ias(i1) = ia(i)
jas(i1) = ja(i)
vs(i1) = val(i)
end do end do
!$omp end do
!$OMP BARRIER
!$OMP SINGLE
!t1 = omp_get_wtime()
!write(0,*) ithread,'Srt&Cpy :',t1-t0
!$OMP END SINGLE
! Let's sort column indices and values. After that we will store ! Let's sort column indices and values. After that we will store
! the number of unique values in 'kaux' ! the number of unique values in 'kaux'
do j=idxstart,idxend block
integer(psb_ipk_), allocatable :: ixt(:)
allocate(ixt(2*maxnzr+2))
!$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256)
do j=1,nr
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
nzl = last_elem - first_elem + 1 nzl = last_elem - first_elem + 1
@ -4419,11 +4438,11 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! The row has elements? ! The row has elements?
if (nzl > 0) then if (nzl > 0) then
call psi_msort_up(nzl,jas(first_elem:last_elem), & call psi_msort_up(nzl,jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) & ixt,iret)
if (iret == 0) then if (iret == 0) then
call psb_ip_reord(nzl,vs(first_elem:last_elem),& call psb_ip_reord(nzl,vs(first_elem:last_elem),&
& ias(first_elem:last_elem),jas(first_elem:last_elem), & & ias(first_elem:last_elem),jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) & ixt)
end if end if
! Over each row we count the unique values ! Over each row we count the unique values
@ -4436,6 +4455,9 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end do end do
end if end if
end do end do
!$omp end do
deallocate(ixt)
end block
! -------------------------------------------------- ! --------------------------------------------------
! ---------------- kaux composition ---------------- ! ---------------- kaux composition ----------------
@ -4553,7 +4575,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
nzout = kaux(nr+1) - 1 nzout = kaux(nr+1) - 1
deallocate(sum,kaux,idxaux,stat=info) deallocate(kaux,idxaux,stat=info)
#else #else
!if (.not.srt_inp) then !if (.not.srt_inp) then
@ -4710,7 +4732,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
& call psb_ip_reord(nzin,val,ia,ja,iaux) & call psb_ip_reord(nzin,val,ia,ja,iaux)
#if defined(OPENMP) #if defined(OPENMP)
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) &
!$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, &
!$OMP work,first_elem,last_elem) !$OMP work,first_elem,last_elem)
@ -4732,6 +4754,9 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
idxend = idxstart + work - 1 idxend = idxstart + work - 1
block
integer(psb_ipk_), allocatable :: ixt(:)
allocate(ixt(2*maxnzr+2))
! --------------------------------------------------- ! ---------------------------------------------------
first_elem = 0 first_elem = 0
@ -4746,11 +4771,10 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
nzl = last_elem - first_elem + 1 nzl = last_elem - first_elem + 1
if (nzl > 0) then if (nzl > 0) then
call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) call psi_msort_up(nzl,ja(first_elem:last_elem),ixt,iret)
if (iret == 0) & if (iret == 0) &
& call psb_ip_reord(nzl,val(first_elem:last_elem),& & call psb_ip_reord(nzl,val(first_elem:last_elem),&
& ia(first_elem:last_elem),ja(first_elem:last_elem),& & ia(first_elem:last_elem),ja(first_elem:last_elem),ixt)
& iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2))
end if end if
act_row = act_row + 1 act_row = act_row + 1
@ -4764,6 +4788,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
last_elem = j last_elem = j
end if end if
end do end do
end block
!$OMP END PARALLEL !$OMP END PARALLEL
#else #else
i = 1 i = 1

@ -2163,7 +2163,7 @@ subroutine psb_c_mv_csc_to_coo(a,b,info)
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())
@ -2328,7 +2328,7 @@ subroutine psb_c_cp_csc_to_fmt(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
nc = a%get_ncols() nc = a%get_ncols()
nz = a%get_nzeros() nz = max(a%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
@ -2461,7 +2461,7 @@ subroutine psb_c_cp_csc_from_fmt(a,b,info)
if (b%is_dev()) call b%sync() if (b%is_dev()) call b%sync()
a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
nc = b%get_ncols() nc = b%get_ncols()
nz = b%get_nzeros() nz = max(b%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
@ -4058,7 +4058,7 @@ subroutine psb_lc_mv_csc_to_coo(a,b,info)
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())

@ -3318,7 +3318,7 @@ subroutine psb_c_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())
@ -3489,7 +3489,7 @@ subroutine psb_c_cp_csr_to_fmt(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
nr = a%get_nrows() nr = a%get_nrows()
nz = a%get_nzeros() nz = max(a%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
@ -3594,7 +3594,7 @@ subroutine psb_c_cp_csr_from_fmt(a,b,info)
if (b%is_dev()) call b%sync() if (b%is_dev()) call b%sync()
a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
nr = b%get_nrows() nr = b%get_nrows()
nz = b%get_nzeros() nz = max(b%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
@ -3805,6 +3805,7 @@ contains
integer(psb_ipk_) :: ma, nb integer(psb_ipk_) :: ma, nb
integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_), allocatable :: col_inds(:), offsets(:)
integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx
integer(psb_ipk_) :: nth, lth,ith
ma = a%get_nrows() ma = a%get_nrows()
nb = b%get_ncols() nb = b%get_ncols()
@ -3815,12 +3816,19 @@ contains
! dense accumulator ! dense accumulator
! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf
call psb_realloc(nb, acc, info) call psb_realloc(nb, acc, info)
!$omp parallel shared(nth,lth)
!$omp single
nth = omp_get_num_threads()
lth = min(nth, ma)
!$omp end single
!$omp end parallel
allocate(offsets(omp_get_max_threads())) allocate(offsets(omp_get_max_threads()))
!$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) &
!$omp shared(a,b,c,offsets) !$omp num_threads(lth) shared(a,b,c,offsets)
thread_upperbound = 0 thread_upperbound = 0
start_idx = 0 start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j) !$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma do irw = 1, ma
if (start_idx == 0) then if (start_idx == 0) then
@ -3876,15 +3884,14 @@ contains
!$omp end single !$omp end single
!$omp barrier !$omp barrier
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if end if
do irw = start_idx, end_idx - 1 do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do end do
end if
!$omp barrier !$omp barrier
!$omp single !$omp single
@ -3892,9 +3899,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info) call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single !$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel !$omp end parallel
end subroutine spmm_omp_gustavson end subroutine spmm_omp_gustavson
@ -3930,6 +3938,7 @@ contains
!$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets)
thread_upperbound = 0 thread_upperbound = 0
start_idx = 0 start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j) !$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma do irw = 1, ma
do jj = a%irp(irw), a%irp(irw + 1) - 1 do jj = a%irp(irw), a%irp(irw + 1) - 1
@ -3996,14 +4005,14 @@ contains
!$omp barrier !$omp barrier
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if end if
do irw = start_idx, end_idx - 1 do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do end do
end if
!$omp barrier !$omp barrier
!$omp single !$omp single
@ -4011,9 +4020,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info) call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single !$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel !$omp end parallel
end subroutine spmm_omp_gustavson_1d end subroutine spmm_omp_gustavson_1d
@ -4312,6 +4322,266 @@ contains
end subroutine psb_ccsrspspmm end subroutine psb_ccsrspspmm
#endif #endif
subroutine psb_c_ecsr_mold(a,b,info)
use psb_c_csr_mat_mod, psb_protect_name => psb_c_ecsr_mold
use psb_error_mod
implicit none
class(psb_c_ecsr_sparse_mat), intent(in) :: a
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name='ecsr_mold'
logical, parameter :: debug=.false.
call psb_get_erraction(err_act)
info = 0
if (allocated(b)) then
call b%free()
deallocate(b,stat=info)
end if
if (info == 0) allocate(psb_c_ecsr_sparse_mat :: b, stat=info)
if (info /= 0) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
goto 9999
end if
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_ecsr_mold
subroutine psb_c_ecsr_csmv(alpha,a,x,beta,y,info,trans)
use psb_error_mod
use psb_string_mod
use psb_c_csr_mat_mod, psb_protect_name => psb_c_ecsr_csmv
implicit none
class(psb_c_ecsr_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta, x(:)
complex(psb_spk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: m, n
logical :: tra, ctra
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name='c_csr_csmv'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (a%is_dev()) call a%sync()
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T')
ctra = (psb_toupper(trans_) == 'C')
if (tra.or.ctra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = psb_err_input_asize_small_i_
ierr(1) = 3; ierr(2) = size(x); ierr(3) = n;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if (size(y,1)<m) then
info = psb_err_input_asize_small_i_
ierr(1) = 5; ierr(2) = size(y); ierr(3) =m;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if ((beta == cone).and.&
& .not.(tra.or.ctra.or.(a%is_triangle()).or.(a%is_unit()))) then
call psb_c_ecsr_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,&
& a%nnerws,a%nerwp,x,y)
else
call a%psb_c_csr_sparse_mat%csmv(alpha,x,beta,y,info,trans)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
contains
subroutine psb_c_ecsr_csmv_inner(m,n,alpha,irp,ja,val,&
& nnerws,nerwp,x,y)
integer(psb_ipk_), intent(in) :: m,n,nnerws,irp(*),ja(*),nerwp(*)
complex(psb_spk_), intent(in) :: alpha, x(*),val(*)
complex(psb_spk_), intent(inout) :: y(*)
integer(psb_ipk_) :: i,j,ir
complex(psb_spk_) :: acc
if (alpha == czero) return
if (alpha == cone) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = czero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + acc
end do
else if (alpha == -cone) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = czero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) -acc
end do
else
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = czero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + alpha*acc
end do
end if
end subroutine psb_c_ecsr_csmv_inner
end subroutine psb_c_ecsr_csmv
subroutine psb_c_ecsr_cmp_nerwp(a,info)
use psb_const_mod
use psb_realloc_mod
use psb_c_base_mat_mod
use psb_c_csr_mat_mod, psb_protect_name => psb_c_ecsr_cmp_nerwp
implicit none
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nnerws, i, nr, nzr
info = psb_success_
nr = a%get_nrows()
call psb_realloc(nr,a%nerwp,info)
nnerws = 0
do i=1, nr
nzr = a%irp(i+1)-a%irp(i)
if (nzr>0) then
nnerws = nnerws + 1
a%nerwp(nnerws) = i
end if
end do
call psb_realloc(nnerws,a%nerwp,info)
a%nnerws = nnerws
end subroutine psb_c_ecsr_cmp_nerwp
subroutine psb_c_cp_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_c_base_mat_mod
use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_ecsr_from_coo
implicit none
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_c_csr_sparse_mat%cp_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_c_cp_ecsr_from_coo
subroutine psb_c_mv_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_error_mod
use psb_c_base_mat_mod
use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_ecsr_from_coo
implicit none
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_c_csr_sparse_mat%mv_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_c_mv_ecsr_from_coo
subroutine psb_c_mv_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_c_base_mat_mod
use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_ecsr_from_fmt
implicit none
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_c_csr_sparse_mat%mv_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_c_mv_ecsr_from_fmt
subroutine psb_c_cp_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_c_base_mat_mod
use psb_realloc_mod
use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_ecsr_from_fmt
implicit none
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_c_cp_ecsr_from_fmt
! !
! !
! lc version ! lc version
@ -6021,7 +6291,7 @@ subroutine psb_lc_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())

@ -1213,6 +1213,106 @@ subroutine psb_c_b_csclip(a,b,info,&
end subroutine psb_c_b_csclip end subroutine psb_c_b_csclip
subroutine psb_c_split_nd(a,n_rows,n_cols,info)
use psb_error_mod
use psb_string_mod
use psb_c_mat_mod, psb_protect_name => psb_c_split_nd
implicit none
class(psb_cspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_c_base_sparse_mat), intent(in), optional :: mold
type(psb_c_coo_sparse_mat) :: acoo
type(psb_c_csr_sparse_mat), allocatable :: aclip
type(psb_c_ecsr_sparse_mat), allocatable :: andclip
logical, parameter :: use_ecsr=.true.
character(len=20) :: name, ch_err
integer(psb_ipk_) :: err_act
info = psb_success_
name = 'psb_split'
call psb_erractionsave(err_act)
allocate(aclip)
call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.)
allocate(a%ad,mold=a%a)
call a%ad%mv_from_coo(acoo,info)
call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.)
if (use_ecsr) then
allocate(andclip)
call andclip%mv_from_coo(acoo,info)
call move_alloc(andclip,a%and)
else
allocate(a%and,mold=a%a)
call a%and%mv_from_coo(acoo,info)
end if
if (psb_errstatus_fatal()) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='cscnv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_split_nd
subroutine psb_c_merge_nd(a,n_rows,n_cols,info)
use psb_error_mod
use psb_string_mod
use psb_c_mat_mod, psb_protect_name => psb_c_merge_nd
implicit none
class(psb_cspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_c_base_sparse_mat), intent(in), optional :: mold
type(psb_c_coo_sparse_mat) :: acoo1,acoo2
integer(psb_ipk_) :: nz
logical, parameter :: use_ecsr=.true.
character(len=20) :: name, ch_err
integer(psb_ipk_) :: err_act
info = psb_success_
name = 'psb_split'
call psb_erractionsave(err_act)
call a%ad%mv_to_coo(acoo1,info)
call acoo1%set_bld()
call acoo1%set_nrows(n_rows)
call acoo1%set_ncols(n_cols)
call a%and%mv_to_coo(acoo2,info)
nz=acoo2%get_nzeros()
call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info)
if (allocated(a%a)) then
call a%a%free()
deallocate(a%a)
end if
allocate(a%a,mold=a%ad)
call a%a%mv_from_coo(acoo1,info)
if (psb_errstatus_fatal()) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='cscnv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_merge_nd
subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl) subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl)
use psb_error_mod use psb_error_mod
use psb_string_mod use psb_string_mod
@ -1246,6 +1346,7 @@ subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl)
goto 9999 goto 9999
end if end if
if (.false.) then
if (present(mold)) then if (present(mold)) then
allocate(altmp, mold=mold,stat=info) allocate(altmp, mold=mold,stat=info)
@ -1294,6 +1395,16 @@ subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl)
end if end if
call move_alloc(altmp,b%a) call move_alloc(altmp,b%a)
else
call inner_cp_fmt(a%a,b%a,info,type,mold,dupl)
if (allocated(a%ad)) then
call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_cp_fmt(a%and,b%and,info,type,mold,dupl)
end if
end if
call b%trim() call b%trim()
call b%set_asb() call b%set_asb()
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
@ -1303,7 +1414,79 @@ subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl)
9999 call psb_error_handler(err_act) 9999 call psb_error_handler(err_act)
return return
contains
subroutine inner_cp_fmt(a,b,info,type,mold,dupl)
class(psb_c_base_sparse_mat), intent(in) :: a
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_c_base_sparse_mat), intent(in), optional :: mold
class(psb_c_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_c_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_c_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_c_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_c_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%cp_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,b)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_cp_fmt
end subroutine psb_c_cscnv end subroutine psb_c_cscnv
subroutine psb_c_cscnv_ip(a,info,type,mold,dupl) subroutine psb_c_cscnv_ip(a,info,type,mold,dupl)
@ -1318,7 +1501,6 @@ subroutine psb_c_cscnv_ip(a,info,type,mold,dupl)
character(len=*), optional, intent(in) :: type character(len=*), optional, intent(in) :: type
class(psb_c_base_sparse_mat), intent(in), optional :: mold class(psb_c_base_sparse_mat), intent(in), optional :: mold
class(psb_c_base_sparse_mat), allocatable :: altmp class(psb_c_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act integer(psb_ipk_) :: err_act
character(len=20) :: name='cscnv_ip' character(len=20) :: name='cscnv_ip'
@ -1345,6 +1527,7 @@ subroutine psb_c_cscnv_ip(a,info,type,mold,dupl)
goto 9999 goto 9999
end if end if
if (.false.) then
if (present(mold)) then if (present(mold)) then
allocate(altmp, mold=mold,stat=info) allocate(altmp, mold=mold,stat=info)
@ -1377,14 +1560,22 @@ subroutine psb_c_cscnv_ip(a,info,type,mold,dupl)
& a%get_fmt(),' to ',altmp%get_fmt() & a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a%a, info) call altmp%mv_from_fmt(a%a, info)
call move_alloc(altmp,a%a)
else
call inner_mv_fmt(a%a,info,type,mold,dupl)
if (allocated(a%ad)) then
call inner_mv_fmt(a%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_mv_fmt(a%and,info,type,mold,dupl)
end if
end if
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_from_subroutine_ info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from") call psb_errpush(info,name,a_err="mv_from")
goto 9999 goto 9999
end if end if
call move_alloc(altmp,a%a)
call a%trim() call a%trim()
call a%set_asb() call a%set_asb()
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
@ -1394,6 +1585,77 @@ subroutine psb_c_cscnv_ip(a,info,type,mold,dupl)
9999 call psb_error_handler(err_act) 9999 call psb_error_handler(err_act)
return return
contains
subroutine inner_mv_fmt(a,info,type,mold,dupl)
class(psb_c_base_sparse_mat), intent(inout), allocatable :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_c_base_sparse_mat), intent(in), optional :: mold
class(psb_c_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_c_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_c_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_c_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_c_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,a)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_mv_fmt
end subroutine psb_c_cscnv_ip end subroutine psb_c_cscnv_ip

@ -2006,8 +2006,8 @@ subroutine psb_d_base_vect_mv(alpha,a,x,beta,y,info,trans)
! For the time being we just throw everything back ! For the time being we just throw everything back
! onto the normal routines. ! onto the normal routines.
call x%sync() if (x%is_dev()) call x%sync()
call y%sync() if (y%is_dev()) call y%sync()
call a%spmm(alpha,x%v,beta,y%v,info,trans) call a%spmm(alpha,x%v,beta,y%v,info,trans)
call y%set_host() call y%set_host()
end subroutine psb_d_base_vect_mv end subroutine psb_d_base_vect_mv
@ -2060,8 +2060,8 @@ subroutine psb_d_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
goto 9999 goto 9999
end if end if
call x%sync() if (x%is_dev()) call x%sync()
call y%sync() if (y%is_dev()) call y%sync()
if (present(d)) then if (present(d)) then
call d%sync() call d%sync()
if (present(scale)) then if (present(scale)) then
@ -2082,6 +2082,7 @@ subroutine psb_d_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
if (info == psb_success_)& if (info == psb_success_)&
& call a%inner_spsm(alpha,tmpv,beta,y,info,trans) & call a%inner_spsm(alpha,tmpv,beta,y,info,trans)
call y%set_host()
if (info == psb_success_) then if (info == psb_success_) then
call tmpv%free(info) call tmpv%free(info)
if (info == psb_success_) deallocate(tmpv,stat=info) if (info == psb_success_) deallocate(tmpv,stat=info)
@ -2161,8 +2162,11 @@ subroutine psb_d_base_inner_vect_sv(alpha,a,x,beta,y,info,trans)
info = psb_success_ info = psb_success_
call psb_erractionsave(err_act) call psb_erractionsave(err_act)
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) call a%inner_spsm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_from_subroutine_ info = psb_err_from_subroutine_

@ -166,6 +166,7 @@ subroutine psb_d_coo_scals(d,a,info)
call a%make_nonunit() call a%make_nonunit()
end if end if
!$omp parallel do private(i)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d a%val(i) = a%val(i) * d
enddo enddo
@ -4174,7 +4175,6 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
#endif #endif
info = psb_success_ info = psb_success_
@ -4205,7 +4205,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
! 'iaux' has to allow the threads to have an exclusive group ! 'iaux' has to allow the threads to have an exclusive group
! of indices as work space. Since each thread handles one ! of indices as work space. Since each thread handles one
! row/column at the time, we allocate this way. ! row/column at the time, we allocate this way.
allocate(iaux(MAX((nc+2),(nr+2))*maxthreads),stat=info) allocate(iaux(MAX((nc+2),(nr+2))),stat=info)
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_alloc_dealloc_ info = psb_err_alloc_dealloc_
call psb_errpush(info,name) call psb_errpush(info,name)
@ -4214,7 +4214,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
#else #else
allocate(iaux(nzin+2),stat=info) allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info)
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_alloc_dealloc_ info = psb_err_alloc_dealloc_
call psb_errpush(info,name) call psb_errpush(info,name)
@ -4268,7 +4268,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!locals !locals
integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:)
real(psb_dpk_), allocatable :: vs(:) real(psb_dpk_), allocatable :: vs(:)
integer(psb_ipk_) :: nza, nzl,iret integer(psb_ipk_) :: nza, nzl,iret, maxnzr
integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name = 'psb_fixcoo' character(len=20) :: name = 'psb_fixcoo'
@ -4277,7 +4277,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) integer(psb_ipk_), allocatable :: kaux(:),idxaux(:)
#endif #endif
info = psb_success_ info = psb_success_
@ -4301,11 +4301,14 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
use_buffers = .false. use_buffers = .false.
end if end if
if (use_buffers) then !if (use_buffers) then
iaux(:) = 0
#if defined(OPENMP) #if defined(OPENMP)
!$omp workshare
iaux(:) = 0
!$omp end workshare
maxnzr = 0
!$OMP PARALLEL DO default(none) schedule(STATIC) & !$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP shared(nzin,ia,nr,iaux) & !$OMP shared(nzin,ia,nr,iaux,maxnzr) &
!$OMP private(i) & !$OMP private(i) &
!$OMP reduction(.and.:use_buffers) !$OMP reduction(.and.:use_buffers)
do i=1,nzin do i=1,nzin
@ -4319,7 +4322,16 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end if end if
end do end do
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
maxnzr = 0
!$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP private(i) shared(nr,iaux)&
!$OMP reduction(max:maxnzr)
do i=1,nr
maxnzr = max(maxnzr,iaux(i))
end do
!$OMP END PARALLEL DO
#else #else
iaux(:) = 0
!srt_inp = .true. !srt_inp = .true.
do i=1,nzin do i=1,nzin
if ((ia(i) < 1).or.(ia(i) > nr)) then if ((ia(i) < 1).or.(ia(i) > nr)) then
@ -4333,8 +4345,12 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) !srt_inp = srt_inp .and.(ia(i-1)<=ia(i))
end do end do
maxnzr = 0
do i=1,nr
maxnzr = max(maxnzr,iaux(i))
end do
#endif #endif
end if !end if
! Check again use_buffers. We enter here if nzin >= nr and ! Check again use_buffers. We enter here if nzin >= nr and
! all the indices are valid ! all the indices are valid
@ -4342,22 +4358,21 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
if (use_buffers) then if (use_buffers) then
#if defined(OPENMP) #if defined(OPENMP)
maxthreads = omp_get_max_threads() maxthreads = omp_get_max_threads()
allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info)
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_alloc_dealloc_ info = psb_err_alloc_dealloc_
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
end if end if
!$omp workshare
kaux(:) = 0 kaux(:) = 0
sum(:) = 0 !$omp end workshare
sum(1) = 1
err = 0 err = 0
! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting
! index for each row. We do the same on 'kaux' ! index for each row. We do the same on 'kaux'
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP shared(maxnzr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, &
!$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info)
@ -4382,36 +4397,40 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!write(0,*) 'fix_coo_inner: trying with exscan' !write(0,*) 'fix_coo_inner: trying with exscan'
call psi_exscan(nr+1,iaux,info,shift=ione) call psi_exscan(nr+1,iaux,info,shift=ione)
!$OMP BARRIER !$OMP BARRIER
!$OMP SINGLE
!t0 = omp_get_wtime()
!$OMP END SINGLE
! ------------------ Sorting and buffers ------------------- ! ------------------ Sorting and buffers -------------------
! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving
! unmodified 'iaux' ! unmodified 'iaux'
do j=idxstart,idxend
!$omp do private(j)
do j=1,nr+1
idxaux(j) = iaux(j) idxaux(j) = iaux(j)
end do end do
!$omp end do
! Here we sort data inside the auxiliary buffers ! Here we sort data inside the auxiliary buffers
!$omp do private(act_row,i,i1)
do i=1,nzin do i=1,nzin
act_row = ia(i) act_row = ia(i)
if ((act_row >= idxstart) .and. (act_row <= idxend)) then !$omp atomic capture
ias(idxaux(act_row)) = ia(i) i1 =idxaux(act_row)
jas(idxaux(act_row)) = ja(i)
vs(idxaux(act_row)) = val(i)
idxaux(act_row) = idxaux(act_row) + 1 idxaux(act_row) = idxaux(act_row) + 1
end if !$omp end atomic
ias(i1) = ia(i)
jas(i1) = ja(i)
vs(i1) = val(i)
end do end do
!$omp end do
!$OMP BARRIER
!$OMP SINGLE
!t1 = omp_get_wtime()
!write(0,*) ithread,'Srt&Cpy :',t1-t0
!$OMP END SINGLE
! Let's sort column indices and values. After that we will store ! Let's sort column indices and values. After that we will store
! the number of unique values in 'kaux' ! the number of unique values in 'kaux'
do j=idxstart,idxend block
integer(psb_ipk_), allocatable :: ixt(:)
allocate(ixt(2*maxnzr+2))
!$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256)
do j=1,nr
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
nzl = last_elem - first_elem + 1 nzl = last_elem - first_elem + 1
@ -4419,11 +4438,11 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! The row has elements? ! The row has elements?
if (nzl > 0) then if (nzl > 0) then
call psi_msort_up(nzl,jas(first_elem:last_elem), & call psi_msort_up(nzl,jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) & ixt,iret)
if (iret == 0) then if (iret == 0) then
call psb_ip_reord(nzl,vs(first_elem:last_elem),& call psb_ip_reord(nzl,vs(first_elem:last_elem),&
& ias(first_elem:last_elem),jas(first_elem:last_elem), & & ias(first_elem:last_elem),jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) & ixt)
end if end if
! Over each row we count the unique values ! Over each row we count the unique values
@ -4436,6 +4455,9 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end do end do
end if end if
end do end do
!$omp end do
deallocate(ixt)
end block
! -------------------------------------------------- ! --------------------------------------------------
! ---------------- kaux composition ---------------- ! ---------------- kaux composition ----------------
@ -4553,7 +4575,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
nzout = kaux(nr+1) - 1 nzout = kaux(nr+1) - 1
deallocate(sum,kaux,idxaux,stat=info) deallocate(kaux,idxaux,stat=info)
#else #else
!if (.not.srt_inp) then !if (.not.srt_inp) then
@ -4710,7 +4732,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
& call psb_ip_reord(nzin,val,ia,ja,iaux) & call psb_ip_reord(nzin,val,ia,ja,iaux)
#if defined(OPENMP) #if defined(OPENMP)
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) &
!$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, &
!$OMP work,first_elem,last_elem) !$OMP work,first_elem,last_elem)
@ -4732,6 +4754,9 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
idxend = idxstart + work - 1 idxend = idxstart + work - 1
block
integer(psb_ipk_), allocatable :: ixt(:)
allocate(ixt(2*maxnzr+2))
! --------------------------------------------------- ! ---------------------------------------------------
first_elem = 0 first_elem = 0
@ -4746,11 +4771,10 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
nzl = last_elem - first_elem + 1 nzl = last_elem - first_elem + 1
if (nzl > 0) then if (nzl > 0) then
call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) call psi_msort_up(nzl,ja(first_elem:last_elem),ixt,iret)
if (iret == 0) & if (iret == 0) &
& call psb_ip_reord(nzl,val(first_elem:last_elem),& & call psb_ip_reord(nzl,val(first_elem:last_elem),&
& ia(first_elem:last_elem),ja(first_elem:last_elem),& & ia(first_elem:last_elem),ja(first_elem:last_elem),ixt)
& iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2))
end if end if
act_row = act_row + 1 act_row = act_row + 1
@ -4764,6 +4788,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
last_elem = j last_elem = j
end if end if
end do end do
end block
!$OMP END PARALLEL !$OMP END PARALLEL
#else #else
i = 1 i = 1

@ -2163,7 +2163,7 @@ subroutine psb_d_mv_csc_to_coo(a,b,info)
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())
@ -2328,7 +2328,7 @@ subroutine psb_d_cp_csc_to_fmt(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
nc = a%get_ncols() nc = a%get_ncols()
nz = a%get_nzeros() nz = max(a%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
@ -2461,7 +2461,7 @@ subroutine psb_d_cp_csc_from_fmt(a,b,info)
if (b%is_dev()) call b%sync() if (b%is_dev()) call b%sync()
a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
nc = b%get_ncols() nc = b%get_ncols()
nz = b%get_nzeros() nz = max(b%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
@ -4058,7 +4058,7 @@ subroutine psb_ld_mv_csc_to_coo(a,b,info)
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_ld_base_sparse_mat = a%psb_ld_base_sparse_mat b%psb_ld_base_sparse_mat = a%psb_ld_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())

@ -3318,7 +3318,7 @@ subroutine psb_d_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())
@ -3489,7 +3489,7 @@ subroutine psb_d_cp_csr_to_fmt(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
nr = a%get_nrows() nr = a%get_nrows()
nz = a%get_nzeros() nz = max(a%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
@ -3594,7 +3594,7 @@ subroutine psb_d_cp_csr_from_fmt(a,b,info)
if (b%is_dev()) call b%sync() if (b%is_dev()) call b%sync()
a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
nr = b%get_nrows() nr = b%get_nrows()
nz = b%get_nzeros() nz = max(b%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
@ -3805,6 +3805,7 @@ contains
integer(psb_ipk_) :: ma, nb integer(psb_ipk_) :: ma, nb
integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_), allocatable :: col_inds(:), offsets(:)
integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx
integer(psb_ipk_) :: nth, lth,ith
ma = a%get_nrows() ma = a%get_nrows()
nb = b%get_ncols() nb = b%get_ncols()
@ -3815,12 +3816,19 @@ contains
! dense accumulator ! dense accumulator
! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf
call psb_realloc(nb, acc, info) call psb_realloc(nb, acc, info)
!$omp parallel shared(nth,lth)
!$omp single
nth = omp_get_num_threads()
lth = min(nth, ma)
!$omp end single
!$omp end parallel
allocate(offsets(omp_get_max_threads())) allocate(offsets(omp_get_max_threads()))
!$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) &
!$omp shared(a,b,c,offsets) !$omp num_threads(lth) shared(a,b,c,offsets)
thread_upperbound = 0 thread_upperbound = 0
start_idx = 0 start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j) !$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma do irw = 1, ma
if (start_idx == 0) then if (start_idx == 0) then
@ -3876,15 +3884,14 @@ contains
!$omp end single !$omp end single
!$omp barrier !$omp barrier
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if end if
do irw = start_idx, end_idx - 1 do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do end do
end if
!$omp barrier !$omp barrier
!$omp single !$omp single
@ -3892,9 +3899,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info) call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single !$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel !$omp end parallel
end subroutine spmm_omp_gustavson end subroutine spmm_omp_gustavson
@ -3930,6 +3938,7 @@ contains
!$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets)
thread_upperbound = 0 thread_upperbound = 0
start_idx = 0 start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j) !$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma do irw = 1, ma
do jj = a%irp(irw), a%irp(irw + 1) - 1 do jj = a%irp(irw), a%irp(irw + 1) - 1
@ -3996,14 +4005,14 @@ contains
!$omp barrier !$omp barrier
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if end if
do irw = start_idx, end_idx - 1 do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do end do
end if
!$omp barrier !$omp barrier
!$omp single !$omp single
@ -4011,9 +4020,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info) call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single !$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel !$omp end parallel
end subroutine spmm_omp_gustavson_1d end subroutine spmm_omp_gustavson_1d
@ -4312,6 +4322,266 @@ contains
end subroutine psb_dcsrspspmm end subroutine psb_dcsrspspmm
#endif #endif
subroutine psb_d_ecsr_mold(a,b,info)
use psb_d_csr_mat_mod, psb_protect_name => psb_d_ecsr_mold
use psb_error_mod
implicit none
class(psb_d_ecsr_sparse_mat), intent(in) :: a
class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name='ecsr_mold'
logical, parameter :: debug=.false.
call psb_get_erraction(err_act)
info = 0
if (allocated(b)) then
call b%free()
deallocate(b,stat=info)
end if
if (info == 0) allocate(psb_d_ecsr_sparse_mat :: b, stat=info)
if (info /= 0) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
goto 9999
end if
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_d_ecsr_mold
subroutine psb_d_ecsr_csmv(alpha,a,x,beta,y,info,trans)
use psb_error_mod
use psb_string_mod
use psb_d_csr_mat_mod, psb_protect_name => psb_d_ecsr_csmv
implicit none
class(psb_d_ecsr_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(in) :: alpha, beta, x(:)
real(psb_dpk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: m, n
logical :: tra, ctra
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name='d_csr_csmv'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (a%is_dev()) call a%sync()
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T')
ctra = (psb_toupper(trans_) == 'C')
if (tra.or.ctra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = psb_err_input_asize_small_i_
ierr(1) = 3; ierr(2) = size(x); ierr(3) = n;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if (size(y,1)<m) then
info = psb_err_input_asize_small_i_
ierr(1) = 5; ierr(2) = size(y); ierr(3) =m;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if ((beta == done).and.&
& .not.(tra.or.ctra.or.(a%is_triangle()).or.(a%is_unit()))) then
call psb_d_ecsr_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,&
& a%nnerws,a%nerwp,x,y)
else
call a%psb_d_csr_sparse_mat%csmv(alpha,x,beta,y,info,trans)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
contains
subroutine psb_d_ecsr_csmv_inner(m,n,alpha,irp,ja,val,&
& nnerws,nerwp,x,y)
integer(psb_ipk_), intent(in) :: m,n,nnerws,irp(*),ja(*),nerwp(*)
real(psb_dpk_), intent(in) :: alpha, x(*),val(*)
real(psb_dpk_), intent(inout) :: y(*)
integer(psb_ipk_) :: i,j,ir
real(psb_dpk_) :: acc
if (alpha == dzero) return
if (alpha == done) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = dzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + acc
end do
else if (alpha == -done) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = dzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) -acc
end do
else
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = dzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + alpha*acc
end do
end if
end subroutine psb_d_ecsr_csmv_inner
end subroutine psb_d_ecsr_csmv
subroutine psb_d_ecsr_cmp_nerwp(a,info)
use psb_const_mod
use psb_realloc_mod
use psb_d_base_mat_mod
use psb_d_csr_mat_mod, psb_protect_name => psb_d_ecsr_cmp_nerwp
implicit none
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nnerws, i, nr, nzr
info = psb_success_
nr = a%get_nrows()
call psb_realloc(nr,a%nerwp,info)
nnerws = 0
do i=1, nr
nzr = a%irp(i+1)-a%irp(i)
if (nzr>0) then
nnerws = nnerws + 1
a%nerwp(nnerws) = i
end if
end do
call psb_realloc(nnerws,a%nerwp,info)
a%nnerws = nnerws
end subroutine psb_d_ecsr_cmp_nerwp
subroutine psb_d_cp_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_d_base_mat_mod
use psb_d_csr_mat_mod, psb_protect_name => psb_d_cp_ecsr_from_coo
implicit none
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_d_csr_sparse_mat%cp_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_d_cp_ecsr_from_coo
subroutine psb_d_mv_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_error_mod
use psb_d_base_mat_mod
use psb_d_csr_mat_mod, psb_protect_name => psb_d_mv_ecsr_from_coo
implicit none
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_d_csr_sparse_mat%mv_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_d_mv_ecsr_from_coo
subroutine psb_d_mv_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_d_base_mat_mod
use psb_d_csr_mat_mod, psb_protect_name => psb_d_mv_ecsr_from_fmt
implicit none
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_d_csr_sparse_mat%mv_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_d_mv_ecsr_from_fmt
subroutine psb_d_cp_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_d_base_mat_mod
use psb_realloc_mod
use psb_d_csr_mat_mod, psb_protect_name => psb_d_cp_ecsr_from_fmt
implicit none
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_d_csr_sparse_mat%cp_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_d_cp_ecsr_from_fmt
! !
! !
! ld version ! ld version
@ -6021,7 +6291,7 @@ subroutine psb_ld_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_ld_base_sparse_mat = a%psb_ld_base_sparse_mat b%psb_ld_base_sparse_mat = a%psb_ld_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())

@ -1213,6 +1213,106 @@ subroutine psb_d_b_csclip(a,b,info,&
end subroutine psb_d_b_csclip end subroutine psb_d_b_csclip
subroutine psb_d_split_nd(a,n_rows,n_cols,info)
use psb_error_mod
use psb_string_mod
use psb_d_mat_mod, psb_protect_name => psb_d_split_nd
implicit none
class(psb_dspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_d_base_sparse_mat), intent(in), optional :: mold
type(psb_d_coo_sparse_mat) :: acoo
type(psb_d_csr_sparse_mat), allocatable :: aclip
type(psb_d_ecsr_sparse_mat), allocatable :: andclip
logical, parameter :: use_ecsr=.true.
character(len=20) :: name, ch_err
integer(psb_ipk_) :: err_act
info = psb_success_
name = 'psb_split'
call psb_erractionsave(err_act)
allocate(aclip)
call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.)
allocate(a%ad,mold=a%a)
call a%ad%mv_from_coo(acoo,info)
call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.)
if (use_ecsr) then
allocate(andclip)
call andclip%mv_from_coo(acoo,info)
call move_alloc(andclip,a%and)
else
allocate(a%and,mold=a%a)
call a%and%mv_from_coo(acoo,info)
end if
if (psb_errstatus_fatal()) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='cscnv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_d_split_nd
subroutine psb_d_merge_nd(a,n_rows,n_cols,info)
use psb_error_mod
use psb_string_mod
use psb_d_mat_mod, psb_protect_name => psb_d_merge_nd
implicit none
class(psb_dspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_d_base_sparse_mat), intent(in), optional :: mold
type(psb_d_coo_sparse_mat) :: acoo1,acoo2
integer(psb_ipk_) :: nz
logical, parameter :: use_ecsr=.true.
character(len=20) :: name, ch_err
integer(psb_ipk_) :: err_act
info = psb_success_
name = 'psb_split'
call psb_erractionsave(err_act)
call a%ad%mv_to_coo(acoo1,info)
call acoo1%set_bld()
call acoo1%set_nrows(n_rows)
call acoo1%set_ncols(n_cols)
call a%and%mv_to_coo(acoo2,info)
nz=acoo2%get_nzeros()
call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info)
if (allocated(a%a)) then
call a%a%free()
deallocate(a%a)
end if
allocate(a%a,mold=a%ad)
call a%a%mv_from_coo(acoo1,info)
if (psb_errstatus_fatal()) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='cscnv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_d_merge_nd
subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl) subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl)
use psb_error_mod use psb_error_mod
use psb_string_mod use psb_string_mod
@ -1246,6 +1346,7 @@ subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl)
goto 9999 goto 9999
end if end if
if (.false.) then
if (present(mold)) then if (present(mold)) then
allocate(altmp, mold=mold,stat=info) allocate(altmp, mold=mold,stat=info)
@ -1294,6 +1395,16 @@ subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl)
end if end if
call move_alloc(altmp,b%a) call move_alloc(altmp,b%a)
else
call inner_cp_fmt(a%a,b%a,info,type,mold,dupl)
if (allocated(a%ad)) then
call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_cp_fmt(a%and,b%and,info,type,mold,dupl)
end if
end if
call b%trim() call b%trim()
call b%set_asb() call b%set_asb()
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
@ -1303,7 +1414,79 @@ subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl)
9999 call psb_error_handler(err_act) 9999 call psb_error_handler(err_act)
return return
contains
subroutine inner_cp_fmt(a,b,info,type,mold,dupl)
class(psb_d_base_sparse_mat), intent(in) :: a
class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_d_base_sparse_mat), intent(in), optional :: mold
class(psb_d_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_d_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_d_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_d_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_d_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%cp_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,b)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_cp_fmt
end subroutine psb_d_cscnv end subroutine psb_d_cscnv
subroutine psb_d_cscnv_ip(a,info,type,mold,dupl) subroutine psb_d_cscnv_ip(a,info,type,mold,dupl)
@ -1318,7 +1501,6 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl)
character(len=*), optional, intent(in) :: type character(len=*), optional, intent(in) :: type
class(psb_d_base_sparse_mat), intent(in), optional :: mold class(psb_d_base_sparse_mat), intent(in), optional :: mold
class(psb_d_base_sparse_mat), allocatable :: altmp class(psb_d_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act integer(psb_ipk_) :: err_act
character(len=20) :: name='cscnv_ip' character(len=20) :: name='cscnv_ip'
@ -1345,6 +1527,7 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl)
goto 9999 goto 9999
end if end if
if (.false.) then
if (present(mold)) then if (present(mold)) then
allocate(altmp, mold=mold,stat=info) allocate(altmp, mold=mold,stat=info)
@ -1377,14 +1560,22 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl)
& a%get_fmt(),' to ',altmp%get_fmt() & a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a%a, info) call altmp%mv_from_fmt(a%a, info)
call move_alloc(altmp,a%a)
else
call inner_mv_fmt(a%a,info,type,mold,dupl)
if (allocated(a%ad)) then
call inner_mv_fmt(a%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_mv_fmt(a%and,info,type,mold,dupl)
end if
end if
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_from_subroutine_ info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from") call psb_errpush(info,name,a_err="mv_from")
goto 9999 goto 9999
end if end if
call move_alloc(altmp,a%a)
call a%trim() call a%trim()
call a%set_asb() call a%set_asb()
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
@ -1394,6 +1585,77 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl)
9999 call psb_error_handler(err_act) 9999 call psb_error_handler(err_act)
return return
contains
subroutine inner_mv_fmt(a,info,type,mold,dupl)
class(psb_d_base_sparse_mat), intent(inout), allocatable :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_d_base_sparse_mat), intent(in), optional :: mold
class(psb_d_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_d_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_d_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_d_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_d_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,a)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_mv_fmt
end subroutine psb_d_cscnv_ip end subroutine psb_d_cscnv_ip

@ -2006,8 +2006,8 @@ subroutine psb_s_base_vect_mv(alpha,a,x,beta,y,info,trans)
! For the time being we just throw everything back ! For the time being we just throw everything back
! onto the normal routines. ! onto the normal routines.
call x%sync() if (x%is_dev()) call x%sync()
call y%sync() if (y%is_dev()) call y%sync()
call a%spmm(alpha,x%v,beta,y%v,info,trans) call a%spmm(alpha,x%v,beta,y%v,info,trans)
call y%set_host() call y%set_host()
end subroutine psb_s_base_vect_mv end subroutine psb_s_base_vect_mv
@ -2060,8 +2060,8 @@ subroutine psb_s_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
goto 9999 goto 9999
end if end if
call x%sync() if (x%is_dev()) call x%sync()
call y%sync() if (y%is_dev()) call y%sync()
if (present(d)) then if (present(d)) then
call d%sync() call d%sync()
if (present(scale)) then if (present(scale)) then
@ -2082,6 +2082,7 @@ subroutine psb_s_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
if (info == psb_success_)& if (info == psb_success_)&
& call a%inner_spsm(alpha,tmpv,beta,y,info,trans) & call a%inner_spsm(alpha,tmpv,beta,y,info,trans)
call y%set_host()
if (info == psb_success_) then if (info == psb_success_) then
call tmpv%free(info) call tmpv%free(info)
if (info == psb_success_) deallocate(tmpv,stat=info) if (info == psb_success_) deallocate(tmpv,stat=info)
@ -2161,8 +2162,11 @@ subroutine psb_s_base_inner_vect_sv(alpha,a,x,beta,y,info,trans)
info = psb_success_ info = psb_success_
call psb_erractionsave(err_act) call psb_erractionsave(err_act)
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) call a%inner_spsm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_from_subroutine_ info = psb_err_from_subroutine_

@ -166,6 +166,7 @@ subroutine psb_s_coo_scals(d,a,info)
call a%make_nonunit() call a%make_nonunit()
end if end if
!$omp parallel do private(i)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d a%val(i) = a%val(i) * d
enddo enddo
@ -4174,7 +4175,6 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
#endif #endif
info = psb_success_ info = psb_success_
@ -4205,7 +4205,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
! 'iaux' has to allow the threads to have an exclusive group ! 'iaux' has to allow the threads to have an exclusive group
! of indices as work space. Since each thread handles one ! of indices as work space. Since each thread handles one
! row/column at the time, we allocate this way. ! row/column at the time, we allocate this way.
allocate(iaux(MAX((nc+2),(nr+2))*maxthreads),stat=info) allocate(iaux(MAX((nc+2),(nr+2))),stat=info)
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_alloc_dealloc_ info = psb_err_alloc_dealloc_
call psb_errpush(info,name) call psb_errpush(info,name)
@ -4214,7 +4214,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
#else #else
allocate(iaux(nzin+2),stat=info) allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info)
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_alloc_dealloc_ info = psb_err_alloc_dealloc_
call psb_errpush(info,name) call psb_errpush(info,name)
@ -4268,7 +4268,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!locals !locals
integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:)
real(psb_spk_), allocatable :: vs(:) real(psb_spk_), allocatable :: vs(:)
integer(psb_ipk_) :: nza, nzl,iret integer(psb_ipk_) :: nza, nzl,iret, maxnzr
integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name = 'psb_fixcoo' character(len=20) :: name = 'psb_fixcoo'
@ -4277,7 +4277,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) integer(psb_ipk_), allocatable :: kaux(:),idxaux(:)
#endif #endif
info = psb_success_ info = psb_success_
@ -4301,11 +4301,14 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
use_buffers = .false. use_buffers = .false.
end if end if
if (use_buffers) then !if (use_buffers) then
iaux(:) = 0
#if defined(OPENMP) #if defined(OPENMP)
!$omp workshare
iaux(:) = 0
!$omp end workshare
maxnzr = 0
!$OMP PARALLEL DO default(none) schedule(STATIC) & !$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP shared(nzin,ia,nr,iaux) & !$OMP shared(nzin,ia,nr,iaux,maxnzr) &
!$OMP private(i) & !$OMP private(i) &
!$OMP reduction(.and.:use_buffers) !$OMP reduction(.and.:use_buffers)
do i=1,nzin do i=1,nzin
@ -4319,7 +4322,16 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end if end if
end do end do
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
maxnzr = 0
!$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP private(i) shared(nr,iaux)&
!$OMP reduction(max:maxnzr)
do i=1,nr
maxnzr = max(maxnzr,iaux(i))
end do
!$OMP END PARALLEL DO
#else #else
iaux(:) = 0
!srt_inp = .true. !srt_inp = .true.
do i=1,nzin do i=1,nzin
if ((ia(i) < 1).or.(ia(i) > nr)) then if ((ia(i) < 1).or.(ia(i) > nr)) then
@ -4333,8 +4345,12 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) !srt_inp = srt_inp .and.(ia(i-1)<=ia(i))
end do end do
maxnzr = 0
do i=1,nr
maxnzr = max(maxnzr,iaux(i))
end do
#endif #endif
end if !end if
! Check again use_buffers. We enter here if nzin >= nr and ! Check again use_buffers. We enter here if nzin >= nr and
! all the indices are valid ! all the indices are valid
@ -4342,22 +4358,21 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
if (use_buffers) then if (use_buffers) then
#if defined(OPENMP) #if defined(OPENMP)
maxthreads = omp_get_max_threads() maxthreads = omp_get_max_threads()
allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info)
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_alloc_dealloc_ info = psb_err_alloc_dealloc_
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
end if end if
!$omp workshare
kaux(:) = 0 kaux(:) = 0
sum(:) = 0 !$omp end workshare
sum(1) = 1
err = 0 err = 0
! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting
! index for each row. We do the same on 'kaux' ! index for each row. We do the same on 'kaux'
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP shared(maxnzr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, &
!$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info)
@ -4382,36 +4397,40 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!write(0,*) 'fix_coo_inner: trying with exscan' !write(0,*) 'fix_coo_inner: trying with exscan'
call psi_exscan(nr+1,iaux,info,shift=ione) call psi_exscan(nr+1,iaux,info,shift=ione)
!$OMP BARRIER !$OMP BARRIER
!$OMP SINGLE
!t0 = omp_get_wtime()
!$OMP END SINGLE
! ------------------ Sorting and buffers ------------------- ! ------------------ Sorting and buffers -------------------
! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving
! unmodified 'iaux' ! unmodified 'iaux'
do j=idxstart,idxend
!$omp do private(j)
do j=1,nr+1
idxaux(j) = iaux(j) idxaux(j) = iaux(j)
end do end do
!$omp end do
! Here we sort data inside the auxiliary buffers ! Here we sort data inside the auxiliary buffers
!$omp do private(act_row,i,i1)
do i=1,nzin do i=1,nzin
act_row = ia(i) act_row = ia(i)
if ((act_row >= idxstart) .and. (act_row <= idxend)) then !$omp atomic capture
ias(idxaux(act_row)) = ia(i) i1 =idxaux(act_row)
jas(idxaux(act_row)) = ja(i)
vs(idxaux(act_row)) = val(i)
idxaux(act_row) = idxaux(act_row) + 1 idxaux(act_row) = idxaux(act_row) + 1
end if !$omp end atomic
ias(i1) = ia(i)
jas(i1) = ja(i)
vs(i1) = val(i)
end do end do
!$omp end do
!$OMP BARRIER
!$OMP SINGLE
!t1 = omp_get_wtime()
!write(0,*) ithread,'Srt&Cpy :',t1-t0
!$OMP END SINGLE
! Let's sort column indices and values. After that we will store ! Let's sort column indices and values. After that we will store
! the number of unique values in 'kaux' ! the number of unique values in 'kaux'
do j=idxstart,idxend block
integer(psb_ipk_), allocatable :: ixt(:)
allocate(ixt(2*maxnzr+2))
!$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256)
do j=1,nr
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
nzl = last_elem - first_elem + 1 nzl = last_elem - first_elem + 1
@ -4419,11 +4438,11 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! The row has elements? ! The row has elements?
if (nzl > 0) then if (nzl > 0) then
call psi_msort_up(nzl,jas(first_elem:last_elem), & call psi_msort_up(nzl,jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) & ixt,iret)
if (iret == 0) then if (iret == 0) then
call psb_ip_reord(nzl,vs(first_elem:last_elem),& call psb_ip_reord(nzl,vs(first_elem:last_elem),&
& ias(first_elem:last_elem),jas(first_elem:last_elem), & & ias(first_elem:last_elem),jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) & ixt)
end if end if
! Over each row we count the unique values ! Over each row we count the unique values
@ -4436,6 +4455,9 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end do end do
end if end if
end do end do
!$omp end do
deallocate(ixt)
end block
! -------------------------------------------------- ! --------------------------------------------------
! ---------------- kaux composition ---------------- ! ---------------- kaux composition ----------------
@ -4553,7 +4575,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
nzout = kaux(nr+1) - 1 nzout = kaux(nr+1) - 1
deallocate(sum,kaux,idxaux,stat=info) deallocate(kaux,idxaux,stat=info)
#else #else
!if (.not.srt_inp) then !if (.not.srt_inp) then
@ -4710,7 +4732,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
& call psb_ip_reord(nzin,val,ia,ja,iaux) & call psb_ip_reord(nzin,val,ia,ja,iaux)
#if defined(OPENMP) #if defined(OPENMP)
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) &
!$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, &
!$OMP work,first_elem,last_elem) !$OMP work,first_elem,last_elem)
@ -4732,6 +4754,9 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
idxend = idxstart + work - 1 idxend = idxstart + work - 1
block
integer(psb_ipk_), allocatable :: ixt(:)
allocate(ixt(2*maxnzr+2))
! --------------------------------------------------- ! ---------------------------------------------------
first_elem = 0 first_elem = 0
@ -4746,11 +4771,10 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
nzl = last_elem - first_elem + 1 nzl = last_elem - first_elem + 1
if (nzl > 0) then if (nzl > 0) then
call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) call psi_msort_up(nzl,ja(first_elem:last_elem),ixt,iret)
if (iret == 0) & if (iret == 0) &
& call psb_ip_reord(nzl,val(first_elem:last_elem),& & call psb_ip_reord(nzl,val(first_elem:last_elem),&
& ia(first_elem:last_elem),ja(first_elem:last_elem),& & ia(first_elem:last_elem),ja(first_elem:last_elem),ixt)
& iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2))
end if end if
act_row = act_row + 1 act_row = act_row + 1
@ -4764,6 +4788,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
last_elem = j last_elem = j
end if end if
end do end do
end block
!$OMP END PARALLEL !$OMP END PARALLEL
#else #else
i = 1 i = 1

@ -2163,7 +2163,7 @@ subroutine psb_s_mv_csc_to_coo(a,b,info)
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())
@ -2328,7 +2328,7 @@ subroutine psb_s_cp_csc_to_fmt(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
nc = a%get_ncols() nc = a%get_ncols()
nz = a%get_nzeros() nz = max(a%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
@ -2461,7 +2461,7 @@ subroutine psb_s_cp_csc_from_fmt(a,b,info)
if (b%is_dev()) call b%sync() if (b%is_dev()) call b%sync()
a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat
nc = b%get_ncols() nc = b%get_ncols()
nz = b%get_nzeros() nz = max(b%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
@ -4058,7 +4058,7 @@ subroutine psb_ls_mv_csc_to_coo(a,b,info)
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_ls_base_sparse_mat = a%psb_ls_base_sparse_mat b%psb_ls_base_sparse_mat = a%psb_ls_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())

@ -3318,7 +3318,7 @@ subroutine psb_s_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())
@ -3489,7 +3489,7 @@ subroutine psb_s_cp_csr_to_fmt(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
nr = a%get_nrows() nr = a%get_nrows()
nz = a%get_nzeros() nz = max(a%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
@ -3594,7 +3594,7 @@ subroutine psb_s_cp_csr_from_fmt(a,b,info)
if (b%is_dev()) call b%sync() if (b%is_dev()) call b%sync()
a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat
nr = b%get_nrows() nr = b%get_nrows()
nz = b%get_nzeros() nz = max(b%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
@ -3805,6 +3805,7 @@ contains
integer(psb_ipk_) :: ma, nb integer(psb_ipk_) :: ma, nb
integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_), allocatable :: col_inds(:), offsets(:)
integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx
integer(psb_ipk_) :: nth, lth,ith
ma = a%get_nrows() ma = a%get_nrows()
nb = b%get_ncols() nb = b%get_ncols()
@ -3815,12 +3816,19 @@ contains
! dense accumulator ! dense accumulator
! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf
call psb_realloc(nb, acc, info) call psb_realloc(nb, acc, info)
!$omp parallel shared(nth,lth)
!$omp single
nth = omp_get_num_threads()
lth = min(nth, ma)
!$omp end single
!$omp end parallel
allocate(offsets(omp_get_max_threads())) allocate(offsets(omp_get_max_threads()))
!$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) &
!$omp shared(a,b,c,offsets) !$omp num_threads(lth) shared(a,b,c,offsets)
thread_upperbound = 0 thread_upperbound = 0
start_idx = 0 start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j) !$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma do irw = 1, ma
if (start_idx == 0) then if (start_idx == 0) then
@ -3876,15 +3884,14 @@ contains
!$omp end single !$omp end single
!$omp barrier !$omp barrier
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if end if
do irw = start_idx, end_idx - 1 do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do end do
end if
!$omp barrier !$omp barrier
!$omp single !$omp single
@ -3892,9 +3899,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info) call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single !$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel !$omp end parallel
end subroutine spmm_omp_gustavson end subroutine spmm_omp_gustavson
@ -3930,6 +3938,7 @@ contains
!$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets)
thread_upperbound = 0 thread_upperbound = 0
start_idx = 0 start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j) !$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma do irw = 1, ma
do jj = a%irp(irw), a%irp(irw + 1) - 1 do jj = a%irp(irw), a%irp(irw + 1) - 1
@ -3996,14 +4005,14 @@ contains
!$omp barrier !$omp barrier
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if end if
do irw = start_idx, end_idx - 1 do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do end do
end if
!$omp barrier !$omp barrier
!$omp single !$omp single
@ -4011,9 +4020,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info) call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single !$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel !$omp end parallel
end subroutine spmm_omp_gustavson_1d end subroutine spmm_omp_gustavson_1d
@ -4312,6 +4322,266 @@ contains
end subroutine psb_scsrspspmm end subroutine psb_scsrspspmm
#endif #endif
subroutine psb_s_ecsr_mold(a,b,info)
use psb_s_csr_mat_mod, psb_protect_name => psb_s_ecsr_mold
use psb_error_mod
implicit none
class(psb_s_ecsr_sparse_mat), intent(in) :: a
class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name='ecsr_mold'
logical, parameter :: debug=.false.
call psb_get_erraction(err_act)
info = 0
if (allocated(b)) then
call b%free()
deallocate(b,stat=info)
end if
if (info == 0) allocate(psb_s_ecsr_sparse_mat :: b, stat=info)
if (info /= 0) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
goto 9999
end if
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_ecsr_mold
subroutine psb_s_ecsr_csmv(alpha,a,x,beta,y,info,trans)
use psb_error_mod
use psb_string_mod
use psb_s_csr_mat_mod, psb_protect_name => psb_s_ecsr_csmv
implicit none
class(psb_s_ecsr_sparse_mat), intent(in) :: a
real(psb_spk_), intent(in) :: alpha, beta, x(:)
real(psb_spk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: m, n
logical :: tra, ctra
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name='s_csr_csmv'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (a%is_dev()) call a%sync()
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T')
ctra = (psb_toupper(trans_) == 'C')
if (tra.or.ctra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = psb_err_input_asize_small_i_
ierr(1) = 3; ierr(2) = size(x); ierr(3) = n;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if (size(y,1)<m) then
info = psb_err_input_asize_small_i_
ierr(1) = 5; ierr(2) = size(y); ierr(3) =m;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if ((beta == sone).and.&
& .not.(tra.or.ctra.or.(a%is_triangle()).or.(a%is_unit()))) then
call psb_s_ecsr_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,&
& a%nnerws,a%nerwp,x,y)
else
call a%psb_s_csr_sparse_mat%csmv(alpha,x,beta,y,info,trans)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
contains
subroutine psb_s_ecsr_csmv_inner(m,n,alpha,irp,ja,val,&
& nnerws,nerwp,x,y)
integer(psb_ipk_), intent(in) :: m,n,nnerws,irp(*),ja(*),nerwp(*)
real(psb_spk_), intent(in) :: alpha, x(*),val(*)
real(psb_spk_), intent(inout) :: y(*)
integer(psb_ipk_) :: i,j,ir
real(psb_spk_) :: acc
if (alpha == szero) return
if (alpha == sone) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = szero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + acc
end do
else if (alpha == -sone) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = szero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) -acc
end do
else
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = szero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + alpha*acc
end do
end if
end subroutine psb_s_ecsr_csmv_inner
end subroutine psb_s_ecsr_csmv
subroutine psb_s_ecsr_cmp_nerwp(a,info)
use psb_const_mod
use psb_realloc_mod
use psb_s_base_mat_mod
use psb_s_csr_mat_mod, psb_protect_name => psb_s_ecsr_cmp_nerwp
implicit none
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nnerws, i, nr, nzr
info = psb_success_
nr = a%get_nrows()
call psb_realloc(nr,a%nerwp,info)
nnerws = 0
do i=1, nr
nzr = a%irp(i+1)-a%irp(i)
if (nzr>0) then
nnerws = nnerws + 1
a%nerwp(nnerws) = i
end if
end do
call psb_realloc(nnerws,a%nerwp,info)
a%nnerws = nnerws
end subroutine psb_s_ecsr_cmp_nerwp
subroutine psb_s_cp_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_s_base_mat_mod
use psb_s_csr_mat_mod, psb_protect_name => psb_s_cp_ecsr_from_coo
implicit none
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_s_csr_sparse_mat%cp_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_s_cp_ecsr_from_coo
subroutine psb_s_mv_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_error_mod
use psb_s_base_mat_mod
use psb_s_csr_mat_mod, psb_protect_name => psb_s_mv_ecsr_from_coo
implicit none
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_s_csr_sparse_mat%mv_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_s_mv_ecsr_from_coo
subroutine psb_s_mv_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_s_base_mat_mod
use psb_s_csr_mat_mod, psb_protect_name => psb_s_mv_ecsr_from_fmt
implicit none
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_s_csr_sparse_mat%mv_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_s_mv_ecsr_from_fmt
subroutine psb_s_cp_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_s_base_mat_mod
use psb_realloc_mod
use psb_s_csr_mat_mod, psb_protect_name => psb_s_cp_ecsr_from_fmt
implicit none
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_s_csr_sparse_mat%cp_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_s_cp_ecsr_from_fmt
! !
! !
! ls version ! ls version
@ -6021,7 +6291,7 @@ subroutine psb_ls_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_ls_base_sparse_mat = a%psb_ls_base_sparse_mat b%psb_ls_base_sparse_mat = a%psb_ls_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())

@ -1213,6 +1213,106 @@ subroutine psb_s_b_csclip(a,b,info,&
end subroutine psb_s_b_csclip end subroutine psb_s_b_csclip
subroutine psb_s_split_nd(a,n_rows,n_cols,info)
use psb_error_mod
use psb_string_mod
use psb_s_mat_mod, psb_protect_name => psb_s_split_nd
implicit none
class(psb_sspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_s_base_sparse_mat), intent(in), optional :: mold
type(psb_s_coo_sparse_mat) :: acoo
type(psb_s_csr_sparse_mat), allocatable :: aclip
type(psb_s_ecsr_sparse_mat), allocatable :: andclip
logical, parameter :: use_ecsr=.true.
character(len=20) :: name, ch_err
integer(psb_ipk_) :: err_act
info = psb_success_
name = 'psb_split'
call psb_erractionsave(err_act)
allocate(aclip)
call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.)
allocate(a%ad,mold=a%a)
call a%ad%mv_from_coo(acoo,info)
call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.)
if (use_ecsr) then
allocate(andclip)
call andclip%mv_from_coo(acoo,info)
call move_alloc(andclip,a%and)
else
allocate(a%and,mold=a%a)
call a%and%mv_from_coo(acoo,info)
end if
if (psb_errstatus_fatal()) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='cscnv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_split_nd
subroutine psb_s_merge_nd(a,n_rows,n_cols,info)
use psb_error_mod
use psb_string_mod
use psb_s_mat_mod, psb_protect_name => psb_s_merge_nd
implicit none
class(psb_sspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_s_base_sparse_mat), intent(in), optional :: mold
type(psb_s_coo_sparse_mat) :: acoo1,acoo2
integer(psb_ipk_) :: nz
logical, parameter :: use_ecsr=.true.
character(len=20) :: name, ch_err
integer(psb_ipk_) :: err_act
info = psb_success_
name = 'psb_split'
call psb_erractionsave(err_act)
call a%ad%mv_to_coo(acoo1,info)
call acoo1%set_bld()
call acoo1%set_nrows(n_rows)
call acoo1%set_ncols(n_cols)
call a%and%mv_to_coo(acoo2,info)
nz=acoo2%get_nzeros()
call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info)
if (allocated(a%a)) then
call a%a%free()
deallocate(a%a)
end if
allocate(a%a,mold=a%ad)
call a%a%mv_from_coo(acoo1,info)
if (psb_errstatus_fatal()) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='cscnv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_merge_nd
subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl) subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl)
use psb_error_mod use psb_error_mod
use psb_string_mod use psb_string_mod
@ -1246,6 +1346,7 @@ subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl)
goto 9999 goto 9999
end if end if
if (.false.) then
if (present(mold)) then if (present(mold)) then
allocate(altmp, mold=mold,stat=info) allocate(altmp, mold=mold,stat=info)
@ -1294,6 +1395,16 @@ subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl)
end if end if
call move_alloc(altmp,b%a) call move_alloc(altmp,b%a)
else
call inner_cp_fmt(a%a,b%a,info,type,mold,dupl)
if (allocated(a%ad)) then
call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_cp_fmt(a%and,b%and,info,type,mold,dupl)
end if
end if
call b%trim() call b%trim()
call b%set_asb() call b%set_asb()
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
@ -1303,7 +1414,79 @@ subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl)
9999 call psb_error_handler(err_act) 9999 call psb_error_handler(err_act)
return return
contains
subroutine inner_cp_fmt(a,b,info,type,mold,dupl)
class(psb_s_base_sparse_mat), intent(in) :: a
class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_s_base_sparse_mat), intent(in), optional :: mold
class(psb_s_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_s_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_s_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_s_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_s_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%cp_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,b)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_cp_fmt
end subroutine psb_s_cscnv end subroutine psb_s_cscnv
subroutine psb_s_cscnv_ip(a,info,type,mold,dupl) subroutine psb_s_cscnv_ip(a,info,type,mold,dupl)
@ -1318,7 +1501,6 @@ subroutine psb_s_cscnv_ip(a,info,type,mold,dupl)
character(len=*), optional, intent(in) :: type character(len=*), optional, intent(in) :: type
class(psb_s_base_sparse_mat), intent(in), optional :: mold class(psb_s_base_sparse_mat), intent(in), optional :: mold
class(psb_s_base_sparse_mat), allocatable :: altmp class(psb_s_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act integer(psb_ipk_) :: err_act
character(len=20) :: name='cscnv_ip' character(len=20) :: name='cscnv_ip'
@ -1345,6 +1527,7 @@ subroutine psb_s_cscnv_ip(a,info,type,mold,dupl)
goto 9999 goto 9999
end if end if
if (.false.) then
if (present(mold)) then if (present(mold)) then
allocate(altmp, mold=mold,stat=info) allocate(altmp, mold=mold,stat=info)
@ -1377,14 +1560,22 @@ subroutine psb_s_cscnv_ip(a,info,type,mold,dupl)
& a%get_fmt(),' to ',altmp%get_fmt() & a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a%a, info) call altmp%mv_from_fmt(a%a, info)
call move_alloc(altmp,a%a)
else
call inner_mv_fmt(a%a,info,type,mold,dupl)
if (allocated(a%ad)) then
call inner_mv_fmt(a%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_mv_fmt(a%and,info,type,mold,dupl)
end if
end if
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_from_subroutine_ info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from") call psb_errpush(info,name,a_err="mv_from")
goto 9999 goto 9999
end if end if
call move_alloc(altmp,a%a)
call a%trim() call a%trim()
call a%set_asb() call a%set_asb()
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
@ -1394,6 +1585,77 @@ subroutine psb_s_cscnv_ip(a,info,type,mold,dupl)
9999 call psb_error_handler(err_act) 9999 call psb_error_handler(err_act)
return return
contains
subroutine inner_mv_fmt(a,info,type,mold,dupl)
class(psb_s_base_sparse_mat), intent(inout), allocatable :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_s_base_sparse_mat), intent(in), optional :: mold
class(psb_s_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_s_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_s_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_s_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_s_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,a)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_mv_fmt
end subroutine psb_s_cscnv_ip end subroutine psb_s_cscnv_ip

@ -2006,8 +2006,8 @@ subroutine psb_z_base_vect_mv(alpha,a,x,beta,y,info,trans)
! For the time being we just throw everything back ! For the time being we just throw everything back
! onto the normal routines. ! onto the normal routines.
call x%sync() if (x%is_dev()) call x%sync()
call y%sync() if (y%is_dev()) call y%sync()
call a%spmm(alpha,x%v,beta,y%v,info,trans) call a%spmm(alpha,x%v,beta,y%v,info,trans)
call y%set_host() call y%set_host()
end subroutine psb_z_base_vect_mv end subroutine psb_z_base_vect_mv
@ -2060,8 +2060,8 @@ subroutine psb_z_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
goto 9999 goto 9999
end if end if
call x%sync() if (x%is_dev()) call x%sync()
call y%sync() if (y%is_dev()) call y%sync()
if (present(d)) then if (present(d)) then
call d%sync() call d%sync()
if (present(scale)) then if (present(scale)) then
@ -2082,6 +2082,7 @@ subroutine psb_z_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
if (info == psb_success_)& if (info == psb_success_)&
& call a%inner_spsm(alpha,tmpv,beta,y,info,trans) & call a%inner_spsm(alpha,tmpv,beta,y,info,trans)
call y%set_host()
if (info == psb_success_) then if (info == psb_success_) then
call tmpv%free(info) call tmpv%free(info)
if (info == psb_success_) deallocate(tmpv,stat=info) if (info == psb_success_) deallocate(tmpv,stat=info)
@ -2161,8 +2162,11 @@ subroutine psb_z_base_inner_vect_sv(alpha,a,x,beta,y,info,trans)
info = psb_success_ info = psb_success_
call psb_erractionsave(err_act) call psb_erractionsave(err_act)
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%inner_spsm(alpha,x%v,beta,y%v,info,trans) call a%inner_spsm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_from_subroutine_ info = psb_err_from_subroutine_

@ -166,6 +166,7 @@ subroutine psb_z_coo_scals(d,a,info)
call a%make_nonunit() call a%make_nonunit()
end if end if
!$omp parallel do private(i)
do i=1,a%get_nzeros() do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d a%val(i) = a%val(i) * d
enddo enddo
@ -4174,7 +4175,6 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
#endif #endif
info = psb_success_ info = psb_success_
@ -4205,7 +4205,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
! 'iaux' has to allow the threads to have an exclusive group ! 'iaux' has to allow the threads to have an exclusive group
! of indices as work space. Since each thread handles one ! of indices as work space. Since each thread handles one
! row/column at the time, we allocate this way. ! row/column at the time, we allocate this way.
allocate(iaux(MAX((nc+2),(nr+2))*maxthreads),stat=info) allocate(iaux(MAX((nc+2),(nr+2))),stat=info)
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_alloc_dealloc_ info = psb_err_alloc_dealloc_
call psb_errpush(info,name) call psb_errpush(info,name)
@ -4214,7 +4214,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
#else #else
allocate(iaux(nzin+2),stat=info) allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info)
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_alloc_dealloc_ info = psb_err_alloc_dealloc_
call psb_errpush(info,name) call psb_errpush(info,name)
@ -4268,7 +4268,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!locals !locals
integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:) integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:)
complex(psb_dpk_), allocatable :: vs(:) complex(psb_dpk_), allocatable :: vs(:)
integer(psb_ipk_) :: nza, nzl,iret integer(psb_ipk_) :: nza, nzl,iret, maxnzr
integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2 integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name = 'psb_fixcoo' character(len=20) :: name = 'psb_fixcoo'
@ -4277,7 +4277,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
#if defined(OPENMP) #if defined(OPENMP)
integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:) integer(psb_ipk_), allocatable :: kaux(:),idxaux(:)
#endif #endif
info = psb_success_ info = psb_success_
@ -4301,11 +4301,14 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
use_buffers = .false. use_buffers = .false.
end if end if
if (use_buffers) then !if (use_buffers) then
iaux(:) = 0
#if defined(OPENMP) #if defined(OPENMP)
!$omp workshare
iaux(:) = 0
!$omp end workshare
maxnzr = 0
!$OMP PARALLEL DO default(none) schedule(STATIC) & !$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP shared(nzin,ia,nr,iaux) & !$OMP shared(nzin,ia,nr,iaux,maxnzr) &
!$OMP private(i) & !$OMP private(i) &
!$OMP reduction(.and.:use_buffers) !$OMP reduction(.and.:use_buffers)
do i=1,nzin do i=1,nzin
@ -4319,7 +4322,16 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end if end if
end do end do
!$OMP END PARALLEL DO !$OMP END PARALLEL DO
maxnzr = 0
!$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP private(i) shared(nr,iaux)&
!$OMP reduction(max:maxnzr)
do i=1,nr
maxnzr = max(maxnzr,iaux(i))
end do
!$OMP END PARALLEL DO
#else #else
iaux(:) = 0
!srt_inp = .true. !srt_inp = .true.
do i=1,nzin do i=1,nzin
if ((ia(i) < 1).or.(ia(i) > nr)) then if ((ia(i) < 1).or.(ia(i) > nr)) then
@ -4333,8 +4345,12 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!srt_inp = srt_inp .and.(ia(i-1)<=ia(i)) !srt_inp = srt_inp .and.(ia(i-1)<=ia(i))
end do end do
maxnzr = 0
do i=1,nr
maxnzr = max(maxnzr,iaux(i))
end do
#endif #endif
end if !end if
! Check again use_buffers. We enter here if nzin >= nr and ! Check again use_buffers. We enter here if nzin >= nr and
! all the indices are valid ! all the indices are valid
@ -4342,22 +4358,21 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
if (use_buffers) then if (use_buffers) then
#if defined(OPENMP) #if defined(OPENMP)
maxthreads = omp_get_max_threads() maxthreads = omp_get_max_threads()
allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info) allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info)
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_alloc_dealloc_ info = psb_err_alloc_dealloc_
call psb_errpush(info,name) call psb_errpush(info,name)
goto 9999 goto 9999
end if end if
!$omp workshare
kaux(:) = 0 kaux(:) = 0
sum(:) = 0 !$omp end workshare
sum(1) = 1
err = 0 err = 0
! Here, starting from 'iaux', we apply a fixing in order to obtain the starting ! Here, starting from 'iaux', we apply a fixing in order to obtain the starting
! index for each row. We do the same on 'kaux' ! index for each row. We do the same on 'kaux'
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) & !$OMP shared(maxnzr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, & !$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, &
!$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info) !$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info)
@ -4382,36 +4397,40 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!write(0,*) 'fix_coo_inner: trying with exscan' !write(0,*) 'fix_coo_inner: trying with exscan'
call psi_exscan(nr+1,iaux,info,shift=ione) call psi_exscan(nr+1,iaux,info,shift=ione)
!$OMP BARRIER !$OMP BARRIER
!$OMP SINGLE
!t0 = omp_get_wtime()
!$OMP END SINGLE
! ------------------ Sorting and buffers ------------------- ! ------------------ Sorting and buffers -------------------
! Let's use an auxiliary buffer, 'idxaux', to get indices leaving ! Let's use an auxiliary buffer, 'idxaux', to get indices leaving
! unmodified 'iaux' ! unmodified 'iaux'
do j=idxstart,idxend
!$omp do private(j)
do j=1,nr+1
idxaux(j) = iaux(j) idxaux(j) = iaux(j)
end do end do
!$omp end do
! Here we sort data inside the auxiliary buffers ! Here we sort data inside the auxiliary buffers
!$omp do private(act_row,i,i1)
do i=1,nzin do i=1,nzin
act_row = ia(i) act_row = ia(i)
if ((act_row >= idxstart) .and. (act_row <= idxend)) then !$omp atomic capture
ias(idxaux(act_row)) = ia(i) i1 =idxaux(act_row)
jas(idxaux(act_row)) = ja(i)
vs(idxaux(act_row)) = val(i)
idxaux(act_row) = idxaux(act_row) + 1 idxaux(act_row) = idxaux(act_row) + 1
end if !$omp end atomic
ias(i1) = ia(i)
jas(i1) = ja(i)
vs(i1) = val(i)
end do end do
!$omp end do
!$OMP BARRIER
!$OMP SINGLE
!t1 = omp_get_wtime()
!write(0,*) ithread,'Srt&Cpy :',t1-t0
!$OMP END SINGLE
! Let's sort column indices and values. After that we will store ! Let's sort column indices and values. After that we will store
! the number of unique values in 'kaux' ! the number of unique values in 'kaux'
do j=idxstart,idxend block
integer(psb_ipk_), allocatable :: ixt(:)
allocate(ixt(2*maxnzr+2))
!$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256)
do j=1,nr
first_elem = iaux(j) first_elem = iaux(j)
last_elem = iaux(j+1) - 1 last_elem = iaux(j+1) - 1
nzl = last_elem - first_elem + 1 nzl = last_elem - first_elem + 1
@ -4419,11 +4438,11 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! The row has elements? ! The row has elements?
if (nzl > 0) then if (nzl > 0) then
call psi_msort_up(nzl,jas(first_elem:last_elem), & call psi_msort_up(nzl,jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) & ixt,iret)
if (iret == 0) then if (iret == 0) then
call psb_ip_reord(nzl,vs(first_elem:last_elem),& call psb_ip_reord(nzl,vs(first_elem:last_elem),&
& ias(first_elem:last_elem),jas(first_elem:last_elem), & & ias(first_elem:last_elem),jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2)) & ixt)
end if end if
! Over each row we count the unique values ! Over each row we count the unique values
@ -4436,6 +4455,9 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end do end do
end if end if
end do end do
!$omp end do
deallocate(ixt)
end block
! -------------------------------------------------- ! --------------------------------------------------
! ---------------- kaux composition ---------------- ! ---------------- kaux composition ----------------
@ -4553,7 +4575,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
nzout = kaux(nr+1) - 1 nzout = kaux(nr+1) - 1
deallocate(sum,kaux,idxaux,stat=info) deallocate(kaux,idxaux,stat=info)
#else #else
!if (.not.srt_inp) then !if (.not.srt_inp) then
@ -4710,7 +4732,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
& call psb_ip_reord(nzin,val,ia,ja,iaux) & call psb_ip_reord(nzin,val,ia,ja,iaux)
#if defined(OPENMP) #if defined(OPENMP)
!$OMP PARALLEL default(none) & !$OMP PARALLEL default(none) &
!$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) & !$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) &
!$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, & !$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, &
!$OMP work,first_elem,last_elem) !$OMP work,first_elem,last_elem)
@ -4732,6 +4754,9 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
idxend = idxstart + work - 1 idxend = idxstart + work - 1
block
integer(psb_ipk_), allocatable :: ixt(:)
allocate(ixt(2*maxnzr+2))
! --------------------------------------------------- ! ---------------------------------------------------
first_elem = 0 first_elem = 0
@ -4746,11 +4771,10 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
nzl = last_elem - first_elem + 1 nzl = last_elem - first_elem + 1
if (nzl > 0) then if (nzl > 0) then
call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret) call psi_msort_up(nzl,ja(first_elem:last_elem),ixt,iret)
if (iret == 0) & if (iret == 0) &
& call psb_ip_reord(nzl,val(first_elem:last_elem),& & call psb_ip_reord(nzl,val(first_elem:last_elem),&
& ia(first_elem:last_elem),ja(first_elem:last_elem),& & ia(first_elem:last_elem),ja(first_elem:last_elem),ixt)
& iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2))
end if end if
act_row = act_row + 1 act_row = act_row + 1
@ -4764,6 +4788,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
last_elem = j last_elem = j
end if end if
end do end do
end block
!$OMP END PARALLEL !$OMP END PARALLEL
#else #else
i = 1 i = 1

@ -2163,7 +2163,7 @@ subroutine psb_z_mv_csc_to_coo(a,b,info)
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())
@ -2328,7 +2328,7 @@ subroutine psb_z_cp_csc_to_fmt(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
nc = a%get_ncols() nc = a%get_ncols()
nz = a%get_nzeros() nz = max(a%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info) if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info) if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
@ -2461,7 +2461,7 @@ subroutine psb_z_cp_csc_from_fmt(a,b,info)
if (b%is_dev()) call b%sync() if (b%is_dev()) call b%sync()
a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat
nc = b%get_ncols() nc = b%get_ncols()
nz = b%get_nzeros() nz = max(b%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info) if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info) if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
@ -4058,7 +4058,7 @@ subroutine psb_lz_mv_csc_to_coo(a,b,info)
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_lz_base_sparse_mat = a%psb_lz_base_sparse_mat b%psb_lz_base_sparse_mat = a%psb_lz_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())

@ -3318,7 +3318,7 @@ subroutine psb_z_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())
@ -3489,7 +3489,7 @@ subroutine psb_z_cp_csr_to_fmt(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
nr = a%get_nrows() nr = a%get_nrows()
nz = a%get_nzeros() nz = max(a%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info) if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info) if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
@ -3594,7 +3594,7 @@ subroutine psb_z_cp_csr_from_fmt(a,b,info)
if (b%is_dev()) call b%sync() if (b%is_dev()) call b%sync()
a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat
nr = b%get_nrows() nr = b%get_nrows()
nz = b%get_nzeros() nz = max(b%get_nzeros(),ione)
if (.false.) then if (.false.) then
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info) if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info) if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
@ -3805,6 +3805,7 @@ contains
integer(psb_ipk_) :: ma, nb integer(psb_ipk_) :: ma, nb
integer(psb_ipk_), allocatable :: col_inds(:), offsets(:) integer(psb_ipk_), allocatable :: col_inds(:), offsets(:)
integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx
integer(psb_ipk_) :: nth, lth,ith
ma = a%get_nrows() ma = a%get_nrows()
nb = b%get_ncols() nb = b%get_ncols()
@ -3815,12 +3816,19 @@ contains
! dense accumulator ! dense accumulator
! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf
call psb_realloc(nb, acc, info) call psb_realloc(nb, acc, info)
!$omp parallel shared(nth,lth)
!$omp single
nth = omp_get_num_threads()
lth = min(nth, ma)
!$omp end single
!$omp end parallel
allocate(offsets(omp_get_max_threads())) allocate(offsets(omp_get_max_threads()))
!$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) & !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) &
!$omp shared(a,b,c,offsets) !$omp num_threads(lth) shared(a,b,c,offsets)
thread_upperbound = 0 thread_upperbound = 0
start_idx = 0 start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j) !$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma do irw = 1, ma
if (start_idx == 0) then if (start_idx == 0) then
@ -3876,15 +3884,14 @@ contains
!$omp end single !$omp end single
!$omp barrier !$omp barrier
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if end if
do irw = start_idx, end_idx - 1 do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do end do
end if
!$omp barrier !$omp barrier
!$omp single !$omp single
@ -3892,9 +3899,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info) call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single !$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel !$omp end parallel
end subroutine spmm_omp_gustavson end subroutine spmm_omp_gustavson
@ -3930,6 +3938,7 @@ contains
!$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets) !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets)
thread_upperbound = 0 thread_upperbound = 0
start_idx = 0 start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j) !$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma do irw = 1, ma
do jj = a%irp(irw), a%irp(irw + 1) - 1 do jj = a%irp(irw), a%irp(irw + 1) - 1
@ -3996,14 +4005,14 @@ contains
!$omp barrier !$omp barrier
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1 c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if end if
do irw = start_idx, end_idx - 1 do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw) c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do end do
end if
!$omp barrier !$omp barrier
!$omp single !$omp single
@ -4011,9 +4020,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info) call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info) call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single !$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz) c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz) c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel !$omp end parallel
end subroutine spmm_omp_gustavson_1d end subroutine spmm_omp_gustavson_1d
@ -4312,6 +4322,266 @@ contains
end subroutine psb_zcsrspspmm end subroutine psb_zcsrspspmm
#endif #endif
subroutine psb_z_ecsr_mold(a,b,info)
use psb_z_csr_mat_mod, psb_protect_name => psb_z_ecsr_mold
use psb_error_mod
implicit none
class(psb_z_ecsr_sparse_mat), intent(in) :: a
class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name='ecsr_mold'
logical, parameter :: debug=.false.
call psb_get_erraction(err_act)
info = 0
if (allocated(b)) then
call b%free()
deallocate(b,stat=info)
end if
if (info == 0) allocate(psb_z_ecsr_sparse_mat :: b, stat=info)
if (info /= 0) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
goto 9999
end if
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_z_ecsr_mold
subroutine psb_z_ecsr_csmv(alpha,a,x,beta,y,info,trans)
use psb_error_mod
use psb_string_mod
use psb_z_csr_mat_mod, psb_protect_name => psb_z_ecsr_csmv
implicit none
class(psb_z_ecsr_sparse_mat), intent(in) :: a
complex(psb_dpk_), intent(in) :: alpha, beta, x(:)
complex(psb_dpk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: m, n
logical :: tra, ctra
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name='z_csr_csmv'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (a%is_dev()) call a%sync()
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T')
ctra = (psb_toupper(trans_) == 'C')
if (tra.or.ctra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = psb_err_input_asize_small_i_
ierr(1) = 3; ierr(2) = size(x); ierr(3) = n;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if (size(y,1)<m) then
info = psb_err_input_asize_small_i_
ierr(1) = 5; ierr(2) = size(y); ierr(3) =m;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if ((beta == zone).and.&
& .not.(tra.or.ctra.or.(a%is_triangle()).or.(a%is_unit()))) then
call psb_z_ecsr_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,&
& a%nnerws,a%nerwp,x,y)
else
call a%psb_z_csr_sparse_mat%csmv(alpha,x,beta,y,info,trans)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
contains
subroutine psb_z_ecsr_csmv_inner(m,n,alpha,irp,ja,val,&
& nnerws,nerwp,x,y)
integer(psb_ipk_), intent(in) :: m,n,nnerws,irp(*),ja(*),nerwp(*)
complex(psb_dpk_), intent(in) :: alpha, x(*),val(*)
complex(psb_dpk_), intent(inout) :: y(*)
integer(psb_ipk_) :: i,j,ir
complex(psb_dpk_) :: acc
if (alpha == zzero) return
if (alpha == zone) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = zzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + acc
end do
else if (alpha == -zone) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = zzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) -acc
end do
else
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = zzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + alpha*acc
end do
end if
end subroutine psb_z_ecsr_csmv_inner
end subroutine psb_z_ecsr_csmv
subroutine psb_z_ecsr_cmp_nerwp(a,info)
use psb_const_mod
use psb_realloc_mod
use psb_z_base_mat_mod
use psb_z_csr_mat_mod, psb_protect_name => psb_z_ecsr_cmp_nerwp
implicit none
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nnerws, i, nr, nzr
info = psb_success_
nr = a%get_nrows()
call psb_realloc(nr,a%nerwp,info)
nnerws = 0
do i=1, nr
nzr = a%irp(i+1)-a%irp(i)
if (nzr>0) then
nnerws = nnerws + 1
a%nerwp(nnerws) = i
end if
end do
call psb_realloc(nnerws,a%nerwp,info)
a%nnerws = nnerws
end subroutine psb_z_ecsr_cmp_nerwp
subroutine psb_z_cp_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_z_base_mat_mod
use psb_z_csr_mat_mod, psb_protect_name => psb_z_cp_ecsr_from_coo
implicit none
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_z_csr_sparse_mat%cp_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_z_cp_ecsr_from_coo
subroutine psb_z_mv_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_error_mod
use psb_z_base_mat_mod
use psb_z_csr_mat_mod, psb_protect_name => psb_z_mv_ecsr_from_coo
implicit none
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_z_csr_sparse_mat%mv_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_z_mv_ecsr_from_coo
subroutine psb_z_mv_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_z_base_mat_mod
use psb_z_csr_mat_mod, psb_protect_name => psb_z_mv_ecsr_from_fmt
implicit none
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_z_csr_sparse_mat%mv_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_z_mv_ecsr_from_fmt
subroutine psb_z_cp_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_z_base_mat_mod
use psb_realloc_mod
use psb_z_csr_mat_mod, psb_protect_name => psb_z_cp_ecsr_from_fmt
implicit none
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_z_csr_sparse_mat%cp_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_z_cp_ecsr_from_fmt
! !
! !
! lz version ! lz version
@ -6021,7 +6291,7 @@ subroutine psb_lz_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync() if (a%is_dev()) call a%sync()
nr = a%get_nrows() nr = a%get_nrows()
nc = a%get_ncols() nc = a%get_ncols()
nza = a%get_nzeros() nza = max(a%get_nzeros(),ione)
b%psb_lz_base_sparse_mat = a%psb_lz_base_sparse_mat b%psb_lz_base_sparse_mat = a%psb_lz_base_sparse_mat
call b%set_nzeros(a%get_nzeros()) call b%set_nzeros(a%get_nzeros())

@ -1213,6 +1213,106 @@ subroutine psb_z_b_csclip(a,b,info,&
end subroutine psb_z_b_csclip end subroutine psb_z_b_csclip
subroutine psb_z_split_nd(a,n_rows,n_cols,info)
use psb_error_mod
use psb_string_mod
use psb_z_mat_mod, psb_protect_name => psb_z_split_nd
implicit none
class(psb_zspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_z_base_sparse_mat), intent(in), optional :: mold
type(psb_z_coo_sparse_mat) :: acoo
type(psb_z_csr_sparse_mat), allocatable :: aclip
type(psb_z_ecsr_sparse_mat), allocatable :: andclip
logical, parameter :: use_ecsr=.true.
character(len=20) :: name, ch_err
integer(psb_ipk_) :: err_act
info = psb_success_
name = 'psb_split'
call psb_erractionsave(err_act)
allocate(aclip)
call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.)
allocate(a%ad,mold=a%a)
call a%ad%mv_from_coo(acoo,info)
call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.)
if (use_ecsr) then
allocate(andclip)
call andclip%mv_from_coo(acoo,info)
call move_alloc(andclip,a%and)
else
allocate(a%and,mold=a%a)
call a%and%mv_from_coo(acoo,info)
end if
if (psb_errstatus_fatal()) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='cscnv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_z_split_nd
subroutine psb_z_merge_nd(a,n_rows,n_cols,info)
use psb_error_mod
use psb_string_mod
use psb_z_mat_mod, psb_protect_name => psb_z_merge_nd
implicit none
class(psb_zspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_z_base_sparse_mat), intent(in), optional :: mold
type(psb_z_coo_sparse_mat) :: acoo1,acoo2
integer(psb_ipk_) :: nz
logical, parameter :: use_ecsr=.true.
character(len=20) :: name, ch_err
integer(psb_ipk_) :: err_act
info = psb_success_
name = 'psb_split'
call psb_erractionsave(err_act)
call a%ad%mv_to_coo(acoo1,info)
call acoo1%set_bld()
call acoo1%set_nrows(n_rows)
call acoo1%set_ncols(n_cols)
call a%and%mv_to_coo(acoo2,info)
nz=acoo2%get_nzeros()
call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info)
if (allocated(a%a)) then
call a%a%free()
deallocate(a%a)
end if
allocate(a%a,mold=a%ad)
call a%a%mv_from_coo(acoo1,info)
if (psb_errstatus_fatal()) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='cscnv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_z_merge_nd
subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl) subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl)
use psb_error_mod use psb_error_mod
use psb_string_mod use psb_string_mod
@ -1246,6 +1346,7 @@ subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl)
goto 9999 goto 9999
end if end if
if (.false.) then
if (present(mold)) then if (present(mold)) then
allocate(altmp, mold=mold,stat=info) allocate(altmp, mold=mold,stat=info)
@ -1294,6 +1395,16 @@ subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl)
end if end if
call move_alloc(altmp,b%a) call move_alloc(altmp,b%a)
else
call inner_cp_fmt(a%a,b%a,info,type,mold,dupl)
if (allocated(a%ad)) then
call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_cp_fmt(a%and,b%and,info,type,mold,dupl)
end if
end if
call b%trim() call b%trim()
call b%set_asb() call b%set_asb()
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
@ -1303,7 +1414,79 @@ subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl)
9999 call psb_error_handler(err_act) 9999 call psb_error_handler(err_act)
return return
contains
subroutine inner_cp_fmt(a,b,info,type,mold,dupl)
class(psb_z_base_sparse_mat), intent(in) :: a
class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_z_base_sparse_mat), intent(in), optional :: mold
class(psb_z_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_z_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_z_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_z_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_z_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%cp_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,b)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_cp_fmt
end subroutine psb_z_cscnv end subroutine psb_z_cscnv
subroutine psb_z_cscnv_ip(a,info,type,mold,dupl) subroutine psb_z_cscnv_ip(a,info,type,mold,dupl)
@ -1318,7 +1501,6 @@ subroutine psb_z_cscnv_ip(a,info,type,mold,dupl)
character(len=*), optional, intent(in) :: type character(len=*), optional, intent(in) :: type
class(psb_z_base_sparse_mat), intent(in), optional :: mold class(psb_z_base_sparse_mat), intent(in), optional :: mold
class(psb_z_base_sparse_mat), allocatable :: altmp class(psb_z_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act integer(psb_ipk_) :: err_act
character(len=20) :: name='cscnv_ip' character(len=20) :: name='cscnv_ip'
@ -1345,6 +1527,7 @@ subroutine psb_z_cscnv_ip(a,info,type,mold,dupl)
goto 9999 goto 9999
end if end if
if (.false.) then
if (present(mold)) then if (present(mold)) then
allocate(altmp, mold=mold,stat=info) allocate(altmp, mold=mold,stat=info)
@ -1377,14 +1560,22 @@ subroutine psb_z_cscnv_ip(a,info,type,mold,dupl)
& a%get_fmt(),' to ',altmp%get_fmt() & a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a%a, info) call altmp%mv_from_fmt(a%a, info)
call move_alloc(altmp,a%a)
else
call inner_mv_fmt(a%a,info,type,mold,dupl)
if (allocated(a%ad)) then
call inner_mv_fmt(a%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_mv_fmt(a%and,info,type,mold,dupl)
end if
end if
if (info /= psb_success_) then if (info /= psb_success_) then
info = psb_err_from_subroutine_ info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from") call psb_errpush(info,name,a_err="mv_from")
goto 9999 goto 9999
end if end if
call move_alloc(altmp,a%a)
call a%trim() call a%trim()
call a%set_asb() call a%set_asb()
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
@ -1394,6 +1585,77 @@ subroutine psb_z_cscnv_ip(a,info,type,mold,dupl)
9999 call psb_error_handler(err_act) 9999 call psb_error_handler(err_act)
return return
contains
subroutine inner_mv_fmt(a,info,type,mold,dupl)
class(psb_z_base_sparse_mat), intent(inout), allocatable :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_z_base_sparse_mat), intent(in), optional :: mold
class(psb_z_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_z_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_z_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_z_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_z_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,a)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_mv_fmt
end subroutine psb_z_cscnv_ip end subroutine psb_z_cscnv_ip

@ -1567,3 +1567,300 @@ subroutine caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
return return
end subroutine caxpbyv2 end subroutine caxpbyv2
subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_spk_), intent (in) :: x(:)
complex(psb_spk_), intent (inout) :: y(:)
complex(psb_spk_), intent (inout) :: z(:)
complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='cabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if (beta == czero) then
if (gamma == czero) then
if (alpha == czero) then
if (delta == czero) then
! a 0 b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = czero
z(i) = czero
end do
else if (delta /= czero) then
! a 0 b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = czero
z(i) = delta*z(i)
end do
end if
else if (alpha /= czero) then
if (delta == czero) then
! a n b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = czero
end do
else if (delta /= czero) then
! a n b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= czero) then
if (alpha == czero) then
if (delta == czero) then
! a 0 b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = czero
z(i) = czero ! gamma*y(i)
end do
else if (delta /= czero) then
! a 0 b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = czero
z(i) = delta*z(i)
end do
end if
else if (alpha /= czero) then
if (delta == czero) then
! a n b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)
end do
else if (delta /= czero) then
! a n b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
else if (beta /= czero) then
if (gamma == czero) then
if (alpha == czero) then
if (delta == czero) then
! a 0 b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = czero
end do
else if (delta /= czero) then
! a 0 b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = delta*z(i)
end do
end if
else if (alpha /= czero) then
if (delta == czero) then
! a n b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = czero
end do
else if (delta /= czero) then
! a n b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= czero) then
if (alpha == czero) then
if (delta == czero) then
! a 0 b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= czero) then
! a 0 b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
else if (alpha /= czero) then
if (delta == czero) then
! a n b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= czero) then
! a n b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_cabgdxyz
subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_spk_), intent (in) :: x(:)
complex(psb_spk_), intent (inout) :: y(:)
complex(psb_spk_), intent (inout) :: z(:)
complex(psb_spk_), intent (inout) :: w(:)
complex(psb_spk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='cabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if ((a==czero).or.(b==czero).or. &
& (c==czero).or.(d==czero).or.&
& (e==czero).or.(f==czero)) then
write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
!$omp parallel do private(i)
do i=1,m
y(i) = a*x(i)+b*y(i)
z(i) = c*y(i)+d*z(i)
w(i) = e*z(i)+f*w(i)
end do
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_cxyzw

@ -1567,3 +1567,300 @@ subroutine daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
return return
end subroutine daxpbyv2 end subroutine daxpbyv2
subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_dpk_), intent (in) :: x(:)
real(psb_dpk_), intent (inout) :: y(:)
real(psb_dpk_), intent (inout) :: z(:)
real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='dabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if (beta == dzero) then
if (gamma == dzero) then
if (alpha == dzero) then
if (delta == dzero) then
! a 0 b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = dzero
z(i) = dzero
end do
else if (delta /= dzero) then
! a 0 b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = dzero
z(i) = delta*z(i)
end do
end if
else if (alpha /= dzero) then
if (delta == dzero) then
! a n b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = dzero
end do
else if (delta /= dzero) then
! a n b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= dzero) then
if (alpha == dzero) then
if (delta == dzero) then
! a 0 b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = dzero
z(i) = dzero ! gamma*y(i)
end do
else if (delta /= dzero) then
! a 0 b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = dzero
z(i) = delta*z(i)
end do
end if
else if (alpha /= dzero) then
if (delta == dzero) then
! a n b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)
end do
else if (delta /= dzero) then
! a n b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
else if (beta /= dzero) then
if (gamma == dzero) then
if (alpha == dzero) then
if (delta == dzero) then
! a 0 b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = dzero
end do
else if (delta /= dzero) then
! a 0 b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = delta*z(i)
end do
end if
else if (alpha /= dzero) then
if (delta == dzero) then
! a n b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = dzero
end do
else if (delta /= dzero) then
! a n b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= dzero) then
if (alpha == dzero) then
if (delta == dzero) then
! a 0 b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= dzero) then
! a 0 b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
else if (alpha /= dzero) then
if (delta == dzero) then
! a n b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= dzero) then
! a n b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_dabgdxyz
subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_dpk_), intent (in) :: x(:)
real(psb_dpk_), intent (inout) :: y(:)
real(psb_dpk_), intent (inout) :: z(:)
real(psb_dpk_), intent (inout) :: w(:)
real(psb_dpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='dabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if ((a==dzero).or.(b==dzero).or. &
& (c==dzero).or.(d==dzero).or.&
& (e==dzero).or.(f==dzero)) then
write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
!$omp parallel do private(i)
do i=1,m
y(i) = a*x(i)+b*y(i)
z(i) = c*y(i)+d*z(i)
w(i) = e*z(i)+f*w(i)
end do
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_dxyzw

@ -1567,3 +1567,300 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
return return
end subroutine eaxpbyv2 end subroutine eaxpbyv2
subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_epk_), intent (in) :: x(:)
integer(psb_epk_), intent (inout) :: y(:)
integer(psb_epk_), intent (inout) :: z(:)
integer(psb_epk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='eabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if (beta == ezero) then
if (gamma == ezero) then
if (alpha == ezero) then
if (delta == ezero) then
! a 0 b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = ezero
z(i) = ezero
end do
else if (delta /= ezero) then
! a 0 b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = ezero
z(i) = delta*z(i)
end do
end if
else if (alpha /= ezero) then
if (delta == ezero) then
! a n b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = ezero
end do
else if (delta /= ezero) then
! a n b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= ezero) then
if (alpha == ezero) then
if (delta == ezero) then
! a 0 b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = ezero
z(i) = ezero ! gamma*y(i)
end do
else if (delta /= ezero) then
! a 0 b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = ezero
z(i) = delta*z(i)
end do
end if
else if (alpha /= ezero) then
if (delta == ezero) then
! a n b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)
end do
else if (delta /= ezero) then
! a n b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
else if (beta /= ezero) then
if (gamma == ezero) then
if (alpha == ezero) then
if (delta == ezero) then
! a 0 b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = ezero
end do
else if (delta /= ezero) then
! a 0 b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = delta*z(i)
end do
end if
else if (alpha /= ezero) then
if (delta == ezero) then
! a n b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = ezero
end do
else if (delta /= ezero) then
! a n b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= ezero) then
if (alpha == ezero) then
if (delta == ezero) then
! a 0 b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= ezero) then
! a 0 b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
else if (alpha /= ezero) then
if (delta == ezero) then
! a n b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= ezero) then
! a n b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_eabgdxyz
subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_epk_), intent (in) :: x(:)
integer(psb_epk_), intent (inout) :: y(:)
integer(psb_epk_), intent (inout) :: z(:)
integer(psb_epk_), intent (inout) :: w(:)
integer(psb_epk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='eabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if ((a==ezero).or.(b==ezero).or. &
& (c==ezero).or.(d==ezero).or.&
& (e==ezero).or.(f==ezero)) then
write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
!$omp parallel do private(i)
do i=1,m
y(i) = a*x(i)+b*y(i)
z(i) = c*y(i)+d*z(i)
w(i) = e*z(i)+f*w(i)
end do
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_exyzw

@ -1567,3 +1567,300 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
return return
end subroutine i2axpbyv2 end subroutine i2axpbyv2
subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_i2pk_), intent (in) :: x(:)
integer(psb_i2pk_), intent (inout) :: y(:)
integer(psb_i2pk_), intent (inout) :: z(:)
integer(psb_i2pk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='i2abgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if (beta == i2zero) then
if (gamma == i2zero) then
if (alpha == i2zero) then
if (delta == i2zero) then
! a 0 b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = i2zero
z(i) = i2zero
end do
else if (delta /= i2zero) then
! a 0 b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = i2zero
z(i) = delta*z(i)
end do
end if
else if (alpha /= i2zero) then
if (delta == i2zero) then
! a n b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = i2zero
end do
else if (delta /= i2zero) then
! a n b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= i2zero) then
if (alpha == i2zero) then
if (delta == i2zero) then
! a 0 b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = i2zero
z(i) = i2zero ! gamma*y(i)
end do
else if (delta /= i2zero) then
! a 0 b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = i2zero
z(i) = delta*z(i)
end do
end if
else if (alpha /= i2zero) then
if (delta == i2zero) then
! a n b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)
end do
else if (delta /= i2zero) then
! a n b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
else if (beta /= i2zero) then
if (gamma == i2zero) then
if (alpha == i2zero) then
if (delta == i2zero) then
! a 0 b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = i2zero
end do
else if (delta /= i2zero) then
! a 0 b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = delta*z(i)
end do
end if
else if (alpha /= i2zero) then
if (delta == i2zero) then
! a n b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = i2zero
end do
else if (delta /= i2zero) then
! a n b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= i2zero) then
if (alpha == i2zero) then
if (delta == i2zero) then
! a 0 b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= i2zero) then
! a 0 b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
else if (alpha /= i2zero) then
if (delta == i2zero) then
! a n b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= i2zero) then
! a n b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_i2abgdxyz
subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_i2pk_), intent (in) :: x(:)
integer(psb_i2pk_), intent (inout) :: y(:)
integer(psb_i2pk_), intent (inout) :: z(:)
integer(psb_i2pk_), intent (inout) :: w(:)
integer(psb_i2pk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='i2abgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if ((a==i2zero).or.(b==i2zero).or. &
& (c==i2zero).or.(d==i2zero).or.&
& (e==i2zero).or.(f==i2zero)) then
write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
!$omp parallel do private(i)
do i=1,m
y(i) = a*x(i)+b*y(i)
z(i) = c*y(i)+d*z(i)
w(i) = e*z(i)+f*w(i)
end do
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_i2xyzw

@ -1567,3 +1567,300 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
return return
end subroutine maxpbyv2 end subroutine maxpbyv2
subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_mpk_), intent (in) :: x(:)
integer(psb_mpk_), intent (inout) :: y(:)
integer(psb_mpk_), intent (inout) :: z(:)
integer(psb_mpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='mabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if (beta == mzero) then
if (gamma == mzero) then
if (alpha == mzero) then
if (delta == mzero) then
! a 0 b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = mzero
z(i) = mzero
end do
else if (delta /= mzero) then
! a 0 b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = mzero
z(i) = delta*z(i)
end do
end if
else if (alpha /= mzero) then
if (delta == mzero) then
! a n b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = mzero
end do
else if (delta /= mzero) then
! a n b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= mzero) then
if (alpha == mzero) then
if (delta == mzero) then
! a 0 b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = mzero
z(i) = mzero ! gamma*y(i)
end do
else if (delta /= mzero) then
! a 0 b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = mzero
z(i) = delta*z(i)
end do
end if
else if (alpha /= mzero) then
if (delta == mzero) then
! a n b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)
end do
else if (delta /= mzero) then
! a n b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
else if (beta /= mzero) then
if (gamma == mzero) then
if (alpha == mzero) then
if (delta == mzero) then
! a 0 b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = mzero
end do
else if (delta /= mzero) then
! a 0 b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = delta*z(i)
end do
end if
else if (alpha /= mzero) then
if (delta == mzero) then
! a n b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = mzero
end do
else if (delta /= mzero) then
! a n b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= mzero) then
if (alpha == mzero) then
if (delta == mzero) then
! a 0 b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= mzero) then
! a 0 b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
else if (alpha /= mzero) then
if (delta == mzero) then
! a n b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= mzero) then
! a n b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_mabgdxyz
subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_mpk_), intent (in) :: x(:)
integer(psb_mpk_), intent (inout) :: y(:)
integer(psb_mpk_), intent (inout) :: z(:)
integer(psb_mpk_), intent (inout) :: w(:)
integer(psb_mpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='mabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if ((a==mzero).or.(b==mzero).or. &
& (c==mzero).or.(d==mzero).or.&
& (e==mzero).or.(f==mzero)) then
write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
!$omp parallel do private(i)
do i=1,m
y(i) = a*x(i)+b*y(i)
z(i) = c*y(i)+d*z(i)
w(i) = e*z(i)+f*w(i)
end do
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_mxyzw

@ -1567,3 +1567,300 @@ subroutine saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
return return
end subroutine saxpbyv2 end subroutine saxpbyv2
subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_spk_), intent (in) :: x(:)
real(psb_spk_), intent (inout) :: y(:)
real(psb_spk_), intent (inout) :: z(:)
real(psb_spk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='sabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if (beta == szero) then
if (gamma == szero) then
if (alpha == szero) then
if (delta == szero) then
! a 0 b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = szero
z(i) = szero
end do
else if (delta /= szero) then
! a 0 b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = szero
z(i) = delta*z(i)
end do
end if
else if (alpha /= szero) then
if (delta == szero) then
! a n b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = szero
end do
else if (delta /= szero) then
! a n b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= szero) then
if (alpha == szero) then
if (delta == szero) then
! a 0 b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = szero
z(i) = szero ! gamma*y(i)
end do
else if (delta /= szero) then
! a 0 b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = szero
z(i) = delta*z(i)
end do
end if
else if (alpha /= szero) then
if (delta == szero) then
! a n b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)
end do
else if (delta /= szero) then
! a n b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
else if (beta /= szero) then
if (gamma == szero) then
if (alpha == szero) then
if (delta == szero) then
! a 0 b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = szero
end do
else if (delta /= szero) then
! a 0 b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = delta*z(i)
end do
end if
else if (alpha /= szero) then
if (delta == szero) then
! a n b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = szero
end do
else if (delta /= szero) then
! a n b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= szero) then
if (alpha == szero) then
if (delta == szero) then
! a 0 b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= szero) then
! a 0 b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
else if (alpha /= szero) then
if (delta == szero) then
! a n b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= szero) then
! a n b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_sabgdxyz
subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_spk_), intent (in) :: x(:)
real(psb_spk_), intent (inout) :: y(:)
real(psb_spk_), intent (inout) :: z(:)
real(psb_spk_), intent (inout) :: w(:)
real(psb_spk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='sabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if ((a==szero).or.(b==szero).or. &
& (c==szero).or.(d==szero).or.&
& (e==szero).or.(f==szero)) then
write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
!$omp parallel do private(i)
do i=1,m
y(i) = a*x(i)+b*y(i)
z(i) = c*y(i)+d*z(i)
w(i) = e*z(i)+f*w(i)
end do
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_sxyzw

@ -1567,3 +1567,300 @@ subroutine zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
return return
end subroutine zaxpbyv2 end subroutine zaxpbyv2
subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_dpk_), intent (in) :: x(:)
complex(psb_dpk_), intent (inout) :: y(:)
complex(psb_dpk_), intent (inout) :: z(:)
complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='zabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if (beta == zzero) then
if (gamma == zzero) then
if (alpha == zzero) then
if (delta == zzero) then
! a 0 b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = zzero
z(i) = zzero
end do
else if (delta /= zzero) then
! a 0 b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = zzero
z(i) = delta*z(i)
end do
end if
else if (alpha /= zzero) then
if (delta == zzero) then
! a n b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = zzero
end do
else if (delta /= zzero) then
! a n b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= zzero) then
if (alpha == zzero) then
if (delta == zzero) then
! a 0 b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = zzero
z(i) = zzero ! gamma*y(i)
end do
else if (delta /= zzero) then
! a 0 b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = zzero
z(i) = delta*z(i)
end do
end if
else if (alpha /= zzero) then
if (delta == zzero) then
! a n b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)
end do
else if (delta /= zzero) then
! a n b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
else if (beta /= zzero) then
if (gamma == zzero) then
if (alpha == zzero) then
if (delta == zzero) then
! a 0 b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = zzero
end do
else if (delta /= zzero) then
! a 0 b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = delta*z(i)
end do
end if
else if (alpha /= zzero) then
if (delta == zzero) then
! a n b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = zzero
end do
else if (delta /= zzero) then
! a n b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= zzero) then
if (alpha == zzero) then
if (delta == zzero) then
! a 0 b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= zzero) then
! a 0 b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
else if (alpha /= zzero) then
if (delta == zzero) then
! a n b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= zzero) then
! a n b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_zabgdxyz
subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_dpk_), intent (in) :: x(:)
complex(psb_dpk_), intent (inout) :: y(:)
complex(psb_dpk_), intent (inout) :: z(:)
complex(psb_dpk_), intent (inout) :: w(:)
complex(psb_dpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='zabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if ((a==zzero).or.(b==zzero).or. &
& (c==zzero).or.(d==zzero).or.&
& (e==zzero).or.(f==zzero)) then
write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
!$omp parallel do private(i)
do i=1,m
y(i) = a*x(i)+b*y(i)
z(i) = c*y(i)+d*z(i)
w(i) = e*z(i)+f*w(i)
end do
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_zxyzw

@ -44,7 +44,7 @@
! psb_upd_perm_ Permutation(more memory) ! psb_upd_perm_ Permutation(more memory)
! !
! !
subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold) subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold, bld_and)
use psb_base_mod, psb_protect_name => psb_cspasb use psb_base_mod, psb_protect_name => psb_cspasb
use psb_sort_mod use psb_sort_mod
use psi_mod use psi_mod
@ -58,6 +58,7 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_), optional, intent(in) :: upd integer(psb_ipk_), optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt character(len=*), optional, intent(in) :: afmt
class(psb_c_base_sparse_mat), intent(in), optional :: mold class(psb_c_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
!....Locals.... !....Locals....
type(psb_ctxt_type) :: ctxt type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np,me, err_act integer(psb_ipk_) :: np,me, err_act
@ -65,6 +66,7 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name, ch_err character(len=20) :: name, ch_err
class(psb_i_base_vect_type), allocatable :: ivm class(psb_i_base_vect_type), allocatable :: ivm
logical :: bld_and_
info = psb_success_ info = psb_success_
name = 'psb_spasb' name = 'psb_spasb'
@ -93,7 +95,11 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold)
if (debug_level >= psb_debug_ext_)& if (debug_level >= psb_debug_ext_)&
& write(debug_unit, *) me,' ',trim(name),& & write(debug_unit, *) me,' ',trim(name),&
& ' Begin matrix assembly...' & ' Begin matrix assembly...'
if (present(bld_and)) then
bld_and_ = bld_and
else
bld_and_ = .false.
end if
!check on errors encountered in psdspins !check on errors encountered in psdspins
if (a%is_bld()) then if (a%is_bld()) then
@ -171,7 +177,49 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold)
end if end if
if (bld_and_) then
!!$ allocate(a%ad,mold=a%a)
!!$ allocate(a%and,mold=a%a)o
call a%split_nd(n_row,n_col,info)
!!$ block
!!$ character(len=1024) :: fname
!!$ type(psb_c_coo_sparse_mat) :: acoo
!!$ type(psb_c_csr_sparse_mat), allocatable :: aclip
!!$ type(psb_c_ecsr_sparse_mat), allocatable :: andclip
!!$ logical, parameter :: use_ecsr=.true.
!!$ allocate(aclip)
!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.)
!!$ allocate(a%ad,mold=a%a)
!!$ call a%ad%mv_from_coo(acoo,info)
!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.)
!!$ if (use_ecsr) then
!!$ allocate(andclip)
!!$ call andclip%mv_from_coo(acoo,info)
!!$ call move_alloc(andclip,a%and)
!!$ else
!!$ allocate(a%and,mold=a%a)
!!$ call a%and%mv_from_coo(acoo,info)
!!$ end if
!!$ if (.false.) then
!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%ad%print(25)
!!$ close(25)
!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%and%print(25)
!!$ close(25)
!!$ !call andclip%set_cols(n_col)
!!$ write(*,*) me,' ',trim(name),' ad ',&
!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col
!!$ write(*,*) me,' ',trim(name),' and ',&
!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col
!!$ end if
!!$ end block
else
if (allocated(a%ad)) deallocate(a%ad)
if (allocated(a%and)) deallocate(a%and)
end if
if (debug_level >= psb_debug_ext_) then if (debug_level >= psb_debug_ext_) then
ch_err=a%get_fmt() ch_err=a%get_fmt()
write(debug_unit, *) me,' ',trim(name),': From SPCNV',& write(debug_unit, *) me,' ',trim(name),': From SPCNV',&

@ -44,7 +44,7 @@
! psb_upd_perm_ Permutation(more memory) ! psb_upd_perm_ Permutation(more memory)
! !
! !
subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold) subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold, bld_and)
use psb_base_mod, psb_protect_name => psb_dspasb use psb_base_mod, psb_protect_name => psb_dspasb
use psb_sort_mod use psb_sort_mod
use psi_mod use psi_mod
@ -58,6 +58,7 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_), optional, intent(in) :: upd integer(psb_ipk_), optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt character(len=*), optional, intent(in) :: afmt
class(psb_d_base_sparse_mat), intent(in), optional :: mold class(psb_d_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
!....Locals.... !....Locals....
type(psb_ctxt_type) :: ctxt type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np,me, err_act integer(psb_ipk_) :: np,me, err_act
@ -65,6 +66,7 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name, ch_err character(len=20) :: name, ch_err
class(psb_i_base_vect_type), allocatable :: ivm class(psb_i_base_vect_type), allocatable :: ivm
logical :: bld_and_
info = psb_success_ info = psb_success_
name = 'psb_spasb' name = 'psb_spasb'
@ -93,7 +95,11 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold)
if (debug_level >= psb_debug_ext_)& if (debug_level >= psb_debug_ext_)&
& write(debug_unit, *) me,' ',trim(name),& & write(debug_unit, *) me,' ',trim(name),&
& ' Begin matrix assembly...' & ' Begin matrix assembly...'
if (present(bld_and)) then
bld_and_ = bld_and
else
bld_and_ = .false.
end if
!check on errors encountered in psdspins !check on errors encountered in psdspins
if (a%is_bld()) then if (a%is_bld()) then
@ -171,7 +177,49 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold)
end if end if
if (bld_and_) then
!!$ allocate(a%ad,mold=a%a)
!!$ allocate(a%and,mold=a%a)o
call a%split_nd(n_row,n_col,info)
!!$ block
!!$ character(len=1024) :: fname
!!$ type(psb_d_coo_sparse_mat) :: acoo
!!$ type(psb_d_csr_sparse_mat), allocatable :: aclip
!!$ type(psb_d_ecsr_sparse_mat), allocatable :: andclip
!!$ logical, parameter :: use_ecsr=.true.
!!$ allocate(aclip)
!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.)
!!$ allocate(a%ad,mold=a%a)
!!$ call a%ad%mv_from_coo(acoo,info)
!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.)
!!$ if (use_ecsr) then
!!$ allocate(andclip)
!!$ call andclip%mv_from_coo(acoo,info)
!!$ call move_alloc(andclip,a%and)
!!$ else
!!$ allocate(a%and,mold=a%a)
!!$ call a%and%mv_from_coo(acoo,info)
!!$ end if
!!$ if (.false.) then
!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%ad%print(25)
!!$ close(25)
!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%and%print(25)
!!$ close(25)
!!$ !call andclip%set_cols(n_col)
!!$ write(*,*) me,' ',trim(name),' ad ',&
!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col
!!$ write(*,*) me,' ',trim(name),' and ',&
!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col
!!$ end if
!!$ end block
else
if (allocated(a%ad)) deallocate(a%ad)
if (allocated(a%and)) deallocate(a%and)
end if
if (debug_level >= psb_debug_ext_) then if (debug_level >= psb_debug_ext_) then
ch_err=a%get_fmt() ch_err=a%get_fmt()
write(debug_unit, *) me,' ',trim(name),': From SPCNV',& write(debug_unit, *) me,' ',trim(name),': From SPCNV',&

@ -67,7 +67,7 @@ subroutine psb_icdasb(desc,info,ext_hv,mold)
integer(psb_mpk_) :: icomm integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np,me integer(psb_ipk_) :: np,me
logical :: ext_hv_ logical :: ext_hv_
logical, parameter :: do_timings=.true. logical, parameter :: do_timings=.false.
integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1 integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1
integer(psb_ipk_), save :: idx_phase11=-1, idx_phase12=-1, idx_phase13=-1 integer(psb_ipk_), save :: idx_phase11=-1, idx_phase12=-1, idx_phase13=-1
integer(psb_ipk_), save :: idx_total=-1 integer(psb_ipk_), save :: idx_total=-1

@ -44,7 +44,7 @@
! psb_upd_perm_ Permutation(more memory) ! psb_upd_perm_ Permutation(more memory)
! !
! !
subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold) subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold, bld_and)
use psb_base_mod, psb_protect_name => psb_sspasb use psb_base_mod, psb_protect_name => psb_sspasb
use psb_sort_mod use psb_sort_mod
use psi_mod use psi_mod
@ -58,6 +58,7 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_), optional, intent(in) :: upd integer(psb_ipk_), optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt character(len=*), optional, intent(in) :: afmt
class(psb_s_base_sparse_mat), intent(in), optional :: mold class(psb_s_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
!....Locals.... !....Locals....
type(psb_ctxt_type) :: ctxt type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np,me, err_act integer(psb_ipk_) :: np,me, err_act
@ -65,6 +66,7 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name, ch_err character(len=20) :: name, ch_err
class(psb_i_base_vect_type), allocatable :: ivm class(psb_i_base_vect_type), allocatable :: ivm
logical :: bld_and_
info = psb_success_ info = psb_success_
name = 'psb_spasb' name = 'psb_spasb'
@ -93,7 +95,11 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold)
if (debug_level >= psb_debug_ext_)& if (debug_level >= psb_debug_ext_)&
& write(debug_unit, *) me,' ',trim(name),& & write(debug_unit, *) me,' ',trim(name),&
& ' Begin matrix assembly...' & ' Begin matrix assembly...'
if (present(bld_and)) then
bld_and_ = bld_and
else
bld_and_ = .false.
end if
!check on errors encountered in psdspins !check on errors encountered in psdspins
if (a%is_bld()) then if (a%is_bld()) then
@ -171,7 +177,49 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold)
end if end if
if (bld_and_) then
!!$ allocate(a%ad,mold=a%a)
!!$ allocate(a%and,mold=a%a)o
call a%split_nd(n_row,n_col,info)
!!$ block
!!$ character(len=1024) :: fname
!!$ type(psb_s_coo_sparse_mat) :: acoo
!!$ type(psb_s_csr_sparse_mat), allocatable :: aclip
!!$ type(psb_s_ecsr_sparse_mat), allocatable :: andclip
!!$ logical, parameter :: use_ecsr=.true.
!!$ allocate(aclip)
!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.)
!!$ allocate(a%ad,mold=a%a)
!!$ call a%ad%mv_from_coo(acoo,info)
!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.)
!!$ if (use_ecsr) then
!!$ allocate(andclip)
!!$ call andclip%mv_from_coo(acoo,info)
!!$ call move_alloc(andclip,a%and)
!!$ else
!!$ allocate(a%and,mold=a%a)
!!$ call a%and%mv_from_coo(acoo,info)
!!$ end if
!!$ if (.false.) then
!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%ad%print(25)
!!$ close(25)
!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%and%print(25)
!!$ close(25)
!!$ !call andclip%set_cols(n_col)
!!$ write(*,*) me,' ',trim(name),' ad ',&
!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col
!!$ write(*,*) me,' ',trim(name),' and ',&
!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col
!!$ end if
!!$ end block
else
if (allocated(a%ad)) deallocate(a%ad)
if (allocated(a%and)) deallocate(a%and)
end if
if (debug_level >= psb_debug_ext_) then if (debug_level >= psb_debug_ext_) then
ch_err=a%get_fmt() ch_err=a%get_fmt()
write(debug_unit, *) me,' ',trim(name),': From SPCNV',& write(debug_unit, *) me,' ',trim(name),': From SPCNV',&

@ -44,7 +44,7 @@
! psb_upd_perm_ Permutation(more memory) ! psb_upd_perm_ Permutation(more memory)
! !
! !
subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold) subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold, bld_and)
use psb_base_mod, psb_protect_name => psb_zspasb use psb_base_mod, psb_protect_name => psb_zspasb
use psb_sort_mod use psb_sort_mod
use psi_mod use psi_mod
@ -58,6 +58,7 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_), optional, intent(in) :: upd integer(psb_ipk_), optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt character(len=*), optional, intent(in) :: afmt
class(psb_z_base_sparse_mat), intent(in), optional :: mold class(psb_z_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
!....Locals.... !....Locals....
type(psb_ctxt_type) :: ctxt type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np,me, err_act integer(psb_ipk_) :: np,me, err_act
@ -65,6 +66,7 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_) :: debug_level, debug_unit integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name, ch_err character(len=20) :: name, ch_err
class(psb_i_base_vect_type), allocatable :: ivm class(psb_i_base_vect_type), allocatable :: ivm
logical :: bld_and_
info = psb_success_ info = psb_success_
name = 'psb_spasb' name = 'psb_spasb'
@ -93,7 +95,11 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold)
if (debug_level >= psb_debug_ext_)& if (debug_level >= psb_debug_ext_)&
& write(debug_unit, *) me,' ',trim(name),& & write(debug_unit, *) me,' ',trim(name),&
& ' Begin matrix assembly...' & ' Begin matrix assembly...'
if (present(bld_and)) then
bld_and_ = bld_and
else
bld_and_ = .false.
end if
!check on errors encountered in psdspins !check on errors encountered in psdspins
if (a%is_bld()) then if (a%is_bld()) then
@ -171,7 +177,49 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold)
end if end if
if (bld_and_) then
!!$ allocate(a%ad,mold=a%a)
!!$ allocate(a%and,mold=a%a)o
call a%split_nd(n_row,n_col,info)
!!$ block
!!$ character(len=1024) :: fname
!!$ type(psb_z_coo_sparse_mat) :: acoo
!!$ type(psb_z_csr_sparse_mat), allocatable :: aclip
!!$ type(psb_z_ecsr_sparse_mat), allocatable :: andclip
!!$ logical, parameter :: use_ecsr=.true.
!!$ allocate(aclip)
!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.)
!!$ allocate(a%ad,mold=a%a)
!!$ call a%ad%mv_from_coo(acoo,info)
!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.)
!!$ if (use_ecsr) then
!!$ allocate(andclip)
!!$ call andclip%mv_from_coo(acoo,info)
!!$ call move_alloc(andclip,a%and)
!!$ else
!!$ allocate(a%and,mold=a%a)
!!$ call a%and%mv_from_coo(acoo,info)
!!$ end if
!!$ if (.false.) then
!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%ad%print(25)
!!$ close(25)
!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%and%print(25)
!!$ close(25)
!!$ !call andclip%set_cols(n_col)
!!$ write(*,*) me,' ',trim(name),' ad ',&
!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col
!!$ write(*,*) me,' ',trim(name),' and ',&
!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col
!!$ end if
!!$ end block
else
if (allocated(a%ad)) deallocate(a%ad)
if (allocated(a%and)) deallocate(a%and)
end if
if (debug_level >= psb_debug_ext_) then if (debug_level >= psb_debug_ext_) then
ch_err=a%get_fmt() ch_err=a%get_fmt()
write(debug_unit, *) me,' ',trim(name),': From SPCNV',& write(debug_unit, *) me,' ',trim(name),': From SPCNV',&

@ -2018,3 +2018,321 @@ CPPFLAGS="$SAVE_CPPFLAGS";
])dnl ])dnl
dnl @synopsis PAC_ARG_WITH_LIBRSB
dnl
dnl Test for --with-librsb="pathname".
dnl
dnl Defines the path to LIBRSB build dir.
dnl
dnl note: Renamed after PAC_ARG_WITH_LIBS as in the Trilinos package.
dnl
dnl Example use:
dnl
dnl PAC_ARG_WITH_LIBRSB
dnl
dnl tests for --with-librsb and pre-pends to LIBRSB_PATH
dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl
AC_DEFUN(PAC_ARG_WITH_LIBRSB,
[SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS"
AC_ARG_WITH(librsb,
AC_HELP_STRING([--with-librsb], [The directory for LIBRSB, for example,
--with-librsb=/opt/packages/librsb]),
[pac_cv_librsb_dir=$withval],
[pac_cv_librsb_dir=''])
if test "x$pac_cv_librsb_dir" != "x"; then
LIBS="-L$pac_cv_librsb_dir $LIBS"
RSB_INCLUDES="-I$pac_cv_librsb_dir"
# CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS"
RSB_LIBDIR="-L$pac_cv_librsb_dir"
fi
#AC_MSG_CHECKING([librsb dir $pac_cv_librsb_dir])
AC_CHECK_HEADER([$pac_cv_librsb_dir/rsb.h],
[pac_rsb_header_ok=yes],
[pac_rsb_header_ok=no; RSB_INCLUDES=""])
if test "x$pac_rsb_header_ok" == "xyes" ; then
RSB_LIBS="-lrsb $RSB_LIBDIR"
# LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS";
# AC_MSG_CHECKING([for spgpuCreate in $GPU_LIBS])
# AC_TRY_LINK_FUNC(spgpuCreate,
# [pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; ],
# [pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS=""])
# AC_MSG_RESULT($pac_gpu_lib_ok)
# if test "x$pac_cv_have_spgpu" == "xyes" ; then
# AC_MSG_NOTICE([Have found SPGPU])
RSBLIBNAME="librsb.a";
LIBRSB_DIR="$pac_cv_librsb_dir";
# SPGPU_DEFINES="-DHAVE_SPGPU";
LIBRSB_INCDIR="$LIBRSB_DIR";
LIBRSB_INCLUDES="-I$LIBRSB_INCDIR";
LIBRSB_LIBS="-lrsb -L$LIBRSB_DIR";
# CUDA_DIR="$pac_cv_cuda_dir";
LIBRSB_DEFINES="-DHAVE_RSB";
LRSB=-lpsb_rsb
# CUDA_INCLUDES="-I$pac_cv_cuda_dir/include"
# CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib"
FDEFINES="$LIBRSB_DEFINES $psblas_cv_define_prepend $FDEFINES";
CDEFINES="$LIBRSB_DEFINES $CDEFINES";#CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES";
fi
# fi
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
])
dnl
dnl @synopsis PAC_CHECK_SPGPU
dnl
dnl Will try to find the spgpu library and headers.
dnl
dnl Will use $CC
dnl
dnl If the test passes, will execute ACTION-IF-FOUND. Otherwise, ACTION-IF-NOT-FOUND.
dnl Note : This file will be likely to induce the compiler to create a module file
dnl (for a module called conftest).
dnl Depending on the compiler flags, this could cause a conftest.mod file to appear
dnl in the present directory, or in another, or with another name. So be warned!
dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl
AC_DEFUN(PAC_CHECK_SPGPU,
[SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS"
if test "x$pac_cv_have_cuda" == "x"; then
PAC_CHECK_CUDA()
fi
dnl AC_MSG_NOTICE([From CUDA: $pac_cv_have_cuda ])
if test "x$pac_cv_have_cuda" == "xyes"; then
AC_ARG_WITH(spgpu, AC_HELP_STRING([--with-spgpu=DIR], [Specify the directory for SPGPU library and includes.]),
[pac_cv_spgpudir=$withval],
[pac_cv_spgpudir=''])
AC_LANG([C])
if test "x$pac_cv_spgpudir" != "x"; then
LIBS="-L$pac_cv_spgpudir/lib $LIBS"
GPU_INCLUDES="-I$pac_cv_spgpudir/include"
CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS"
GPU_LIBDIR="-L$pac_cv_spgpudir/lib"
fi
AC_MSG_CHECKING([spgpu dir $pac_cv_spgpudir])
AC_CHECK_HEADER([core.h],
[pac_gpu_header_ok=yes],
[pac_gpu_header_ok=no; GPU_INCLUDES=""])
if test "x$pac_gpu_header_ok" == "xyes" ; then
GPU_LIBS="-lspgpu $GPU_LIBDIR"
LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS";
AC_MSG_CHECKING([for spgpuCreate in $GPU_LIBS])
AC_TRY_LINK_FUNC(spgpuCreate,
[pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; ],
[pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS=""])
AC_MSG_RESULT($pac_gpu_lib_ok)
if test "x$pac_cv_have_spgpu" == "xyes" ; then
AC_MSG_NOTICE([Have found SPGPU])
SPGPULIBNAME="libpsbgpu.a";
SPGPU_DIR="$pac_cv_spgpudir";
SPGPU_DEFINES="-DHAVE_SPGPU";
SPGPU_INCDIR="$SPGPU_DIR/include";
SPGPU_INCLUDES="-I$SPGPU_INCDIR";
SPGPU_LIBS="-lspgpu -L$SPGPU_DIR/lib";
LGPU=-lpsb_gpu
CUDA_DIR="$pac_cv_cuda_dir";
CUDA_DEFINES="-DHAVE_CUDA";
CUDA_INCLUDES="-I$pac_cv_cuda_dir/include"
CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib"
FDEFINES="$psblas_cv_define_prepend-DHAVE_GPU $psblas_cv_define_prepend-DHAVE_SPGPU $psblas_cv_define_prepend-DHAVE_CUDA $FDEFINES";
CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES" ;
fi
fi
fi
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
])dnl
dnl @synopsis PAC_CHECK_CUDA
dnl
dnl Will try to find the cuda library and headers.
dnl
dnl Will use $CC
dnl
dnl If the test passes, will execute ACTION-IF-FOUND. Otherwise, ACTION-IF-NOT-FOUND.
dnl Note : This file will be likely to induce the compiler to create a module file
dnl (for a module called conftest).
dnl Depending on the compiler flags, this could cause a conftest.mod file to appear
dnl in the present directory, or in another, or with another name. So be warned!
dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl
AC_DEFUN(PAC_CHECK_CUDA,
[AC_ARG_WITH(cuda, AC_HELP_STRING([--with-cuda=DIR], [Specify the CUDA install directory.]),
[pac_cv_cuda_dir=$withval],
[pac_cv_cuda_dir=''])
AC_LANG([C])
SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS"
if test "x$pac_cv_cuda_dir" != "x"; then
CUDA_DIR="$pac_cv_cuda_dir"
LIBS="-L$pac_cv_cuda_dir/lib $LIBS"
CUDA_INCLUDES="-I$pac_cv_cuda_dir/include"
CUDA_DEFINES="-DHAVE_CUDA"
CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS"
CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib"
if test -f "$pac_cv_cuda_dir/bin/nvcc"; then
CUDA_NVCC="$pac_cv_cuda_dir/bin/nvcc"
else
CUDA_NVCC="nvcc"
fi
fi
AC_MSG_CHECKING([cuda dir $pac_cv_cuda_dir])
AC_CHECK_HEADER([cuda_runtime.h],
[pac_cuda_header_ok=yes],
[pac_cuda_header_ok=no; CUDA_INCLUDES=""])
if test "x$pac_cuda_header_ok" == "xyes" ; then
CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR"
LIBS="$CUDA_LIBS -lm $LIBS";
AC_MSG_CHECKING([for cudaMemcpy in $CUDA_LIBS])
AC_TRY_LINK_FUNC(cudaMemcpy,
[pac_cv_have_cuda=yes;pac_cuda_lib_ok=yes; ],
[pac_cv_have_cuda=no;pac_cuda_lib_ok=no; CUDA_LIBS=""])
AC_MSG_RESULT($pac_cuda_lib_ok)
fi
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
])dnl
dnl @synopsis PAC_ARG_WITH_CUDACC
dnl
dnl Test for --with-cudacc="set_of_cc".
dnl
dnl Defines the CC to compile for
dnl
dnl
dnl Example use:
dnl
dnl PAC_ARG_WITH_CUDACC
dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl
AC_DEFUN([PAC_ARG_WITH_CUDACC],
[
AC_ARG_WITH(cudacc,
AC_HELP_STRING([--with-cudacc], [A comma-separated list of CCs to compile to, for example,
--with-cudacc=50,60,70,75]),
[pac_cv_cudacc=$withval],
[pac_cv_cudacc=''])
])
AC_DEFUN(PAC_ARG_WITH_LIBRSB,
[SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS"
AC_ARG_WITH(librsb,
AC_HELP_STRING([--with-librsb], [The directory for LIBRSB, for example,
--with-librsb=/opt/packages/librsb]),
[pac_cv_librsb_dir=$withval],
[pac_cv_librsb_dir=''])
if test "x$pac_cv_librsb_dir" != "x"; then
LIBS="-L$pac_cv_librsb_dir $LIBS"
RSB_INCLUDES="-I$pac_cv_librsb_dir"
# CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS"
RSB_LIBDIR="-L$pac_cv_librsb_dir"
fi
#AC_MSG_CHECKING([librsb dir $pac_cv_librsb_dir])
AC_CHECK_HEADER([$pac_cv_librsb_dir/rsb.h],
[pac_rsb_header_ok=yes],
[pac_rsb_header_ok=no; RSB_INCLUDES=""])
if test "x$pac_rsb_header_ok" == "xyes" ; then
RSB_LIBS="-lrsb $RSB_LIBDIR"
# LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS";
# AC_MSG_CHECKING([for spgpuCreate in $GPU_LIBS])
# AC_TRY_LINK_FUNC(spgpuCreate,
# [pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; ],
# [pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS=""])
# AC_MSG_RESULT($pac_gpu_lib_ok)
# if test "x$pac_cv_have_spgpu" == "xyes" ; then
# AC_MSG_NOTICE([Have found SPGPU])
RSBLIBNAME="librsb.a";
LIBRSB_DIR="$pac_cv_librsb_dir";
# SPGPU_DEFINES="-DHAVE_SPGPU";
LIBRSB_INCDIR="$LIBRSB_DIR";
LIBRSB_INCLUDES="-I$LIBRSB_INCDIR";
LIBRSB_LIBS="-lrsb -L$LIBRSB_DIR";
# CUDA_DIR="$pac_cv_cuda_dir";
LIBRSB_DEFINES="-DHAVE_RSB";
LRSB=-lpsb_rsb
# CUDA_INCLUDES="-I$pac_cv_cuda_dir/include"
# CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib"
FDEFINES="$LIBRSB_DEFINES $psblas_cv_define_prepend $FDEFINES";
CDEFINES="$LIBRSB_DEFINES $CDEFINES";#CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES";
fi
# fi
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
])
dnl
dnl @synopsis PAC_CHECK_CUDA_VERSION
dnl
dnl Will try to find the cuda version
dnl
dnl Will use $CC
dnl
dnl If the test passes, will execute ACTION-IF-FOUND. Otherwise, ACTION-IF-NOT-FOUND.
dnl Note : This file will be likely to induce the compiler to create a module file
dnl (for a module called conftest).
dnl Depending on the compiler flags, this could cause a conftest.mod file to appear
dnl in the present directory, or in another, or with another name. So be warned!
dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl
AC_DEFUN(PAC_CHECK_CUDA_VERSION,
[AC_LANG_PUSH([C])
SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS"
if test "x$pac_cv_have_cuda" == "x"; then
PAC_CHECK_CUDA()
fi
if test "x$pac_cv_have_cuda" == "xyes"; then
CUDA_DIR="$pac_cv_cuda_dir"
LIBS="-L$pac_cv_cuda_dir/lib $LIBS"
CUDA_INCLUDES="-I$pac_cv_cuda_dir/include"
CUDA_DEFINES="-DHAVE_CUDA"
CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS"
CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib"
CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR"
LIBS="$CUDA_LIBS -lm $LIBS";
AC_MSG_CHECKING([for CUDA version])
AC_LINK_IFELSE([AC_LANG_SOURCE([
#include <stdio.h>
#include <cuda.h>
int main(int argc, char **argv)
{
printf("%d",CUDA_VERSION);
return(0);
} ])],
[pac_cv_cuda_version=`./conftest${ac_exeext} | sed 's/^ *//'`;],
[pac_cv_cuda_version="unknown";])
AC_MSG_RESULT($pac_cv_cuda_version)
fi
AC_LANG_POP([C])
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
])dnl

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save