Merge branch 'repackage' into development

development
sfilippone 3 months ago
commit 40e40e69f5

@ -67,6 +67,26 @@ UTILMODNAME=@UTILMODNAME@
CBINDLIBNAME=libpsb_cbind.a
CUDAD=@CUDAD@
CUDALD=@CUDALD@
LCUDA=@LCUDA@
SPGPU_LIBS=@SPGPU_LIBS@
CUDA_DIR=@CUDA_DIR@
CUDA_DEFINES=@CUDA_DEFINES@
CUDA_INCLUDES=@CUDA_INCLUDES@
CUDA_LIBS=@CUDA_LIBS@
CUDA_VERSION=@CUDA_VERSION@
CUDA_SHORT_VERSION=@CUDA_SHORT_VERSION@
NVCC=@CUDA_NVCC@
CUDEFINES=@CUDEFINES@
.SUFFIXES: .cu
.cu.o:
$(NVCC) $(CINCLUDES) $(CDEFINES) $(CUDEFINES) -c $<
@PSBLASRULES@
PSBGPULDLIBS=$(LCUDA) $(SPGPU_LIBS) $(CUDA_LIBS) $(PSBLDLIBS) $(LIBS)

@ -1,6 +1,6 @@
include Make.inc
all: dirs based precd kryld utild cbindd libd
all: dirs based precd kryld utild cbindd extd $(CUDAD) libd
@echo "====================================="
@echo "PSBLAS libraries Compilation Successful."
@ -12,15 +12,20 @@ dirs:
precd: based
utild: based
kryld: precd
extd: based
cudad: extd
cbindd: based precd kryld utild
libd: based precd kryld utild cbindd
libd: based precd kryld utild cbindd extd $(CUDALD)
$(MAKE) -C base lib
$(MAKE) -C prec lib
$(MAKE) -C krylov lib
$(MAKE) -C util lib
$(MAKE) -C cbind lib
$(MAKE) -C ext lib
cudald: cudad
$(MAKE) -C cuda lib
based:
$(MAKE) -C base objs
@ -32,6 +37,10 @@ utild:
$(MAKE) -C util objs
cbindd:
$(MAKE) -C cbind objs
extd: based
$(MAKE) -C ext objs
cudad: based extd
$(MAKE) -C cuda objs
install: all
@ -56,6 +65,8 @@ clean:
$(MAKE) -C krylov clean
$(MAKE) -C util clean
$(MAKE) -C cbind clean
$(MAKE) -C ext clean
$(MAKE) -C cuda clean
check: all
make check -C test/serial
@ -71,6 +82,8 @@ veryclean: cleanlib
cd krylov && $(MAKE) veryclean
cd util && $(MAKE) veryclean
cd cbind && $(MAKE) veryclean
cd ext && $(MAKE) veryclean
cd cuda && $(MAKE) veryclean
cd test/fileread && $(MAKE) clean
cd test/pargen && $(MAKE) clean
cd test/util && $(MAKE) clean

@ -1,4 +1,4 @@
PSBLAS library, version 3.8
PSBLAS library, version 3.9
===========================
The architecture of the Fortran 2003 sparse BLAS is described in:
@ -40,6 +40,15 @@ The main reference for the serial sparse BLAS is:
>linear algebra subprograms for sparse matrices: a user level interface,
>ACM Trans. Math. Softw., 23(3), 379-401, 1997.
CUDA and GPU support
--------------------
This version of PSBLAS incorporates into a single package three
entities that were previouslty separated:
1. PSBLAS -- the base library
2. PSBLAS-EXT -- a library providing additional storage formats
3. SPGPU -- a package of kernels for NVIDIA GPUs originally
written by Davide Barbieri and Salvatore Filippone;
see the license file cuda/License-spgpu.md
INSTALLING
----------
@ -61,6 +70,11 @@ prerequisites (see also SERIAL below):
specify `--with-amd` (see `./configure --help` for more details).
We use the C interface to AMD.
5. If you have CUDA available, use
--with-cuda=<path> to specify the CUDA toolkit location
--with-cudacc=XX,YY,ZZ to specify a list of target CCs (compute
capabilities) to compile the CUDA code for.
The configure script will generate a Make.inc file suitable for building
the library. The script is capable of recognizing the needed libraries
with their default names; if they are in unusual places consider adding

@ -191,7 +191,9 @@ subroutine psi_cswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
logical, parameter :: usersend=.false.
complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_
@ -676,7 +678,9 @@ subroutine psi_cswapidxv(ctxt,icomm,flag,beta,y,idx, &
logical, parameter :: usersend=.false.
complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_

@ -195,7 +195,9 @@ subroutine psi_ctranidxm(ctxt,icomm,flag,n,beta,y,idx,&
logical, parameter :: usersend=.false.
complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_
@ -688,7 +690,9 @@ subroutine psi_ctranidxv(ctxt,icomm,flag,beta,y,idx,&
logical, parameter :: usersend=.false.
complex(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_

@ -191,7 +191,9 @@ subroutine psi_dswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
logical, parameter :: usersend=.false.
real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_
@ -676,7 +678,9 @@ subroutine psi_dswapidxv(ctxt,icomm,flag,beta,y,idx, &
logical, parameter :: usersend=.false.
real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_

@ -195,7 +195,9 @@ subroutine psi_dtranidxm(ctxt,icomm,flag,n,beta,y,idx,&
logical, parameter :: usersend=.false.
real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_
@ -688,7 +690,9 @@ subroutine psi_dtranidxv(ctxt,icomm,flag,beta,y,idx,&
logical, parameter :: usersend=.false.
real(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_

@ -191,7 +191,9 @@ subroutine psi_eswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
logical, parameter :: usersend=.false.
integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_
@ -676,7 +678,9 @@ subroutine psi_eswapidxv(ctxt,icomm,flag,beta,y,idx, &
logical, parameter :: usersend=.false.
integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_

@ -195,7 +195,9 @@ subroutine psi_etranidxm(ctxt,icomm,flag,n,beta,y,idx,&
logical, parameter :: usersend=.false.
integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_
@ -688,7 +690,9 @@ subroutine psi_etranidxv(ctxt,icomm,flag,beta,y,idx,&
logical, parameter :: usersend=.false.
integer(psb_epk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_

@ -191,7 +191,9 @@ subroutine psi_i2swapidxm(ctxt,icomm,flag,n,beta,y,idx, &
logical, parameter :: usersend=.false.
integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_
@ -676,7 +678,9 @@ subroutine psi_i2swapidxv(ctxt,icomm,flag,beta,y,idx, &
logical, parameter :: usersend=.false.
integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_

@ -195,7 +195,9 @@ subroutine psi_i2tranidxm(ctxt,icomm,flag,n,beta,y,idx,&
logical, parameter :: usersend=.false.
integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_
@ -688,7 +690,9 @@ subroutine psi_i2tranidxv(ctxt,icomm,flag,beta,y,idx,&
logical, parameter :: usersend=.false.
integer(psb_i2pk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_

@ -191,7 +191,9 @@ subroutine psi_mswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
logical, parameter :: usersend=.false.
integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_
@ -676,7 +678,9 @@ subroutine psi_mswapidxv(ctxt,icomm,flag,beta,y,idx, &
logical, parameter :: usersend=.false.
integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_

@ -195,7 +195,9 @@ subroutine psi_mtranidxm(ctxt,icomm,flag,n,beta,y,idx,&
logical, parameter :: usersend=.false.
integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_
@ -688,7 +690,9 @@ subroutine psi_mtranidxv(ctxt,icomm,flag,beta,y,idx,&
logical, parameter :: usersend=.false.
integer(psb_mpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_

@ -191,7 +191,9 @@ subroutine psi_sswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
logical, parameter :: usersend=.false.
real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_
@ -676,7 +678,9 @@ subroutine psi_sswapidxv(ctxt,icomm,flag,beta,y,idx, &
logical, parameter :: usersend=.false.
real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_

@ -195,7 +195,9 @@ subroutine psi_stranidxm(ctxt,icomm,flag,n,beta,y,idx,&
logical, parameter :: usersend=.false.
real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_
@ -688,7 +690,9 @@ subroutine psi_stranidxv(ctxt,icomm,flag,beta,y,idx,&
logical, parameter :: usersend=.false.
real(psb_spk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_

@ -191,7 +191,9 @@ subroutine psi_zswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
logical, parameter :: usersend=.false.
complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_
@ -676,7 +678,9 @@ subroutine psi_zswapidxv(ctxt,icomm,flag,beta,y,idx, &
logical, parameter :: usersend=.false.
complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_

@ -195,7 +195,9 @@ subroutine psi_ztranidxm(ctxt,icomm,flag,n,beta,y,idx,&
logical, parameter :: usersend=.false.
complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_
@ -688,7 +690,9 @@ subroutine psi_ztranidxv(ctxt,icomm,flag,beta,y,idx,&
logical, parameter :: usersend=.false.
complex(psb_dpk_), pointer, dimension(:) :: sndbuf, rcvbuf
#if !defined(FLANG)
volatile :: sndbuf, rcvbuf
#endif
character(len=20) :: name
info=psb_success_

@ -87,7 +87,7 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info)
integer(psb_lpk_) :: mglob, ih
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np,me
logical, parameter :: gettime=.true., debug=.false.
logical, parameter :: debug=.false.
integer(psb_mpk_) :: xchg_alg
logical, parameter :: do_timings=.false.
integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1
@ -132,10 +132,6 @@ subroutine psi_adjcncy_fnd_owner(idx,iprc,adj,idxmap,info)
goto 9999
end if
if (gettime) then
t0 = psb_wtime()
end if
nadj = size(adj)
nidx = size(idx)
call psb_realloc(nidx,iprc,info)

@ -137,7 +137,7 @@ subroutine psi_i_desc_index(desc,index_in,dep_list,&
& idxr, idxs, iszs, iszr, nesd, nerv, ixp, idx
integer(psb_mpk_) :: icomm, minfo
logical, parameter :: do_timings=.true., oldstyle=.false., debug=.false.
logical, parameter :: do_timings=.false., oldstyle=.false., debug=.false.
integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1, idx_phase4=-1
logical, parameter :: usempi=.false.
integer(psb_ipk_) :: debug_level, debug_unit

@ -99,6 +99,33 @@ module psi_c_serial_mod
end subroutine psi_caxpbyv2
end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_spk_), intent (in) :: x(:)
complex(psb_spk_), intent (inout) :: y(:)
complex(psb_spk_), intent (inout) :: z(:)
complex(psb_spk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_cabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_spk_), intent (in) :: x(:)
complex(psb_spk_), intent (inout) :: y(:)
complex(psb_spk_), intent (inout) :: z(:)
complex(psb_spk_), intent (inout) :: w(:)
complex(psb_spk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
end subroutine psi_cxyzw
end interface psi_xyzw
interface psi_gth
subroutine psi_cgthmv(n,k,idx,alpha,x,beta,y)
import :: psb_ipk_, psb_spk_

@ -99,6 +99,33 @@ module psi_d_serial_mod
end subroutine psi_daxpbyv2
end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_dpk_), intent (in) :: x(:)
real(psb_dpk_), intent (inout) :: y(:)
real(psb_dpk_), intent (inout) :: z(:)
real(psb_dpk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_dabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_dpk_), intent (in) :: x(:)
real(psb_dpk_), intent (inout) :: y(:)
real(psb_dpk_), intent (inout) :: z(:)
real(psb_dpk_), intent (inout) :: w(:)
real(psb_dpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
end subroutine psi_dxyzw
end interface psi_xyzw
interface psi_gth
subroutine psi_dgthmv(n,k,idx,alpha,x,beta,y)
import :: psb_ipk_, psb_dpk_

@ -99,6 +99,33 @@ module psi_e_serial_mod
end subroutine psi_eaxpbyv2
end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_epk_), intent (in) :: x(:)
integer(psb_epk_), intent (inout) :: y(:)
integer(psb_epk_), intent (inout) :: z(:)
integer(psb_epk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_eabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_epk_), intent (in) :: x(:)
integer(psb_epk_), intent (inout) :: y(:)
integer(psb_epk_), intent (inout) :: z(:)
integer(psb_epk_), intent (inout) :: w(:)
integer(psb_epk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
end subroutine psi_exyzw
end interface psi_xyzw
interface psi_gth
subroutine psi_egthmv(n,k,idx,alpha,x,beta,y)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_

@ -99,6 +99,33 @@ module psi_i2_serial_mod
end subroutine psi_i2axpbyv2
end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_i2pk_), intent (in) :: x(:)
integer(psb_i2pk_), intent (inout) :: y(:)
integer(psb_i2pk_), intent (inout) :: z(:)
integer(psb_i2pk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_i2abgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_i2pk_), intent (in) :: x(:)
integer(psb_i2pk_), intent (inout) :: y(:)
integer(psb_i2pk_), intent (inout) :: z(:)
integer(psb_i2pk_), intent (inout) :: w(:)
integer(psb_i2pk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
end subroutine psi_i2xyzw
end interface psi_xyzw
interface psi_gth
subroutine psi_i2gthmv(n,k,idx,alpha,x,beta,y)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_

@ -99,6 +99,33 @@ module psi_m_serial_mod
end subroutine psi_maxpbyv2
end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_mpk_), intent (in) :: x(:)
integer(psb_mpk_), intent (inout) :: y(:)
integer(psb_mpk_), intent (inout) :: z(:)
integer(psb_mpk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_mabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_mpk_), intent (in) :: x(:)
integer(psb_mpk_), intent (inout) :: y(:)
integer(psb_mpk_), intent (inout) :: z(:)
integer(psb_mpk_), intent (inout) :: w(:)
integer(psb_mpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
end subroutine psi_mxyzw
end interface psi_xyzw
interface psi_gth
subroutine psi_mgthmv(n,k,idx,alpha,x,beta,y)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_

@ -99,6 +99,33 @@ module psi_s_serial_mod
end subroutine psi_saxpbyv2
end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_spk_), intent (in) :: x(:)
real(psb_spk_), intent (inout) :: y(:)
real(psb_spk_), intent (inout) :: z(:)
real(psb_spk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_sabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_spk_), intent (in) :: x(:)
real(psb_spk_), intent (inout) :: y(:)
real(psb_spk_), intent (inout) :: z(:)
real(psb_spk_), intent (inout) :: w(:)
real(psb_spk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
end subroutine psi_sxyzw
end interface psi_xyzw
interface psi_gth
subroutine psi_sgthmv(n,k,idx,alpha,x,beta,y)
import :: psb_ipk_, psb_spk_

@ -99,6 +99,33 @@ module psi_z_serial_mod
end subroutine psi_zaxpbyv2
end interface psb_geaxpby
interface psi_abgdxyz
subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_dpk_), intent (in) :: x(:)
complex(psb_dpk_), intent (inout) :: y(:)
complex(psb_dpk_), intent (inout) :: z(:)
complex(psb_dpk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
end subroutine psi_zabgdxyz
end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_dpk_), intent (in) :: x(:)
complex(psb_dpk_), intent (inout) :: y(:)
complex(psb_dpk_), intent (inout) :: z(:)
complex(psb_dpk_), intent (inout) :: w(:)
complex(psb_dpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
end subroutine psi_zxyzw
end interface psi_xyzw
interface psi_gth
subroutine psi_zgthmv(n,k,idx,alpha,x,beta,y)
import :: psb_ipk_, psb_dpk_

@ -409,7 +409,7 @@ contains
!
! Since the hashed lists take up (somewhat) more than 2*N_COL integers,
! it makes no sense to use them if you don't have at least
! 3 processes, no matter what the size of the process.
! 3 processes, no matter what the size of the index space.
!
val = psb_cd_is_large_size(m) .and. (np > 2)
end function psb_cd_choose_large_state

@ -215,7 +215,9 @@ contains
end if
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap,owned_,info) &
!$omp private(i)
do i=1, size(idx)
if (mask(i)) then
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
@ -229,9 +231,11 @@ contains
end if
end if
end do
!$omp end parallel do
else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap,owned_,info) &
!$omp private(i)
do i=1, size(idx)
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
idx(i) = idxmap%min_glob_row + idx(i) - 1
@ -243,7 +247,7 @@ contains
info = -1
end if
end do
!$omp end parallel do
end if
end subroutine block_ll2gv1
@ -277,7 +281,9 @@ contains
end if
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,idxmap,owned_,info,im) &
!$omp private(i)
do i=1, im
if (mask(i)) then
if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then
@ -291,9 +297,11 @@ contains
end if
end if
end do
!$omp end parallel do
else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,idxmap,owned_,info,im) &
!$omp private(i)
do i=1, im
if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then
idxout(i) = idxmap%min_glob_row + idxin(i) - 1
@ -305,7 +313,7 @@ contains
info = -1
end if
end do
!$omp end parallel do
end if
if (is > im) then
@ -392,6 +400,9 @@ contains
if (present(mask)) then
if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idx,idxmap,owned_) &
!$omp private(i,nv,tidx)
do i=1, is
if (mask(i)) then
if ((idxmap%min_glob_row <= idx(i)).and. &
@ -408,7 +419,11 @@ contains
end if
end if
end do
!$omp end parallel do
else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idx,idxmap,owned_) &
!$omp private(i,ip,lip,tidx,info)
do i=1,is
if (mask(i)) then
if ((idxmap%min_glob_row <= idx(i)).and.&
@ -424,8 +439,8 @@ contains
end if
end if
end do
!$omp end parallel do
else
!!$ write(0,*) 'Block status: invalid ',idxmap%get_state()
idx(1:is) = -1
info = -1
end if
@ -433,6 +448,9 @@ contains
else if (.not.present(mask)) then
if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idx,idxmap,owned_) &
!$omp private(i,nv,tidx)
do i=1, is
if ((idxmap%min_glob_row <= idx(i)).and.&
& (idx(i) <= idxmap%max_glob_row)) then
@ -447,8 +465,11 @@ contains
idx(i) = -1
end if
end do
!$omp end parallel do
else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idx,idxmap,owned_) &
!$omp private(i,ip,lip,tidx,info)
do i=1,is
if ((idxmap%min_glob_row <= idx(i)).and.&
& (idx(i) <= idxmap%max_glob_row)) then
@ -462,6 +483,7 @@ contains
idx(i) = -1
end if
end do
!$omp end parallel do
else
idx(1:is) = -1
info = -1
@ -953,7 +975,9 @@ contains
end if
info = psb_success_
else
info = -5
write(0,*) 'From has_search_ins:',info,ip,lip,nxt,&
& idxmap%min_glob_row,idxmap%max_glob_row
info = -6
return
end if
idxout(i) = lip + idxmap%local_rows

@ -221,6 +221,9 @@ contains
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap,owned_) &
!$omp private(i)
do i=1, size(idx)
if (mask(i)) then
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
@ -233,9 +236,12 @@ contains
end if
end if
end do
!$omp end parallel do
else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap,owned_) &
!$omp private(i)
do i=1, size(idx)
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
idx(i) = idxmap%loc_to_glob(idx(i))
@ -246,7 +252,7 @@ contains
idx(i) = -1
end if
end do
!$omp end parallel do
end if
end subroutine hash_l2gv1
@ -363,6 +369,9 @@ contains
else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idx,mglob,idxmap,nrm,ncol,nrow,owned_) &
!$omp private(i,ip,lip,tlip,info)
do i = 1, is
if (mask(i)) then
ip = idx(i)
@ -388,7 +397,7 @@ contains
endif
end if
enddo
!$omp end parallel do
else
write(0,*) 'Hash status: invalid ',idxmap%get_state()
idx(1:is) = -1
@ -404,6 +413,9 @@ contains
else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idx,mglob,idxmap,nrm,ncol,nrow,owned_) &
!$omp private(i,ip,lip,tlip,info)
do i = 1, is
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
@ -427,14 +439,12 @@ contains
idx(i) = lip
endif
enddo
!$omp end parallel do
else
write(0,*) 'Hash status: invalid ',idxmap%get_state()
idx(1:is) = -1
info = -1
end if
end if
end subroutine hash_g2lv1
@ -493,6 +503,9 @@ contains
else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idxin,idxout,mglob,idxmap,nrm,ncol,nrow,owned_) &
!$omp private(i,ip,lip,tlip,info)
do i = 1, is
if (mask(i)) then
ip = idxin(i)
@ -518,6 +531,7 @@ contains
endif
end if
enddo
!$omp end parallel do
else
write(0,*) 'Hash status: invalid ',idxmap%get_state()
idxout(1:is) = -1
@ -533,6 +547,9 @@ contains
else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idxin,idxout,mglob,idxmap,nrm,ncol,nrow,owned_) &
!$omp private(i,ip,lip,tlip,info)
do i = 1, is
ip = idxin(i)
if ((ip < 1 ).or.(ip>mglob)) then
@ -556,14 +573,12 @@ contains
idxout(i) = lip
endif
enddo
!$omp end parallel do
else
write(0,*) 'Hash status: invalid ',idxmap%get_state()
idxout(1:is) = -1
info = -1
end if
end if
end subroutine hash_g2lv2
@ -649,7 +664,7 @@ contains
& err_act
integer(psb_lpk_) :: mglob, ip, nxt, tlip
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: me, np
integer(psb_ipk_) :: me, np,ith
character(len=20) :: name,ch_err
logical, allocatable :: mask_(:)
!!$ logical :: use_openmp = .true.
@ -683,370 +698,249 @@ contains
mglob = idxmap%get_gr()
nrow = idxmap%get_lr()
!write(0,*) me,name,' before loop ',psb_errstatus_fatal()
#if 0 && defined(OPENMP)
!call OMP_init_lock(ins_lck)
if (idxmap%is_bld()) then
#if defined(OPENMP)
isLoopValid = .true.
ncol = idxmap%get_lc()
if (present(mask)) then
mask_ = mask
else
allocate(mask_(size(idx)))
mask_ = .true.
end if
if (idxmap%is_bld()) then
if (present(lidx)) then
if (present(mask)) then
!$omp critical(hash_g2l_ins)
! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) &
! $ OMP shared(name,me,is,idx,ins_lck,mask,mglob,idxmap,ncol,nrow,laddsz,lidx) &
! $ OMP private(i,ip,lip,tlip,nxt,info) &
! $ OMP reduction(.AND.:isLoopValid)
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(lidx,mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) &
!$omp private(i,ip,lip,tlip,nxt,info) &
!$omp reduction(.AND.:isLoopValid)
do i = 1, is
info = 0
if (.not. isLoopValid) cycle
if (mask(i)) then
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob) ) then
idx(i) = -1
cycle
endif
call OMP_set_lock(ins_lck)
ncol = idxmap%get_lc()
call OMP_unset_lock(ins_lck)
! At first, we check the index presence in 'idxmap'. Usually
! the index is found. If it is not found, we repeat the checking,
! but inside a critical region.
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then
call OMP_set_lock(ins_lck)
! We check again if the index is already in 'idxmap', this
! time inside a critical region (we assume that the index
! is often already existing).
ncol = idxmap%get_lc()
if (lip > 0) then
idx(i) = lip
info = psb_success_
else
!$omp critical(hash_g2l_ins)
tlip = lip
nxt = lidx(i)
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (nxt <= nrow) then
idx(i) = -1
else
call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
& idxmap%glb_lc,ncol)
if (lip > 0) then
idx(i) = lip
else if (lip < 0) then
! Index not found
else
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip
if (info /= 0) write(0,*) ' inskey 1 info:',info
if (info >=0) then
! 'nxt' is not equal to 'tlip' when the key is already inside
! the hash map. In that case 'tlip' is the value corresponding
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
if (nxt == lip) then
call psb_ensure_size(lip,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
!write(0,*) 'Error spot 1'
info=1
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false.
idx(i) = -1
else
idx(i) = lip
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol)
end if
idxmap%loc_to_glob(nxt) = ip
nxt = max(ncol,nxt)
call idxmap%set_lc(nxt)
endif
if (isLoopValid) info = 0
idx(i) = lip
info = psb_success_
else
idx(i) = -1
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
isLoopValid = .false.
end if
call OMP_unset_lock(ins_lck)
end if
else
idx(i) = lip
endif
!$omp end critical(hash_g2l_ins)
end if
else
idx(i) = -1
end if
enddo
! $ OMP END PARALLEL DO
!$omp end critical(hash_g2l_ins)
!$omp end parallel do
if (.not. isLoopValid) then
goto 9999
end if
else
!$omp critical(hash_g2l_ins)
else if (.not.present(mask)) then
! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) &
! $ OMP shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz,lidx) &
! $ OMP private(i,ip,lip,tlip,nxt,info) &
! $ OMP reduction(.AND.:isLoopValid)
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(lidx,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) &
!$omp private(i,ip,lip,tlip,nxt,info) &
!$omp reduction(.AND.:isLoopValid)
do i = 1, is
info = 0
if (.not. isLoopValid) cycle
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob) ) then
idx(i) = -1
cycle
endif
call OMP_set_lock(ins_lck)
ncol = idxmap%get_lc()
call OMP_unset_lock(ins_lck)
! At first, we check the index presence in 'idxmap'. Usually
! the index is found. If it is not found, we repeat the checking,
! but inside a critical region.
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then
call OMP_set_lock(ins_lck)
! We check again if the index is already in 'idxmap', this
! time inside a critical region (we assume that the index
! is often already existing).
ncol = idxmap%get_lc()
if (lip > 0) then
idx(i) = lip
info = psb_success_
else
!$omp critical(hash_g2l_ins)
tlip = lip
nxt = lidx(i)
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (nxt <= nrow) then
idx(i) = -1
else
call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
& idxmap%glb_lc,ncol)
if (lip > 0) then
idx(i) = lip
else if (lip < 0) then
else
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip
!!$ if (info /= 0) write(0,*) ' inskey 2 info:',info
if (info >=0) then
! 'nxt' is not equal to 'tlip' when the key is already inside
! the hash map. In that case 'tlip' is the value corresponding
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
if (nxt == lip) then
call psb_ensure_size(lip,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
!write(0,*) 'Error spot 2'
info=1
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false.
idx(i) = -1
else
idx(i) = lip
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol)
end if
idxmap%loc_to_glob(nxt) = ip
nxt = max(ncol,nxt)
call idxmap%set_lc(nxt)
endif
if (isLoopValid) info = 0
idx(i) = lip
info = psb_success_
else
idx(i) = -1
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
isLoopValid = .false.
end if
call OMP_unset_lock(ins_lck)
end if
else
idx(i) = lip
endif
end do
! $ OMP END PARALLEL DO
!$omp end critical(hash_g2l_ins)
if (.not. isLoopValid) then
goto 9999
end if
enddo
!$omp end parallel do
end if
else if (.not.present(lidx)) then
if(present(mask)) then
! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) &
! $ OMP shared(name,me,is,idx,ins_lck,mask,mglob,idxmap,ncol,nrow,laddsz) &
! $ OMP private(i,ip,lip,tlip,nxt,info) &
! $ OMP reduction(.AND.:isLoopValid)
!$omp critical(hash_g2l_ins)
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) &
!$omp private(i,ip,lip,tlip,nxt,info) &
!$omp reduction(.AND.:isLoopValid)
do i = 1, is
info = 0
if (.not. isLoopValid) cycle
if (mask(i)) then
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1
cycle
endif
!call OMP_set_lock(ins_lck)
ncol = idxmap%get_lc()
!call OMP_unset_lock(ins_lck)
! At first, we check the index presence in 'idxmap'. Usually
! the index is found. If it is not found, we repeat the checking,
! but inside a critical region.
!write(0,*) me,name,' b hic 1 ',psb_errstatus_fatal()
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
!write(0,*) me,name,' a hic 1 ',psb_errstatus_fatal()
if (lip < 0) then
!call OMP_set_lock(ins_lck)
! We check again if the index is already in 'idxmap', this
! time inside a critical region (we assume that the index
! is often already existing, so this lock is relatively rare).
if (lip > 0) then
idx(i) = lip
info = psb_success_
else
!$omp critical(hash_g2l_ins)
ncol = idxmap%get_lc()
nxt = ncol + 1
!write(0,*) me,name,' b hic 2 ',psb_errstatus_fatal()
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
!write(0,*) me,name,' a hic 2 ',psb_errstatus_fatal()
call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
& idxmap%glb_lc,ncol)
if (lip > 0) then
idx(i) = lip
else if (lip < 0) then
! Index not found
!write(0,*) me,name,' b hsik ',psb_errstatus_fatal()
else
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
!!$ if (info /= 0) write(0,*) ' inskey 3 info:',info
if (psb_errstatus_fatal()) write(0,*) me,name,' a hsik ',info,omp_get_thread_num()
!write(0,*) me,name,' a hsik ',psb_errstatus_fatal()
lip = tlip
if (info >=0) then
!write(0,*) 'Error before spot 3', info
! 'nxt' is not equal to 'tlip' when the key is already inside
! the hash map. In that case 'tlip' is the value corresponding
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
if (nxt == lip) then
call psb_ensure_size(lip,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (psb_errstatus_fatal()) write(0,*) me,name,' a esz ',info,omp_get_thread_num()
if (info /= psb_success_) then
write(0,*) 'Error spot 3', info
info=1
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false.
idx(i) = -1
else
idx(i) = lip
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol)
end if
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(nxt)
endif
if (isLoopValid) info = 0
idx(i) = lip
info = psb_success_
else
idx(i) = -1
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
isLoopValid = .false.
end if
!!$ if (info /= 0) write(0,*) ' inskey 3.5 info:',info, isLoopValid
!call OMP_unset_lock(ins_lck)
end if
else
idx(i) = lip
!$omp end critical(hash_g2l_ins)
end if
else
idx(i) = -1
end if
enddo
! $ OMP END PARALLEL DO
!$omp end critical(hash_g2l_ins)
!$omp end parallel do
if (.not. isLoopValid) then
goto 9999
end if
else
! $ OMP PARALLEL DO default(none) schedule(DYNAMIC) &
! $ OMP shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) &
! $ OMP private(i,ip,lip,tlip,nxt,info) &
! $ OMP reduction(.AND.:isLoopValid)
!$omp critical(hash_g2l_ins)
else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(name,me,is,idx,ins_lck,mglob,idxmap,ncol,nrow,laddsz) &
!$omp private(i,ip,lip,tlip,nxt,info) &
!$omp reduction(.AND.:isLoopValid)
do i = 1, is
info = 0
if (.not. isLoopValid) cycle
ip = idx(i)
if ((ip < 1 ).or.(ip>mglob)) then
idx(i) = -1
cycle
endif
!call OMP_set_lock(ins_lck)
ncol = idxmap%get_lc()
!call OMP_unset_lock(ins_lck)
! At first, we check the index presence in 'idxmap'. Usually
! the index is found. If it is not found, we repeat the checking,
! but inside a critical region.
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then
!call OMP_set_lock(ins_lck)
! We check again if the index is already in 'idxmap', this
! time inside a critical region (we assume that the index
! is often already existing).
if (lip > 0) then
idx(i) = lip
info = psb_success_
else
!$omp critical(hash_g2l_ins)
ncol = idxmap%get_lc()
nxt = ncol + 1
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
call hash_inner_cnv(ip,lip,idxmap%hashvmask,idxmap%hashv,&
& idxmap%glb_lc,ncol)
if (lip > 0) then
idx(i) = lip
else if (lip < 0) then
! Index not found
else
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip
!!$ if (info /= 0) write(0,*) ' inskey 4 info:',info
if (info >=0) then
! 'nxt' is not equal to 'tlip' when the key is already inside
! the hash map. In that case 'tlip' is the value corresponding
! to the existing mapping.
if (nxt == tlip) then
ncol = MAX(ncol,nxt)
call psb_ensure_size(ncol,idxmap%loc_to_glob,info,&
if (nxt == lip) then
call psb_ensure_size(lip,idxmap%loc_to_glob,info,&
& pad=-1_psb_lpk_,addsz=laddsz)
if (info /= psb_success_) then
!write(0,*) 'Error spot 4'
info=1
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='psb_ensure_size',i_err=(/info/))
isLoopValid = .false.
idx(i) = -1
else
idx(i) = lip
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(ncol)
end if
idxmap%loc_to_glob(nxt) = ip
call idxmap%set_lc(nxt)
endif
if (isLoopValid) info = 0
idx(i) = lip
info = psb_success_
else
idx(i) = -1
end if
!call OMP_unset_lock(ins_lck)
call psb_errpush(psb_err_from_subroutine_ai_,name,&
& a_err='SearchInsKeyVal',i_err=(/info/))
isLoopValid = .false.
end if
else
idx(i) = lip
end if
end do
! $ OMP END PARALLEL DO
!$omp end critical(hash_g2l_ins)
if (.not. isLoopValid) then
goto 9999
end if
enddo
!$omp end parallel do
end if
end if
else
@ -1054,7 +948,7 @@ contains
idx = -1
info = -1
end if
!call OMP_destroy_lock(ins_lck)
if (.not. isLoopValid) goto 9999
#else
!!$ else if (.not.use_openmp) then
isLoopValid = .true.
@ -1073,13 +967,13 @@ contains
call hash_inner_cnv(ip,lip,idxmap%hashvmask,&
& idxmap%hashv,idxmap%glb_lc,ncol)
if (lip < 0) then
tlip = lip
nxt = lidx(i)
if (nxt <= nrow) then
idx(i) = -1
cycle
endif
call psb_hash_searchinskey(ip,tlip,nxt,idxmap%hash,info)
lip = tlip
if (info >=0) then
if (nxt == tlip) then
ncol = max(ncol,nxt)
@ -1754,6 +1648,9 @@ contains
! for a width of psb_hash_bits
!
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(n,hashv,hashmask,x,glb_lc,nrm,mask) &
!$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm)
do i=1, n
if (mask(i)) then
key = x(i)
@ -1791,7 +1688,11 @@ contains
end if
end if
end do
!$omp end parallel do
else
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(n,hashv,hashmask,x,glb_lc,nrm) &
!$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm)
do i=1, n
key = x(i)
ih = iand(key,hashmask)
@ -1827,6 +1728,7 @@ contains
x(i) = tmp
end if
end do
!$omp end parallel do
end if
end subroutine hash_inner_cnv1
@ -1849,6 +1751,9 @@ contains
! for a width of psb_hash_bits
!
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(n,hashv,hashmask,x,y,glb_lc,nrm,mask,psb_err_unit) &
!$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm)
do i=1, n
if (mask(i)) then
key = x(i)
@ -1889,9 +1794,12 @@ contains
end if
end if
end do
!$omp end parallel do
else
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(n,hashv,hashmask,x,y,glb_lc,nrm,psb_err_unit) &
!$omp private(i,key,idx,ih,nh,tmp,lb,ub,lm)
do i=1, n
key = x(i)
ih = iand(key,hashmask)
@ -1930,6 +1838,7 @@ contains
y(i) = tmp
end if
end do
!$omp end parallel do
end if
end subroutine hash_inner_cnv2

@ -383,12 +383,12 @@ contains
integer(psb_lpk_), intent(out) :: val
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: hsize,hmask, hk, hd
integer(psb_ipk_) :: hsize,hmask, hk, hd, i
logical :: redo
info = HashOK
hsize = hash%hsize
hmask = hash%hmask
val = -1
hk = iand(psb_hashval(key),hmask)
if (hk == 0) then
hd = 1
@ -400,21 +400,21 @@ contains
info = HashOutOfMemory
return
end if
val = -1
!$omp atomic
hash%nsrch = hash%nsrch + 1
!$omp end atomic
do
!$omp atomic
hash%nacc = hash%nacc + 1
!$omp end atomic
if (hash%table(hk,1) == key) then
val = hash%table(hk,2)
info = HashDuplicate
!write(0,*) 'In searchinskey 1 : ', info, HashDuplicate
return
end if
redo = .false.
!$omp critical(hashsearchins)
if (hash%table(hk,1) == key) then
val = hash%table(hk,2)
info = HashDuplicate
else
if (hash%table(hk,1) == HashFreeEntry) then
if (hash%nk == hash%hsize -1) then
!
@ -426,30 +426,31 @@ contains
call psb_hash_realloc(hash,info)
if (info /= HashOk) then
info = HashOutOfMemory
!return
else
call psb_hash_searchinskey(key,val,nextval,hash,info)
!return
redo = .true.
end if
else
hash%nk = hash%nk + 1
hash%table(hk,1) = key
hash%table(hk,2) = nextval
val = nextval
!return
end if
info = HashOk
end if
else if (hash%table(hk,1) == key) then
val = hash%table(hk,2)
info = HashDuplicate
else
info = HashNotFound
end if
!$omp end critical(hashsearchins)
if (info /= HashOk) then
write(0,*) 'In searchinskey 2: ', info
if (redo) then
call psb_hash_searchinskey(key,val,nextval,hash,info)
return
end if
if (val > 0) return
if (val > 0) exit
hk = hk - hd
if (hk < 0) hk = hk + hsize
end do
!write(0,*) 'In searchinskey 3: ', info
end subroutine psb_hash_lsearchinskey
recursive subroutine psb_hash_isearchinskey(key,val,nextval,hash,info)
@ -459,6 +460,7 @@ contains
integer(psb_ipk_) :: hsize,hmask, hk, hd
logical :: redo
info = HashOK
hsize = hash%hsize
hmask = hash%hmask
@ -475,16 +477,21 @@ contains
return
end if
val = -1
val = -1
!$omp atomic
hash%nsrch = hash%nsrch + 1
!$omp end atomic
do
!$omp atomic
hash%nacc = hash%nacc + 1
!$omp end atomic
if (hash%table(hk,1) == key) then
val = hash%table(hk,2)
info = HashDuplicate
return
end if
redo = .false.
!$OMP CRITICAL
!$omp critical(hashsearchins)
if (hash%table(hk,1) == HashFreeEntry) then
if (hash%nk == hash%hsize -1) then
!
@ -496,24 +503,28 @@ contains
call psb_hash_realloc(hash,info)
if (info /= HashOk) then
info = HashOutOfMemory
!return
else
redo = .true.
!!$ call psb_hash_searchinskey(key,val,nextval,hash,info)
!!$ return
end if
else
hash%nk = hash%nk + 1
hash%table(hk,1) = key
hash%table(hk,2) = nextval
val = nextval
!return
info = HashOk
end if
else if (hash%table(hk,1) == key) then
val = hash%table(hk,2)
info = HashDuplicate
else
info = HashNotFound
end if
!$omp end critical(hashsearchins)
if (redo) then
call psb_hash_searchinskey(key,val,nextval,hash,info)
return
end if
!$OMP END CRITICAL
if (redo) call psb_hash_searchinskey(key,val,nextval,hash,info)
if (info /= HashOk) return
if (val > 0) return
if (val > 0) exit
hk = hk - hd
if (hk < 0) hk = hk + hsize
end do
@ -551,7 +562,7 @@ contains
end if
if (hash%table(hk,1) == HashFreeEntry) then
val = HashFreeEntry
! !$ info = HashNotFound
info = HashNotFound
return
end if
hk = hk - hd
@ -591,7 +602,7 @@ contains
end if
if (hash%table(hk,1) == HashFreeEntry) then
val = HashFreeEntry
! !$ info = HashNotFound
info = HashNotFound
return
end if
hk = hk - hd

@ -153,11 +153,25 @@ module psb_indx_map_mod
procedure, pass(idxmap) :: set_gci => base_set_gci
procedure, pass(idxmap) :: set_grl => base_set_grl
procedure, pass(idxmap) :: set_gcl => base_set_gcl
#if defined(IPK4) && defined(LPK8)
generic, public :: set_gr => set_grl, set_gri
generic, public :: set_gc => set_gcl, set_gci
#else
generic, public :: set_gr => set_grl
generic, public :: set_gc => set_gcl
procedure, pass(idxmap) :: set_lr => base_set_lr
procedure, pass(idxmap) :: set_lc => base_set_lc
#endif
procedure, pass(idxmap) :: set_lri => base_set_lri
procedure, pass(idxmap) :: set_lrl => base_set_lrl
procedure, pass(idxmap) :: set_lci => base_set_lci
procedure, pass(idxmap) :: set_lcl => base_set_lcl
procedure, pass(idxmap) :: inc_lc => base_inc_lc
#if defined(IPK4) && defined(LPK8)
generic, public :: set_lr => set_lrl, set_lri
generic, public :: set_lc => set_lcl, set_lci
#else
generic, public :: set_lr => set_lri
generic, public :: set_lc => set_lci
#endif
procedure, pass(idxmap) :: set_p_adjcncy => base_set_p_adjcncy
procedure, pass(idxmap) :: xtnd_p_adjcncy => base_xtnd_p_adjcncy
@ -235,7 +249,8 @@ module psb_indx_map_mod
& base_get_gr, base_get_gc, base_get_lr, base_get_lc, base_get_ctxt,&
& base_get_mpic, base_sizeof, base_set_null, &
& base_set_grl, base_set_gcl, &
& base_set_lr, base_set_lc, base_set_ctxt,&
& base_set_lri, base_set_lci, base_set_lrl, base_set_lcl, &
& base_inc_lc, base_set_ctxt,&
& base_set_mpic, base_get_fmt, base_asb, base_free,&
& base_l2gs1, base_l2gs2, base_l2gv1, base_l2gv2,&
& base_g2ls1, base_g2ls2, base_g2lv1, base_g2lv2,&
@ -557,21 +572,47 @@ contains
idxmap%global_cols = val
end subroutine base_set_gcl
subroutine base_set_lr(idxmap,val)
subroutine base_set_lri(idxmap,val)
implicit none
class(psb_indx_map), intent(inout) :: idxmap
integer(psb_ipk_), intent(in) :: val
idxmap%local_rows = val
end subroutine base_set_lr
end subroutine base_set_lri
subroutine base_set_lc(idxmap,val)
subroutine base_set_lci(idxmap,val)
implicit none
class(psb_indx_map), intent(inout) :: idxmap
integer(psb_ipk_), intent(in) :: val
!$omp critical
idxmap%local_cols = val
!$omp end critical
end subroutine base_set_lci
subroutine base_set_lrl(idxmap,val)
implicit none
class(psb_indx_map), intent(inout) :: idxmap
integer(psb_lpk_), intent(in) :: val
idxmap%local_rows = val
end subroutine base_set_lrl
subroutine base_set_lcl(idxmap,val)
implicit none
class(psb_indx_map), intent(inout) :: idxmap
integer(psb_lpk_), intent(in) :: val
!$omp critical
idxmap%local_cols = val
end subroutine base_set_lc
!$omp end critical
end subroutine base_set_lcl
subroutine base_inc_lc(idxmap)
implicit none
class(psb_indx_map), intent(inout) :: idxmap
!$omp atomic
idxmap%local_cols = idxmap%local_cols + 1
!$omp end atomic
end subroutine base_inc_lc
subroutine base_set_p_adjcncy(idxmap,val)
use psb_realloc_mod

@ -179,6 +179,9 @@ contains
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap,owned_) &
!$omp private(i)
do i=1, size(idx)
if (mask(i)) then
if ((1<=idx(i)).and.(idx(i) <= idxmap%get_lr())) then
@ -191,9 +194,12 @@ contains
end if
end if
end do
!$omp end parallel do
else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap,owned_) &
!$omp private(i)
do i=1, size(idx)
if ((1<=idx(i)).and.(idx(i) <= idxmap%get_lr())) then
idx(i) = idxmap%loc_to_glob(idx(i))
@ -204,6 +210,7 @@ contains
idx(i) = -1
end if
end do
!$omp end parallel do
end if
@ -298,6 +305,9 @@ contains
if (present(mask)) then
if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idx,idxmap,owned_) &
!$omp private(i,ix)
do i=1,is
if (mask(i)) then
if ((1 <= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -309,6 +319,7 @@ contains
end if
end if
end do
!$omp end parallel do
else
idx(1:is) = -1
info = -1
@ -317,6 +328,9 @@ contains
else if (.not.present(mask)) then
if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idx,idxmap,owned_) &
!$omp private(i,ix)
do i=1, is
if ((1 <= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
ix = idxmap%glob_to_loc(idx(i))
@ -326,6 +340,7 @@ contains
idx(i) = -1
end if
end do
!$omp end parallel do
else
idx(1:is) = -1
info = -1
@ -365,6 +380,9 @@ contains
if (present(mask)) then
if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idxin,idxout,idxmap,owned_) &
!$omp private(i,ix)
do i=1,is
if (mask(i)) then
if ((1 <= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
@ -376,6 +394,7 @@ contains
end if
end if
end do
!$omp end parallel do
else
idxout(1:is) = -1
info = -1
@ -384,6 +403,9 @@ contains
else if (.not.present(mask)) then
if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(is,idxin,idxout,idxmap,owned_) &
!$omp private(i,ix)
do i=1, is
if ((1 <= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
ix = idxmap%glob_to_loc(idxin(i))
@ -393,6 +415,7 @@ contains
idxout(i) = -1
end if
end do
!$omp end parallel do
else
idxout(1:is) = -1
info = -1
@ -541,6 +564,10 @@ contains
else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,is,idx,idxmap,laddsz,lidx) &
!$omp private(i,ix,info)
! $ o m p reduction(.AND.:isLoopValid)
do i=1, is
if (info /= 0) cycle
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -579,8 +606,8 @@ contains
idx(i) = -1
end if
end do
!$omp end parallel do
end if
else if (.not.present(lidx)) then
if (present(mask)) then

@ -169,7 +169,9 @@ contains
end if
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap) &
!$omp private(i)
do i=1, size(idx)
if (mask(i)) then
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
@ -179,9 +181,11 @@ contains
end if
end if
end do
!$omp end parallel do
else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap) &
!$omp private(i)
do i=1, size(idx)
if ((1<=idx(i)).and.(idx(i) <= idxmap%local_rows)) then
! do nothing
@ -189,7 +193,7 @@ contains
idx(i) = -1
end if
end do
!$omp end parallel do
end if
end subroutine repl_l2gv1
@ -223,7 +227,9 @@ contains
end if
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,idxmap,im) &
!$omp private(i)
do i=1, im
if (mask(i)) then
if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then
@ -233,9 +239,11 @@ contains
end if
end if
end do
!$omp end parallel do
else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,idxmap,im) &
!$omp private(i)
do i=1, im
if ((1<=idxin(i)).and.(idxin(i) <= idxmap%local_rows)) then
idxout(i) = idxin(i)
@ -243,7 +251,7 @@ contains
idxout(i) = -1
end if
end do
!$omp end parallel do
end if
if (is > im) info = -3
@ -324,6 +332,9 @@ contains
if (present(mask)) then
if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap,is) &
!$omp private(i)
do i=1, is
if (mask(i)) then
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -333,7 +344,11 @@ contains
end if
end if
end do
!$omp end parallel do
else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,idxmap,is) &
!$omp private(i)
do i=1,is
if (mask(i)) then
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -344,6 +359,7 @@ contains
end if
end if
end do
!$omp end parallel do
else
idx(1:is) = -1
info = -1
@ -352,6 +368,9 @@ contains
else if (.not.present(mask)) then
if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap,is) &
!$omp private(i)
do i=1, is
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
! do nothing
@ -359,7 +378,11 @@ contains
idx(i) = -1
end if
end do
!$omp end parallel do
else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,idxmap,is) &
!$omp private(i)
do i=1,is
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
! do nothing
@ -367,6 +390,7 @@ contains
idx(i) = -1
end if
end do
!$omp end parallel do
else
idx(1:is) = -1
info = -1
@ -409,6 +433,9 @@ contains
if (present(mask)) then
if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,idxmap,im) &
!$omp private(i)
do i=1, im
if (mask(i)) then
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
@ -418,7 +445,11 @@ contains
end if
end if
end do
!$omp end parallel do
else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,idxmap,im) &
!$omp private(i)
do i=1,im
if (mask(i)) then
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
@ -428,6 +459,7 @@ contains
end if
end if
end do
!$omp end parallel do
else
idxout(1:im) = -1
info = -1
@ -436,6 +468,9 @@ contains
else if (.not.present(mask)) then
if (idxmap%is_asb()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,idxmap,im) &
!$omp private(i)
do i=1, im
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
idxout(i) = idxin(i)
@ -443,7 +478,11 @@ contains
idxout(i) = -1
end if
end do
!$omp end parallel do
else if (idxmap%is_valid()) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,idxmap,im) &
!$omp private(i)
do i=1,im
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
idxout(i) = idxin(i)
@ -451,6 +490,7 @@ contains
idxout(i) = -1
end if
end do
!$omp end parallel do
else
idxout(1:im) = -1
info = -1
@ -557,6 +597,9 @@ contains
else if (idxmap%is_valid()) then
if (present(lidx)) then
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,lidx,is,idxmap) &
!$omp private(i)
do i=1, is
if (mask(i)) then
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -566,9 +609,11 @@ contains
end if
end if
end do
!$omp end parallel do
else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,lidx,is,idxmap) &
!$omp private(i)
do i=1, is
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
! do nothing
@ -576,9 +621,13 @@ contains
idx(i) = -1
end if
end do
!$omp end parallel do
end if
else if (.not.present(lidx)) then
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idx,is,idxmap) &
!$omp private(i)
do i=1, is
if (mask(i)) then
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
@ -588,8 +637,11 @@ contains
end if
end if
end do
!$omp end parallel do
else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idx,is,idxmap) &
!$omp private(i)
do i=1, is
if ((1<= idx(i)).and.(idx(i) <= idxmap%global_rows)) then
! do nothing
@ -597,6 +649,7 @@ contains
idx(i) = -1
end if
end do
!$omp end parallel do
end if
end if
else
@ -644,6 +697,9 @@ contains
else if (idxmap%is_valid()) then
if (present(lidx)) then
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,im,idxmap) &
!$omp private(i)
do i=1, im
if (mask(i)) then
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
@ -653,9 +709,11 @@ contains
end if
end if
end do
!$omp end parallel do
else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,im,idxmap) &
!$omp private(i)
do i=1, im
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
idxout(i) = idxin(i)
@ -663,9 +721,13 @@ contains
idxout(i) = -1
end if
end do
!$omp end parallel do
end if
else if (.not.present(lidx)) then
if (present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(mask,idxin,idxout,im,idxmap) &
!$omp private(i)
do i=1, im
if (mask(i)) then
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
@ -675,8 +737,11 @@ contains
end if
end if
end do
!$omp end parallel do
else if (.not.present(mask)) then
!$omp parallel do default(none) schedule(dynamic) &
!$omp shared(idxin,idxout,im,idxmap) &
!$omp private(i)
do i=1, im
if ((1<= idxin(i)).and.(idxin(i) <= idxmap%global_rows)) then
idxout(i) = idxin(i)
@ -684,6 +749,7 @@ contains
idxout(i) = -1
end if
end do
!$omp end parallel do
end if
end if
else

@ -136,9 +136,9 @@ module psb_const_mod
!
! Version
!
character(len=*), parameter :: psb_version_string_ = "3.8.0"
character(len=*), parameter :: psb_version_string_ = "3.9.0"
integer(psb_ipk_), parameter :: psb_version_major_ = 3
integer(psb_ipk_), parameter :: psb_version_minor_ = 8
integer(psb_ipk_), parameter :: psb_version_minor_ = 9
integer(psb_ipk_), parameter :: psb_patchlevel_ = 0
!

@ -143,6 +143,20 @@ module psb_c_psblas_mod
end subroutine psb_caxpby
end interface
interface psb_abgdxyz
subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_spk_, psb_ipk_, &
& psb_c_vect_type, psb_cspmat_type
type(psb_c_vect_type), intent (inout) :: x
type(psb_c_vect_type), intent (inout) :: y
type(psb_c_vect_type), intent (inout) :: z
complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_cabgdxyz_vect
end interface psb_abgdxyz
interface psb_geamax
function psb_camax(x, desc_a, info, jx,global)
import :: psb_desc_type, psb_spk_, psb_ipk_, &

@ -143,6 +143,20 @@ module psb_d_psblas_mod
end subroutine psb_daxpby
end interface
interface psb_abgdxyz
subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_dpk_, psb_ipk_, &
& psb_d_vect_type, psb_dspmat_type
type(psb_d_vect_type), intent (inout) :: x
type(psb_d_vect_type), intent (inout) :: y
type(psb_d_vect_type), intent (inout) :: z
real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_dabgdxyz_vect
end interface psb_abgdxyz
interface psb_geamax
function psb_damax(x, desc_a, info, jx,global)
import :: psb_desc_type, psb_dpk_, psb_ipk_, &

@ -143,6 +143,20 @@ module psb_s_psblas_mod
end subroutine psb_saxpby
end interface
interface psb_abgdxyz
subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_spk_, psb_ipk_, &
& psb_s_vect_type, psb_sspmat_type
type(psb_s_vect_type), intent (inout) :: x
type(psb_s_vect_type), intent (inout) :: y
type(psb_s_vect_type), intent (inout) :: z
real(psb_spk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_sabgdxyz_vect
end interface psb_abgdxyz
interface psb_geamax
function psb_samax(x, desc_a, info, jx,global)
import :: psb_desc_type, psb_spk_, psb_ipk_, &

@ -143,6 +143,20 @@ module psb_z_psblas_mod
end subroutine psb_zaxpby
end interface
interface psb_abgdxyz
subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_dpk_, psb_ipk_, &
& psb_z_vect_type, psb_zspmat_type
type(psb_z_vect_type), intent (inout) :: x
type(psb_z_vect_type), intent (inout) :: y
type(psb_z_vect_type), intent (inout) :: z
complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_zabgdxyz_vect
end interface psb_abgdxyz
interface psb_geamax
function psb_zamax(x, desc_a, info, jx,global)
import :: psb_desc_type, psb_dpk_, psb_ipk_, &

@ -155,6 +155,9 @@ module psb_c_base_vect_mod
procedure, pass(z) :: axpby_v2 => c_base_axpby_v2
procedure, pass(z) :: axpby_a2 => c_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => c_base_abgdxyz
procedure, pass(w) :: xyzw => c_base_xyzw
!
! Vector by vector multiplication. Need all variants
! to handle multiple requirements from preconditioners
@ -1018,7 +1021,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x The class(base_vect) to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param info return code
!!
subroutine c_base_axpby_v(m,alpha, x, beta, y, info)
@ -1047,7 +1050,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x The class(base_vect) to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be returned
!! \param info return code
@ -1078,7 +1081,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x(:) The array to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param info return code
!!
subroutine c_base_axpby_a(m,alpha, x, beta, y, info)
@ -1126,6 +1129,64 @@ contains
end subroutine c_base_axpby_a2
!
! ABGDXYZ is invoked via Z, hence the structure below.
!
!
!> Function base_abgdxyz
!! \memberof psb_c_base_vect_type
!! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
!! \param gamma scalar gamma
!! \param delta scalar delta
!! \param x The class(base_vect) to be added
!! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be added
!! \param info return code
!!
subroutine c_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_c_base_vect_type), intent(inout) :: x
class(psb_c_base_vect_type), intent(inout) :: y
class(psb_c_base_vect_type), intent(inout) :: z
complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(alpha/=czero)) call x%sync()
if (y%is_dev().and.(beta/=czero)) call y%sync()
if (z%is_dev().and.(delta/=czero)) call z%sync()
call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
end subroutine c_base_abgdxyz
subroutine c_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_c_base_vect_type), intent(inout) :: x
class(psb_c_base_vect_type), intent(inout) :: y
class(psb_c_base_vect_type), intent(inout) :: z
class(psb_c_base_vect_type), intent(inout) :: w
complex(psb_spk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(a/=czero)) call x%sync()
if (y%is_dev().and.(b/=czero)) call y%sync()
if (z%is_dev().and.(d/=czero)) call z%sync()
if (w%is_dev().and.(f/=czero)) call w%sync()
call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info)
call y%set_host()
call z%set_host()
call w%set_host()
end subroutine c_base_xyzw
!
! Multiple variants of two operations:

@ -579,6 +579,110 @@ module psb_c_csr_mat_mod
end subroutine psb_c_csr_scals
end interface
type, extends(psb_c_csr_sparse_mat) :: psb_c_ecsr_sparse_mat
!> Number of non-empty rows
integer(psb_ipk_) :: nnerws
!> Indices of non-empty rows
integer(psb_ipk_), allocatable :: nerwp(:)
contains
procedure, nopass :: get_fmt => c_ecsr_get_fmt
! procedure, pass(a) :: csmm => psb_c_ecsr_csmm
procedure, pass(a) :: csmv => psb_c_ecsr_csmv
procedure, pass(a) :: cp_from_coo => psb_c_cp_ecsr_from_coo
procedure, pass(a) :: cp_from_fmt => psb_c_cp_ecsr_from_fmt
procedure, pass(a) :: mv_from_coo => psb_c_mv_ecsr_from_coo
procedure, pass(a) :: mv_from_fmt => psb_c_mv_ecsr_from_fmt
procedure, pass(a) :: cmp_nerwp => psb_c_ecsr_cmp_nerwp
procedure, pass(a) :: free => c_ecsr_free
procedure, pass(a) :: mold => psb_c_ecsr_mold
end type psb_c_ecsr_sparse_mat
!> \memberof psb_c_ecsr_sparse_mat
!! \see psb_c_base_mat_mod::psb_c_base_csmv
interface
subroutine psb_c_ecsr_csmv(alpha,a,x,beta,y,info,trans)
import
class(psb_c_ecsr_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta, x(:)
complex(psb_spk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_c_ecsr_csmv
end interface
!> \memberof psb_c_ecsr_sparse_mat
!! \see psb_c_base_mat_mod::psb_c_base_cp_from_coo
interface
subroutine psb_c_ecsr_cmp_nerwp(a,info)
import
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_ecsr_cmp_nerwp
end interface
!> \memberof psb_c_ecsr_sparse_mat
!! \see psb_c_base_mat_mod::psb_c_base_cp_from_coo
interface
subroutine psb_c_cp_ecsr_from_coo(a,b,info)
import
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_cp_ecsr_from_coo
end interface
!> \memberof psb_c_ecsr_sparse_mat
!! \see psb_c_base_mat_mod::psb_c_base_cp_from_fmt
interface
subroutine psb_c_cp_ecsr_from_fmt(a,b,info)
import
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_cp_ecsr_from_fmt
end interface
!> \memberof psb_c_ecsr_sparse_mat
!! \see psb_c_base_mat_mod::psb_c_base_mv_from_coo
interface
subroutine psb_c_mv_ecsr_from_coo(a,b,info)
import
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_mv_ecsr_from_coo
end interface
!> \memberof psb_c_ecsr_sparse_mat
!! \see psb_c_base_mat_mod::psb_c_base_mv_from_fmt
interface
subroutine psb_c_mv_ecsr_from_fmt(a,b,info)
import
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_mv_ecsr_from_fmt
end interface
!> \memberof psb_c_ecsr_sparse_mat
!| \see psb_base_mat_mod::psb_base_mold
interface
subroutine psb_c_ecsr_mold(a,b,info)
import
class(psb_c_ecsr_sparse_mat), intent(in) :: a
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_ecsr_mold
end interface
!> \namespace psb_base_mod \class psb_lc_csr_sparse_mat
!! \extends psb_lc_base_mat_mod::psb_lc_base_sparse_mat
!!
@ -1178,6 +1282,26 @@ contains
function c_ecsr_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'ECSR'
end function c_ecsr_get_fmt
subroutine c_ecsr_free(a)
implicit none
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
if (allocated(a%nerwp)) deallocate(a%nerwp)
a%nnerws = 0
call a%psb_c_csr_sparse_mat%free()
return
end subroutine c_ecsr_free
! == ===================================
!
!

@ -79,12 +79,14 @@
module psb_c_mat_mod
use psb_c_base_mat_mod
use psb_c_csr_mat_mod, only : psb_c_csr_sparse_mat, psb_lc_csr_sparse_mat
use psb_c_csr_mat_mod, only : psb_c_csr_sparse_mat, psb_lc_csr_sparse_mat,&
& psb_c_ecsr_sparse_mat
use psb_c_csc_mat_mod, only : psb_c_csc_sparse_mat, psb_lc_csc_sparse_mat
type :: psb_cspmat_type
class(psb_c_base_sparse_mat), allocatable :: a
class(psb_c_base_sparse_mat), allocatable :: ad, and
integer(psb_ipk_) :: remote_build=psb_matbld_noremote_
type(psb_lc_coo_sparse_mat), allocatable :: rmta
@ -202,6 +204,8 @@ module psb_c_mat_mod
procedure, pass(a) :: cscnv_ip => psb_c_cscnv_ip
procedure, pass(a) :: cscnv_base => psb_c_cscnv_base
generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base
procedure, pass(a) :: split_nd => psb_c_split_nd
procedure, pass(a) :: merge_nd => psb_c_merge_nd
procedure, pass(a) :: clone => psb_cspmat_clone
procedure, pass(a) :: move_alloc => psb_cspmat_type_move
!
@ -840,6 +844,24 @@ module psb_c_mat_mod
!
!
interface
subroutine psb_c_split_nd(a,n_rows,n_cols,info)
import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat
class(psb_cspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_split_nd
end interface
interface
subroutine psb_c_merge_nd(a,n_rows,n_cols,info)
import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat
class(psb_cspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
end subroutine psb_c_merge_nd
end interface
!
! CSCNV: switches to a different internal derived type.
! 3 versions: copying to target
@ -859,7 +881,6 @@ module psb_c_mat_mod
end subroutine psb_c_cscnv
end interface
interface
subroutine psb_c_cscnv_ip(a,iinfo,type,mold,dupl)
import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat
@ -871,7 +892,6 @@ module psb_c_mat_mod
end subroutine psb_c_cscnv_ip
end interface
interface
subroutine psb_c_cscnv_base(a,b,info,dupl)
import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_spk_, psb_c_base_sparse_mat

@ -102,6 +102,9 @@ module psb_c_vect_mod
procedure, pass(z) :: axpby_v2 => c_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => c_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => c_vect_abgdxyz
procedure, pass(z) :: xyzw => c_vect_xyzw
procedure, pass(y) :: mlt_v => c_vect_mlt_v
procedure, pass(y) :: mlt_a => c_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => c_vect_mlt_a_2
@ -771,6 +774,38 @@ contains
end subroutine c_vect_axpby_a2
subroutine c_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_c_vect_type), intent(inout) :: x
class(psb_c_vect_type), intent(inout) :: y
class(psb_c_vect_type), intent(inout) :: z
complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
end subroutine c_vect_abgdxyz
subroutine c_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_c_vect_type), intent(inout) :: x
class(psb_c_vect_type), intent(inout) :: y
class(psb_c_vect_type), intent(inout) :: z
class(psb_c_vect_type), intent(inout) :: w
complex(psb_spk_), intent (in) :: a, b, c, d, e, f
integer(psb_ipk_), intent(out) :: info
if (allocated(w%v)) &
call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info)
end subroutine c_vect_xyzw
subroutine c_vect_mlt_v(x, y, info)
use psi_serial_mod
implicit none

@ -155,6 +155,9 @@ module psb_d_base_vect_mod
procedure, pass(z) :: axpby_v2 => d_base_axpby_v2
procedure, pass(z) :: axpby_a2 => d_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => d_base_abgdxyz
procedure, pass(w) :: xyzw => d_base_xyzw
!
! Vector by vector multiplication. Need all variants
! to handle multiple requirements from preconditioners
@ -1025,7 +1028,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x The class(base_vect) to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param info return code
!!
subroutine d_base_axpby_v(m,alpha, x, beta, y, info)
@ -1054,7 +1057,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x The class(base_vect) to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be returned
!! \param info return code
@ -1085,7 +1088,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x(:) The array to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param info return code
!!
subroutine d_base_axpby_a(m,alpha, x, beta, y, info)
@ -1133,6 +1136,64 @@ contains
end subroutine d_base_axpby_a2
!
! ABGDXYZ is invoked via Z, hence the structure below.
!
!
!> Function base_abgdxyz
!! \memberof psb_d_base_vect_type
!! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
!! \param gamma scalar gamma
!! \param delta scalar delta
!! \param x The class(base_vect) to be added
!! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be added
!! \param info return code
!!
subroutine d_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_d_base_vect_type), intent(inout) :: x
class(psb_d_base_vect_type), intent(inout) :: y
class(psb_d_base_vect_type), intent(inout) :: z
real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(alpha/=dzero)) call x%sync()
if (y%is_dev().and.(beta/=dzero)) call y%sync()
if (z%is_dev().and.(delta/=dzero)) call z%sync()
call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
end subroutine d_base_abgdxyz
subroutine d_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_d_base_vect_type), intent(inout) :: x
class(psb_d_base_vect_type), intent(inout) :: y
class(psb_d_base_vect_type), intent(inout) :: z
class(psb_d_base_vect_type), intent(inout) :: w
real(psb_dpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(a/=dzero)) call x%sync()
if (y%is_dev().and.(b/=dzero)) call y%sync()
if (z%is_dev().and.(d/=dzero)) call z%sync()
if (w%is_dev().and.(f/=dzero)) call w%sync()
call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info)
call y%set_host()
call z%set_host()
call w%set_host()
end subroutine d_base_xyzw
!
! Multiple variants of two operations:

@ -579,6 +579,110 @@ module psb_d_csr_mat_mod
end subroutine psb_d_csr_scals
end interface
type, extends(psb_d_csr_sparse_mat) :: psb_d_ecsr_sparse_mat
!> Number of non-empty rows
integer(psb_ipk_) :: nnerws
!> Indices of non-empty rows
integer(psb_ipk_), allocatable :: nerwp(:)
contains
procedure, nopass :: get_fmt => d_ecsr_get_fmt
! procedure, pass(a) :: csmm => psb_d_ecsr_csmm
procedure, pass(a) :: csmv => psb_d_ecsr_csmv
procedure, pass(a) :: cp_from_coo => psb_d_cp_ecsr_from_coo
procedure, pass(a) :: cp_from_fmt => psb_d_cp_ecsr_from_fmt
procedure, pass(a) :: mv_from_coo => psb_d_mv_ecsr_from_coo
procedure, pass(a) :: mv_from_fmt => psb_d_mv_ecsr_from_fmt
procedure, pass(a) :: cmp_nerwp => psb_d_ecsr_cmp_nerwp
procedure, pass(a) :: free => d_ecsr_free
procedure, pass(a) :: mold => psb_d_ecsr_mold
end type psb_d_ecsr_sparse_mat
!> \memberof psb_d_ecsr_sparse_mat
!! \see psb_d_base_mat_mod::psb_d_base_csmv
interface
subroutine psb_d_ecsr_csmv(alpha,a,x,beta,y,info,trans)
import
class(psb_d_ecsr_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(in) :: alpha, beta, x(:)
real(psb_dpk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_d_ecsr_csmv
end interface
!> \memberof psb_d_ecsr_sparse_mat
!! \see psb_d_base_mat_mod::psb_d_base_cp_from_coo
interface
subroutine psb_d_ecsr_cmp_nerwp(a,info)
import
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_ecsr_cmp_nerwp
end interface
!> \memberof psb_d_ecsr_sparse_mat
!! \see psb_d_base_mat_mod::psb_d_base_cp_from_coo
interface
subroutine psb_d_cp_ecsr_from_coo(a,b,info)
import
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_cp_ecsr_from_coo
end interface
!> \memberof psb_d_ecsr_sparse_mat
!! \see psb_d_base_mat_mod::psb_d_base_cp_from_fmt
interface
subroutine psb_d_cp_ecsr_from_fmt(a,b,info)
import
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_cp_ecsr_from_fmt
end interface
!> \memberof psb_d_ecsr_sparse_mat
!! \see psb_d_base_mat_mod::psb_d_base_mv_from_coo
interface
subroutine psb_d_mv_ecsr_from_coo(a,b,info)
import
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_mv_ecsr_from_coo
end interface
!> \memberof psb_d_ecsr_sparse_mat
!! \see psb_d_base_mat_mod::psb_d_base_mv_from_fmt
interface
subroutine psb_d_mv_ecsr_from_fmt(a,b,info)
import
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_mv_ecsr_from_fmt
end interface
!> \memberof psb_d_ecsr_sparse_mat
!| \see psb_base_mat_mod::psb_base_mold
interface
subroutine psb_d_ecsr_mold(a,b,info)
import
class(psb_d_ecsr_sparse_mat), intent(in) :: a
class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_ecsr_mold
end interface
!> \namespace psb_base_mod \class psb_ld_csr_sparse_mat
!! \extends psb_ld_base_mat_mod::psb_ld_base_sparse_mat
!!
@ -1178,6 +1282,26 @@ contains
function d_ecsr_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'ECSR'
end function d_ecsr_get_fmt
subroutine d_ecsr_free(a)
implicit none
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
if (allocated(a%nerwp)) deallocate(a%nerwp)
a%nnerws = 0
call a%psb_d_csr_sparse_mat%free()
return
end subroutine d_ecsr_free
! == ===================================
!
!

@ -79,12 +79,14 @@
module psb_d_mat_mod
use psb_d_base_mat_mod
use psb_d_csr_mat_mod, only : psb_d_csr_sparse_mat, psb_ld_csr_sparse_mat
use psb_d_csr_mat_mod, only : psb_d_csr_sparse_mat, psb_ld_csr_sparse_mat,&
& psb_d_ecsr_sparse_mat
use psb_d_csc_mat_mod, only : psb_d_csc_sparse_mat, psb_ld_csc_sparse_mat
type :: psb_dspmat_type
class(psb_d_base_sparse_mat), allocatable :: a
class(psb_d_base_sparse_mat), allocatable :: ad, and
integer(psb_ipk_) :: remote_build=psb_matbld_noremote_
type(psb_ld_coo_sparse_mat), allocatable :: rmta
@ -202,6 +204,8 @@ module psb_d_mat_mod
procedure, pass(a) :: cscnv_ip => psb_d_cscnv_ip
procedure, pass(a) :: cscnv_base => psb_d_cscnv_base
generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base
procedure, pass(a) :: split_nd => psb_d_split_nd
procedure, pass(a) :: merge_nd => psb_d_merge_nd
procedure, pass(a) :: clone => psb_dspmat_clone
procedure, pass(a) :: move_alloc => psb_dspmat_type_move
!
@ -840,6 +844,24 @@ module psb_d_mat_mod
!
!
interface
subroutine psb_d_split_nd(a,n_rows,n_cols,info)
import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat
class(psb_dspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_split_nd
end interface
interface
subroutine psb_d_merge_nd(a,n_rows,n_cols,info)
import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat
class(psb_dspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
end subroutine psb_d_merge_nd
end interface
!
! CSCNV: switches to a different internal derived type.
! 3 versions: copying to target
@ -859,7 +881,6 @@ module psb_d_mat_mod
end subroutine psb_d_cscnv
end interface
interface
subroutine psb_d_cscnv_ip(a,iinfo,type,mold,dupl)
import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat
@ -871,7 +892,6 @@ module psb_d_mat_mod
end subroutine psb_d_cscnv_ip
end interface
interface
subroutine psb_d_cscnv_base(a,b,info,dupl)
import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_dpk_, psb_d_base_sparse_mat

@ -102,6 +102,9 @@ module psb_d_vect_mod
procedure, pass(z) :: axpby_v2 => d_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => d_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => d_vect_abgdxyz
procedure, pass(z) :: xyzw => d_vect_xyzw
procedure, pass(y) :: mlt_v => d_vect_mlt_v
procedure, pass(y) :: mlt_a => d_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => d_vect_mlt_a_2
@ -778,6 +781,38 @@ contains
end subroutine d_vect_axpby_a2
subroutine d_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_d_vect_type), intent(inout) :: x
class(psb_d_vect_type), intent(inout) :: y
class(psb_d_vect_type), intent(inout) :: z
real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
end subroutine d_vect_abgdxyz
subroutine d_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_d_vect_type), intent(inout) :: x
class(psb_d_vect_type), intent(inout) :: y
class(psb_d_vect_type), intent(inout) :: z
class(psb_d_vect_type), intent(inout) :: w
real(psb_dpk_), intent (in) :: a, b, c, d, e, f
integer(psb_ipk_), intent(out) :: info
if (allocated(w%v)) &
call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info)
end subroutine d_vect_xyzw
subroutine d_vect_mlt_v(x, y, info)
use psi_serial_mod
implicit none

@ -155,6 +155,9 @@ module psb_s_base_vect_mod
procedure, pass(z) :: axpby_v2 => s_base_axpby_v2
procedure, pass(z) :: axpby_a2 => s_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => s_base_abgdxyz
procedure, pass(w) :: xyzw => s_base_xyzw
!
! Vector by vector multiplication. Need all variants
! to handle multiple requirements from preconditioners
@ -1025,7 +1028,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x The class(base_vect) to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param info return code
!!
subroutine s_base_axpby_v(m,alpha, x, beta, y, info)
@ -1054,7 +1057,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x The class(base_vect) to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be returned
!! \param info return code
@ -1085,7 +1088,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x(:) The array to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param info return code
!!
subroutine s_base_axpby_a(m,alpha, x, beta, y, info)
@ -1133,6 +1136,64 @@ contains
end subroutine s_base_axpby_a2
!
! ABGDXYZ is invoked via Z, hence the structure below.
!
!
!> Function base_abgdxyz
!! \memberof psb_s_base_vect_type
!! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
!! \param gamma scalar gamma
!! \param delta scalar delta
!! \param x The class(base_vect) to be added
!! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be added
!! \param info return code
!!
subroutine s_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_s_base_vect_type), intent(inout) :: x
class(psb_s_base_vect_type), intent(inout) :: y
class(psb_s_base_vect_type), intent(inout) :: z
real(psb_spk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(alpha/=szero)) call x%sync()
if (y%is_dev().and.(beta/=szero)) call y%sync()
if (z%is_dev().and.(delta/=szero)) call z%sync()
call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
end subroutine s_base_abgdxyz
subroutine s_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_s_base_vect_type), intent(inout) :: x
class(psb_s_base_vect_type), intent(inout) :: y
class(psb_s_base_vect_type), intent(inout) :: z
class(psb_s_base_vect_type), intent(inout) :: w
real(psb_spk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(a/=szero)) call x%sync()
if (y%is_dev().and.(b/=szero)) call y%sync()
if (z%is_dev().and.(d/=szero)) call z%sync()
if (w%is_dev().and.(f/=szero)) call w%sync()
call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info)
call y%set_host()
call z%set_host()
call w%set_host()
end subroutine s_base_xyzw
!
! Multiple variants of two operations:

@ -579,6 +579,110 @@ module psb_s_csr_mat_mod
end subroutine psb_s_csr_scals
end interface
type, extends(psb_s_csr_sparse_mat) :: psb_s_ecsr_sparse_mat
!> Number of non-empty rows
integer(psb_ipk_) :: nnerws
!> Indices of non-empty rows
integer(psb_ipk_), allocatable :: nerwp(:)
contains
procedure, nopass :: get_fmt => s_ecsr_get_fmt
! procedure, pass(a) :: csmm => psb_s_ecsr_csmm
procedure, pass(a) :: csmv => psb_s_ecsr_csmv
procedure, pass(a) :: cp_from_coo => psb_s_cp_ecsr_from_coo
procedure, pass(a) :: cp_from_fmt => psb_s_cp_ecsr_from_fmt
procedure, pass(a) :: mv_from_coo => psb_s_mv_ecsr_from_coo
procedure, pass(a) :: mv_from_fmt => psb_s_mv_ecsr_from_fmt
procedure, pass(a) :: cmp_nerwp => psb_s_ecsr_cmp_nerwp
procedure, pass(a) :: free => s_ecsr_free
procedure, pass(a) :: mold => psb_s_ecsr_mold
end type psb_s_ecsr_sparse_mat
!> \memberof psb_s_ecsr_sparse_mat
!! \see psb_s_base_mat_mod::psb_s_base_csmv
interface
subroutine psb_s_ecsr_csmv(alpha,a,x,beta,y,info,trans)
import
class(psb_s_ecsr_sparse_mat), intent(in) :: a
real(psb_spk_), intent(in) :: alpha, beta, x(:)
real(psb_spk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_s_ecsr_csmv
end interface
!> \memberof psb_s_ecsr_sparse_mat
!! \see psb_s_base_mat_mod::psb_s_base_cp_from_coo
interface
subroutine psb_s_ecsr_cmp_nerwp(a,info)
import
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_ecsr_cmp_nerwp
end interface
!> \memberof psb_s_ecsr_sparse_mat
!! \see psb_s_base_mat_mod::psb_s_base_cp_from_coo
interface
subroutine psb_s_cp_ecsr_from_coo(a,b,info)
import
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_cp_ecsr_from_coo
end interface
!> \memberof psb_s_ecsr_sparse_mat
!! \see psb_s_base_mat_mod::psb_s_base_cp_from_fmt
interface
subroutine psb_s_cp_ecsr_from_fmt(a,b,info)
import
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_cp_ecsr_from_fmt
end interface
!> \memberof psb_s_ecsr_sparse_mat
!! \see psb_s_base_mat_mod::psb_s_base_mv_from_coo
interface
subroutine psb_s_mv_ecsr_from_coo(a,b,info)
import
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_mv_ecsr_from_coo
end interface
!> \memberof psb_s_ecsr_sparse_mat
!! \see psb_s_base_mat_mod::psb_s_base_mv_from_fmt
interface
subroutine psb_s_mv_ecsr_from_fmt(a,b,info)
import
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_mv_ecsr_from_fmt
end interface
!> \memberof psb_s_ecsr_sparse_mat
!| \see psb_base_mat_mod::psb_base_mold
interface
subroutine psb_s_ecsr_mold(a,b,info)
import
class(psb_s_ecsr_sparse_mat), intent(in) :: a
class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_ecsr_mold
end interface
!> \namespace psb_base_mod \class psb_ls_csr_sparse_mat
!! \extends psb_ls_base_mat_mod::psb_ls_base_sparse_mat
!!
@ -1178,6 +1282,26 @@ contains
function s_ecsr_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'ECSR'
end function s_ecsr_get_fmt
subroutine s_ecsr_free(a)
implicit none
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
if (allocated(a%nerwp)) deallocate(a%nerwp)
a%nnerws = 0
call a%psb_s_csr_sparse_mat%free()
return
end subroutine s_ecsr_free
! == ===================================
!
!

@ -79,12 +79,14 @@
module psb_s_mat_mod
use psb_s_base_mat_mod
use psb_s_csr_mat_mod, only : psb_s_csr_sparse_mat, psb_ls_csr_sparse_mat
use psb_s_csr_mat_mod, only : psb_s_csr_sparse_mat, psb_ls_csr_sparse_mat,&
& psb_s_ecsr_sparse_mat
use psb_s_csc_mat_mod, only : psb_s_csc_sparse_mat, psb_ls_csc_sparse_mat
type :: psb_sspmat_type
class(psb_s_base_sparse_mat), allocatable :: a
class(psb_s_base_sparse_mat), allocatable :: ad, and
integer(psb_ipk_) :: remote_build=psb_matbld_noremote_
type(psb_ls_coo_sparse_mat), allocatable :: rmta
@ -202,6 +204,8 @@ module psb_s_mat_mod
procedure, pass(a) :: cscnv_ip => psb_s_cscnv_ip
procedure, pass(a) :: cscnv_base => psb_s_cscnv_base
generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base
procedure, pass(a) :: split_nd => psb_s_split_nd
procedure, pass(a) :: merge_nd => psb_s_merge_nd
procedure, pass(a) :: clone => psb_sspmat_clone
procedure, pass(a) :: move_alloc => psb_sspmat_type_move
!
@ -840,6 +844,24 @@ module psb_s_mat_mod
!
!
interface
subroutine psb_s_split_nd(a,n_rows,n_cols,info)
import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat
class(psb_sspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_split_nd
end interface
interface
subroutine psb_s_merge_nd(a,n_rows,n_cols,info)
import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat
class(psb_sspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
end subroutine psb_s_merge_nd
end interface
!
! CSCNV: switches to a different internal derived type.
! 3 versions: copying to target
@ -859,7 +881,6 @@ module psb_s_mat_mod
end subroutine psb_s_cscnv
end interface
interface
subroutine psb_s_cscnv_ip(a,iinfo,type,mold,dupl)
import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat
@ -871,7 +892,6 @@ module psb_s_mat_mod
end subroutine psb_s_cscnv_ip
end interface
interface
subroutine psb_s_cscnv_base(a,b,info,dupl)
import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_spk_, psb_s_base_sparse_mat

@ -102,6 +102,9 @@ module psb_s_vect_mod
procedure, pass(z) :: axpby_v2 => s_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => s_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => s_vect_abgdxyz
procedure, pass(z) :: xyzw => s_vect_xyzw
procedure, pass(y) :: mlt_v => s_vect_mlt_v
procedure, pass(y) :: mlt_a => s_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => s_vect_mlt_a_2
@ -778,6 +781,38 @@ contains
end subroutine s_vect_axpby_a2
subroutine s_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_s_vect_type), intent(inout) :: x
class(psb_s_vect_type), intent(inout) :: y
class(psb_s_vect_type), intent(inout) :: z
real(psb_spk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
end subroutine s_vect_abgdxyz
subroutine s_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_s_vect_type), intent(inout) :: x
class(psb_s_vect_type), intent(inout) :: y
class(psb_s_vect_type), intent(inout) :: z
class(psb_s_vect_type), intent(inout) :: w
real(psb_spk_), intent (in) :: a, b, c, d, e, f
integer(psb_ipk_), intent(out) :: info
if (allocated(w%v)) &
call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info)
end subroutine s_vect_xyzw
subroutine s_vect_mlt_v(x, y, info)
use psi_serial_mod
implicit none

@ -36,9 +36,7 @@ module psb_serial_mod
use psb_string_mod
use psb_sort_mod
use psi_serial_mod, &
& psb_gth => psi_gth,&
& psb_sct => psi_sct
use psi_serial_mod
use psb_s_serial_mod
use psb_d_serial_mod

@ -155,6 +155,9 @@ module psb_z_base_vect_mod
procedure, pass(z) :: axpby_v2 => z_base_axpby_v2
procedure, pass(z) :: axpby_a2 => z_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => z_base_abgdxyz
procedure, pass(w) :: xyzw => z_base_xyzw
!
! Vector by vector multiplication. Need all variants
! to handle multiple requirements from preconditioners
@ -1018,7 +1021,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x The class(base_vect) to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param info return code
!!
subroutine z_base_axpby_v(m,alpha, x, beta, y, info)
@ -1047,7 +1050,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x The class(base_vect) to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be returned
!! \param info return code
@ -1078,7 +1081,7 @@ contains
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param x(:) The array to be added
!! \param beta scalar alpha
!! \param beta scalar beta
!! \param info return code
!!
subroutine z_base_axpby_a(m,alpha, x, beta, y, info)
@ -1126,6 +1129,64 @@ contains
end subroutine z_base_axpby_a2
!
! ABGDXYZ is invoked via Z, hence the structure below.
!
!
!> Function base_abgdxyz
!! \memberof psb_z_base_vect_type
!! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
!! \param gamma scalar gamma
!! \param delta scalar delta
!! \param x The class(base_vect) to be added
!! \param y The class(base_vect) to be added
!! \param z The class(base_vect) to be added
!! \param info return code
!!
subroutine z_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_z_base_vect_type), intent(inout) :: x
class(psb_z_base_vect_type), intent(inout) :: y
class(psb_z_base_vect_type), intent(inout) :: z
complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(alpha/=zzero)) call x%sync()
if (y%is_dev().and.(beta/=zzero)) call y%sync()
if (z%is_dev().and.(delta/=zzero)) call z%sync()
call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
end subroutine z_base_abgdxyz
subroutine z_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_z_base_vect_type), intent(inout) :: x
class(psb_z_base_vect_type), intent(inout) :: y
class(psb_z_base_vect_type), intent(inout) :: z
class(psb_z_base_vect_type), intent(inout) :: w
complex(psb_dpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
if (x%is_dev().and.(a/=zzero)) call x%sync()
if (y%is_dev().and.(b/=zzero)) call y%sync()
if (z%is_dev().and.(d/=zzero)) call z%sync()
if (w%is_dev().and.(f/=zzero)) call w%sync()
call psi_xyzw(m,a,b,c,d,e,f,x%v, y%v, z%v, w%v, info)
call y%set_host()
call z%set_host()
call w%set_host()
end subroutine z_base_xyzw
!
! Multiple variants of two operations:

@ -579,6 +579,110 @@ module psb_z_csr_mat_mod
end subroutine psb_z_csr_scals
end interface
type, extends(psb_z_csr_sparse_mat) :: psb_z_ecsr_sparse_mat
!> Number of non-empty rows
integer(psb_ipk_) :: nnerws
!> Indices of non-empty rows
integer(psb_ipk_), allocatable :: nerwp(:)
contains
procedure, nopass :: get_fmt => z_ecsr_get_fmt
! procedure, pass(a) :: csmm => psb_z_ecsr_csmm
procedure, pass(a) :: csmv => psb_z_ecsr_csmv
procedure, pass(a) :: cp_from_coo => psb_z_cp_ecsr_from_coo
procedure, pass(a) :: cp_from_fmt => psb_z_cp_ecsr_from_fmt
procedure, pass(a) :: mv_from_coo => psb_z_mv_ecsr_from_coo
procedure, pass(a) :: mv_from_fmt => psb_z_mv_ecsr_from_fmt
procedure, pass(a) :: cmp_nerwp => psb_z_ecsr_cmp_nerwp
procedure, pass(a) :: free => z_ecsr_free
procedure, pass(a) :: mold => psb_z_ecsr_mold
end type psb_z_ecsr_sparse_mat
!> \memberof psb_z_ecsr_sparse_mat
!! \see psb_z_base_mat_mod::psb_z_base_csmv
interface
subroutine psb_z_ecsr_csmv(alpha,a,x,beta,y,info,trans)
import
class(psb_z_ecsr_sparse_mat), intent(in) :: a
complex(psb_dpk_), intent(in) :: alpha, beta, x(:)
complex(psb_dpk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
end subroutine psb_z_ecsr_csmv
end interface
!> \memberof psb_z_ecsr_sparse_mat
!! \see psb_z_base_mat_mod::psb_z_base_cp_from_coo
interface
subroutine psb_z_ecsr_cmp_nerwp(a,info)
import
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_ecsr_cmp_nerwp
end interface
!> \memberof psb_z_ecsr_sparse_mat
!! \see psb_z_base_mat_mod::psb_z_base_cp_from_coo
interface
subroutine psb_z_cp_ecsr_from_coo(a,b,info)
import
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_cp_ecsr_from_coo
end interface
!> \memberof psb_z_ecsr_sparse_mat
!! \see psb_z_base_mat_mod::psb_z_base_cp_from_fmt
interface
subroutine psb_z_cp_ecsr_from_fmt(a,b,info)
import
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_cp_ecsr_from_fmt
end interface
!> \memberof psb_z_ecsr_sparse_mat
!! \see psb_z_base_mat_mod::psb_z_base_mv_from_coo
interface
subroutine psb_z_mv_ecsr_from_coo(a,b,info)
import
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_mv_ecsr_from_coo
end interface
!> \memberof psb_z_ecsr_sparse_mat
!! \see psb_z_base_mat_mod::psb_z_base_mv_from_fmt
interface
subroutine psb_z_mv_ecsr_from_fmt(a,b,info)
import
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_mv_ecsr_from_fmt
end interface
!> \memberof psb_z_ecsr_sparse_mat
!| \see psb_base_mat_mod::psb_base_mold
interface
subroutine psb_z_ecsr_mold(a,b,info)
import
class(psb_z_ecsr_sparse_mat), intent(in) :: a
class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_ecsr_mold
end interface
!> \namespace psb_base_mod \class psb_lz_csr_sparse_mat
!! \extends psb_lz_base_mat_mod::psb_lz_base_sparse_mat
!!
@ -1178,6 +1282,26 @@ contains
function z_ecsr_get_fmt() result(res)
implicit none
character(len=5) :: res
res = 'ECSR'
end function z_ecsr_get_fmt
subroutine z_ecsr_free(a)
implicit none
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
if (allocated(a%nerwp)) deallocate(a%nerwp)
a%nnerws = 0
call a%psb_z_csr_sparse_mat%free()
return
end subroutine z_ecsr_free
! == ===================================
!
!

@ -79,12 +79,14 @@
module psb_z_mat_mod
use psb_z_base_mat_mod
use psb_z_csr_mat_mod, only : psb_z_csr_sparse_mat, psb_lz_csr_sparse_mat
use psb_z_csr_mat_mod, only : psb_z_csr_sparse_mat, psb_lz_csr_sparse_mat,&
& psb_z_ecsr_sparse_mat
use psb_z_csc_mat_mod, only : psb_z_csc_sparse_mat, psb_lz_csc_sparse_mat
type :: psb_zspmat_type
class(psb_z_base_sparse_mat), allocatable :: a
class(psb_z_base_sparse_mat), allocatable :: ad, and
integer(psb_ipk_) :: remote_build=psb_matbld_noremote_
type(psb_lz_coo_sparse_mat), allocatable :: rmta
@ -202,6 +204,8 @@ module psb_z_mat_mod
procedure, pass(a) :: cscnv_ip => psb_z_cscnv_ip
procedure, pass(a) :: cscnv_base => psb_z_cscnv_base
generic, public :: cscnv => cscnv_np, cscnv_ip, cscnv_base
procedure, pass(a) :: split_nd => psb_z_split_nd
procedure, pass(a) :: merge_nd => psb_z_merge_nd
procedure, pass(a) :: clone => psb_zspmat_clone
procedure, pass(a) :: move_alloc => psb_zspmat_type_move
!
@ -840,6 +844,24 @@ module psb_z_mat_mod
!
!
interface
subroutine psb_z_split_nd(a,n_rows,n_cols,info)
import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat
class(psb_zspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_split_nd
end interface
interface
subroutine psb_z_merge_nd(a,n_rows,n_cols,info)
import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat
class(psb_zspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
end subroutine psb_z_merge_nd
end interface
!
! CSCNV: switches to a different internal derived type.
! 3 versions: copying to target
@ -859,7 +881,6 @@ module psb_z_mat_mod
end subroutine psb_z_cscnv
end interface
interface
subroutine psb_z_cscnv_ip(a,iinfo,type,mold,dupl)
import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat
@ -871,7 +892,6 @@ module psb_z_mat_mod
end subroutine psb_z_cscnv_ip
end interface
interface
subroutine psb_z_cscnv_base(a,b,info,dupl)
import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_dpk_, psb_z_base_sparse_mat

@ -102,6 +102,9 @@ module psb_z_vect_mod
procedure, pass(z) :: axpby_v2 => z_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => z_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: abgdxyz => z_vect_abgdxyz
procedure, pass(z) :: xyzw => z_vect_xyzw
procedure, pass(y) :: mlt_v => z_vect_mlt_v
procedure, pass(y) :: mlt_a => z_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => z_vect_mlt_a_2
@ -771,6 +774,38 @@ contains
end subroutine z_vect_axpby_a2
subroutine z_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_z_vect_type), intent(inout) :: x
class(psb_z_vect_type), intent(inout) :: y
class(psb_z_vect_type), intent(inout) :: z
complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
end subroutine z_vect_abgdxyz
subroutine z_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_z_vect_type), intent(inout) :: x
class(psb_z_vect_type), intent(inout) :: y
class(psb_z_vect_type), intent(inout) :: z
class(psb_z_vect_type), intent(inout) :: w
complex(psb_dpk_), intent (in) :: a, b, c, d, e, f
integer(psb_ipk_), intent(out) :: info
if (allocated(w%v)) &
call w%v%xyzw(m,a,b,c,d,e,f,x%v,y%v,z%v,info)
end subroutine z_vect_xyzw
subroutine z_vect_mlt_v(x, y, info)
use psi_serial_mod
implicit none

@ -250,7 +250,7 @@ Module psb_c_tools_mod
end interface
interface psb_spasb
subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold)
subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold, bld_and)
import
implicit none
type(psb_cspmat_type), intent (inout) :: a
@ -259,6 +259,7 @@ Module psb_c_tools_mod
integer(psb_ipk_),optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt
class(psb_c_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
end subroutine psb_cspasb
end interface

@ -250,7 +250,7 @@ Module psb_d_tools_mod
end interface
interface psb_spasb
subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold)
subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold, bld_and)
import
implicit none
type(psb_dspmat_type), intent (inout) :: a
@ -259,6 +259,7 @@ Module psb_d_tools_mod
integer(psb_ipk_),optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt
class(psb_d_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
end subroutine psb_dspasb
end interface

@ -250,7 +250,7 @@ Module psb_s_tools_mod
end interface
interface psb_spasb
subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold)
subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold, bld_and)
import
implicit none
type(psb_sspmat_type), intent (inout) :: a
@ -259,6 +259,7 @@ Module psb_s_tools_mod
integer(psb_ipk_),optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt
class(psb_s_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
end subroutine psb_sspasb
end interface

@ -250,7 +250,7 @@ Module psb_z_tools_mod
end interface
interface psb_spasb
subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold)
subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold, bld_and)
import
implicit none
type(psb_zspmat_type), intent (inout) :: a
@ -259,6 +259,7 @@ Module psb_z_tools_mod
integer(psb_ipk_),optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt
class(psb_z_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
end subroutine psb_zspasb
end interface

@ -741,3 +741,86 @@ subroutine psb_caddconst_vect(x,b,z,desc_a,info)
return
end subroutine psb_caddconst_vect
subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
use psb_base_mod, psb_protect_name => psb_cabgdxyz_vect
implicit none
type(psb_c_vect_type), intent (inout) :: x
type(psb_c_vect_type), intent (inout) :: y
type(psb_c_vect_type), intent (inout) :: z
complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me,&
& err_act, iix, jjx, iiy, jjy, nr
integer(psb_lpk_) :: ix, ijx, iy, ijy, m
character(len=20) :: name, ch_err
name='psb_c_addconst_vect'
if (psb_errstatus_fatal()) return
info=psb_success_
call psb_erractionsave(err_act)
ctxt=desc_a%get_context()
call psb_info(ctxt, me, np)
if (np == -ione) then
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(x%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(y%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(z%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
ix = ione
iy = ione
m = desc_a%get_global_rows()
nr = desc_a%get_local_rows()
! check vector correctness
call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 1'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 2'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if(desc_a%get_local_rows() > 0) then
call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psb_cabgdxyz_vect

@ -83,6 +83,9 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,&
character(len=20) :: name, ch_err
logical :: aliw, doswap_
integer(psb_ipk_) :: debug_level, debug_unit
logical, parameter :: do_timings=.true.
integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1
integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1
name='psb_cspmv'
info=psb_success_
@ -130,6 +133,19 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,&
call psb_errpush(info,name)
goto 9999
end if
if ((do_timings).and.(mv_phase1==-1)) &
& mv_phase1 = psb_get_timer_idx("SPMM: and send ")
if ((do_timings).and.(mv_phase2==-1)) &
& mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad")
if ((do_timings).and.(mv_phase3==-1)) &
& mv_phase3 = psb_get_timer_idx("SPMM: and rcv")
if ((do_timings).and.(mv_phase4==-1)) &
& mv_phase4 = psb_get_timer_idx("SPMM: and cmp and")
if ((do_timings).and.(mv_phase11==-1)) &
& mv_phase11 = psb_get_timer_idx("SPMM: noand exch ")
if ((do_timings).and.(mv_phase12==-1)) &
& mv_phase12 = psb_get_timer_idx("SPMM: noand cmp")
m = desc_a%get_global_rows()
n = desc_a%get_global_cols()
@ -179,12 +195,44 @@ subroutine psb_cspmv_vect(alpha,a,x,beta,y,desc_a,info,&
if (trans_ == 'N') then
! Matrix is not transposed
if (allocated(a%ad)) then
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
!if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt()
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase1)
if (doswap_) call psi_swapdata(psb_swap_send_,&
& czero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase1)
if (do_timings) call psb_tic(mv_phase2)
call a%ad%spmm(alpha,x%v,beta,y%v,info)
if (do_timings) call psb_tic(mv_phase3)
if (doswap_) call psi_swapdata(psb_swap_recv_,&
& czero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase3)
if (do_timings) call psb_tic(mv_phase4)
call a%and%spmm(alpha,x%v,cone,y%v,info)
if (do_timings) call psb_toc(mv_phase4)
end block
else
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase11)
if (doswap_) then
call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),&
& czero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
end if
if (do_timings) call psb_toc(mv_phase11)
if (do_timings) call psb_tic(mv_phase12)
call psb_csmm(alpha,a,x,beta,y,info)
if (do_timings) call psb_toc(mv_phase12)
end block
end if
if(info /= psb_success_) then
info = psb_err_from_subroutine_non_

@ -741,3 +741,86 @@ subroutine psb_daddconst_vect(x,b,z,desc_a,info)
return
end subroutine psb_daddconst_vect
subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
use psb_base_mod, psb_protect_name => psb_dabgdxyz_vect
implicit none
type(psb_d_vect_type), intent (inout) :: x
type(psb_d_vect_type), intent (inout) :: y
type(psb_d_vect_type), intent (inout) :: z
real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me,&
& err_act, iix, jjx, iiy, jjy, nr
integer(psb_lpk_) :: ix, ijx, iy, ijy, m
character(len=20) :: name, ch_err
name='psb_d_addconst_vect'
if (psb_errstatus_fatal()) return
info=psb_success_
call psb_erractionsave(err_act)
ctxt=desc_a%get_context()
call psb_info(ctxt, me, np)
if (np == -ione) then
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(x%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(y%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(z%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
ix = ione
iy = ione
m = desc_a%get_global_rows()
nr = desc_a%get_local_rows()
! check vector correctness
call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 1'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 2'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if(desc_a%get_local_rows() > 0) then
call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psb_dabgdxyz_vect

@ -83,6 +83,9 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,&
character(len=20) :: name, ch_err
logical :: aliw, doswap_
integer(psb_ipk_) :: debug_level, debug_unit
logical, parameter :: do_timings=.true.
integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1
integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1
name='psb_dspmv'
info=psb_success_
@ -130,6 +133,19 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,&
call psb_errpush(info,name)
goto 9999
end if
if ((do_timings).and.(mv_phase1==-1)) &
& mv_phase1 = psb_get_timer_idx("SPMM: and send ")
if ((do_timings).and.(mv_phase2==-1)) &
& mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad")
if ((do_timings).and.(mv_phase3==-1)) &
& mv_phase3 = psb_get_timer_idx("SPMM: and rcv")
if ((do_timings).and.(mv_phase4==-1)) &
& mv_phase4 = psb_get_timer_idx("SPMM: and cmp and")
if ((do_timings).and.(mv_phase11==-1)) &
& mv_phase11 = psb_get_timer_idx("SPMM: noand exch ")
if ((do_timings).and.(mv_phase12==-1)) &
& mv_phase12 = psb_get_timer_idx("SPMM: noand cmp")
m = desc_a%get_global_rows()
n = desc_a%get_global_cols()
@ -179,12 +195,44 @@ subroutine psb_dspmv_vect(alpha,a,x,beta,y,desc_a,info,&
if (trans_ == 'N') then
! Matrix is not transposed
if (allocated(a%ad)) then
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
!if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt()
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase1)
if (doswap_) call psi_swapdata(psb_swap_send_,&
& dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase1)
if (do_timings) call psb_tic(mv_phase2)
call a%ad%spmm(alpha,x%v,beta,y%v,info)
if (do_timings) call psb_tic(mv_phase3)
if (doswap_) call psi_swapdata(psb_swap_recv_,&
& dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase3)
if (do_timings) call psb_tic(mv_phase4)
call a%and%spmm(alpha,x%v,done,y%v,info)
if (do_timings) call psb_toc(mv_phase4)
end block
else
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase11)
if (doswap_) then
call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),&
& dzero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
end if
if (do_timings) call psb_toc(mv_phase11)
if (do_timings) call psb_tic(mv_phase12)
call psb_csmm(alpha,a,x,beta,y,info)
if (do_timings) call psb_toc(mv_phase12)
end block
end if
if(info /= psb_success_) then
info = psb_err_from_subroutine_non_

@ -741,3 +741,86 @@ subroutine psb_saddconst_vect(x,b,z,desc_a,info)
return
end subroutine psb_saddconst_vect
subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
use psb_base_mod, psb_protect_name => psb_sabgdxyz_vect
implicit none
type(psb_s_vect_type), intent (inout) :: x
type(psb_s_vect_type), intent (inout) :: y
type(psb_s_vect_type), intent (inout) :: z
real(psb_spk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me,&
& err_act, iix, jjx, iiy, jjy, nr
integer(psb_lpk_) :: ix, ijx, iy, ijy, m
character(len=20) :: name, ch_err
name='psb_s_addconst_vect'
if (psb_errstatus_fatal()) return
info=psb_success_
call psb_erractionsave(err_act)
ctxt=desc_a%get_context()
call psb_info(ctxt, me, np)
if (np == -ione) then
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(x%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(y%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(z%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
ix = ione
iy = ione
m = desc_a%get_global_rows()
nr = desc_a%get_local_rows()
! check vector correctness
call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 1'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 2'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if(desc_a%get_local_rows() > 0) then
call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psb_sabgdxyz_vect

@ -83,6 +83,9 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,&
character(len=20) :: name, ch_err
logical :: aliw, doswap_
integer(psb_ipk_) :: debug_level, debug_unit
logical, parameter :: do_timings=.true.
integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1
integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1
name='psb_sspmv'
info=psb_success_
@ -130,6 +133,19 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,&
call psb_errpush(info,name)
goto 9999
end if
if ((do_timings).and.(mv_phase1==-1)) &
& mv_phase1 = psb_get_timer_idx("SPMM: and send ")
if ((do_timings).and.(mv_phase2==-1)) &
& mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad")
if ((do_timings).and.(mv_phase3==-1)) &
& mv_phase3 = psb_get_timer_idx("SPMM: and rcv")
if ((do_timings).and.(mv_phase4==-1)) &
& mv_phase4 = psb_get_timer_idx("SPMM: and cmp and")
if ((do_timings).and.(mv_phase11==-1)) &
& mv_phase11 = psb_get_timer_idx("SPMM: noand exch ")
if ((do_timings).and.(mv_phase12==-1)) &
& mv_phase12 = psb_get_timer_idx("SPMM: noand cmp")
m = desc_a%get_global_rows()
n = desc_a%get_global_cols()
@ -179,12 +195,44 @@ subroutine psb_sspmv_vect(alpha,a,x,beta,y,desc_a,info,&
if (trans_ == 'N') then
! Matrix is not transposed
if (allocated(a%ad)) then
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
!if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt()
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase1)
if (doswap_) call psi_swapdata(psb_swap_send_,&
& szero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase1)
if (do_timings) call psb_tic(mv_phase2)
call a%ad%spmm(alpha,x%v,beta,y%v,info)
if (do_timings) call psb_tic(mv_phase3)
if (doswap_) call psi_swapdata(psb_swap_recv_,&
& szero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase3)
if (do_timings) call psb_tic(mv_phase4)
call a%and%spmm(alpha,x%v,sone,y%v,info)
if (do_timings) call psb_toc(mv_phase4)
end block
else
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase11)
if (doswap_) then
call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),&
& szero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
end if
if (do_timings) call psb_toc(mv_phase11)
if (do_timings) call psb_tic(mv_phase12)
call psb_csmm(alpha,a,x,beta,y,info)
if (do_timings) call psb_toc(mv_phase12)
end block
end if
if(info /= psb_success_) then
info = psb_err_from_subroutine_non_

@ -741,3 +741,86 @@ subroutine psb_zaddconst_vect(x,b,z,desc_a,info)
return
end subroutine psb_zaddconst_vect
subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
use psb_base_mod, psb_protect_name => psb_zabgdxyz_vect
implicit none
type(psb_z_vect_type), intent (inout) :: x
type(psb_z_vect_type), intent (inout) :: y
type(psb_z_vect_type), intent (inout) :: z
complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
! locals
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np, me,&
& err_act, iix, jjx, iiy, jjy, nr
integer(psb_lpk_) :: ix, ijx, iy, ijy, m
character(len=20) :: name, ch_err
name='psb_z_addconst_vect'
if (psb_errstatus_fatal()) return
info=psb_success_
call psb_erractionsave(err_act)
ctxt=desc_a%get_context()
call psb_info(ctxt, me, np)
if (np == -ione) then
info = psb_err_context_error_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(x%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(y%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
if (.not.allocated(z%v)) then
info = psb_err_invalid_vect_state_
call psb_errpush(info,name)
goto 9999
endif
ix = ione
iy = ione
m = desc_a%get_global_rows()
nr = desc_a%get_local_rows()
! check vector correctness
call psb_chkvect(m,lone,x%get_nrows(),ix,lone,desc_a,info,iix,jjx)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 1'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
call psb_chkvect(m,lone,z%get_nrows(),iy,lone,desc_a,info,iiy,jjy)
if(info /= psb_success_) then
info=psb_err_from_subroutine_
ch_err='psb_chkvect 2'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if(desc_a%get_local_rows() > 0) then
call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(ctxt,err_act)
return
end subroutine psb_zabgdxyz_vect

@ -83,6 +83,9 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,&
character(len=20) :: name, ch_err
logical :: aliw, doswap_
integer(psb_ipk_) :: debug_level, debug_unit
logical, parameter :: do_timings=.true.
integer(psb_ipk_), save :: mv_phase1=-1, mv_phase2=-1, mv_phase3=-1, mv_phase4=-1
integer(psb_ipk_), save :: mv_phase11=-1, mv_phase12=-1
name='psb_zspmv'
info=psb_success_
@ -130,6 +133,19 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,&
call psb_errpush(info,name)
goto 9999
end if
if ((do_timings).and.(mv_phase1==-1)) &
& mv_phase1 = psb_get_timer_idx("SPMM: and send ")
if ((do_timings).and.(mv_phase2==-1)) &
& mv_phase2 = psb_get_timer_idx("SPMM: and cmp ad")
if ((do_timings).and.(mv_phase3==-1)) &
& mv_phase3 = psb_get_timer_idx("SPMM: and rcv")
if ((do_timings).and.(mv_phase4==-1)) &
& mv_phase4 = psb_get_timer_idx("SPMM: and cmp and")
if ((do_timings).and.(mv_phase11==-1)) &
& mv_phase11 = psb_get_timer_idx("SPMM: noand exch ")
if ((do_timings).and.(mv_phase12==-1)) &
& mv_phase12 = psb_get_timer_idx("SPMM: noand cmp")
m = desc_a%get_global_rows()
n = desc_a%get_global_cols()
@ -179,12 +195,44 @@ subroutine psb_zspmv_vect(alpha,a,x,beta,y,desc_a,info,&
if (trans_ == 'N') then
! Matrix is not transposed
if (allocated(a%ad)) then
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
!if (me==0) write(0,*) 'going for overlap ',a%ad%get_fmt(),' ',a%and%get_fmt()
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase1)
if (doswap_) call psi_swapdata(psb_swap_send_,&
& zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase1)
if (do_timings) call psb_tic(mv_phase2)
call a%ad%spmm(alpha,x%v,beta,y%v,info)
if (do_timings) call psb_tic(mv_phase3)
if (doswap_) call psi_swapdata(psb_swap_recv_,&
& zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
if (do_timings) call psb_toc(mv_phase3)
if (do_timings) call psb_tic(mv_phase4)
call a%and%spmm(alpha,x%v,zone,y%v,info)
if (do_timings) call psb_toc(mv_phase4)
end block
else
block
logical, parameter :: do_timings=.true.
real(psb_dpk_) :: t1, t2, t3, t4, t5
if (do_timings) call psb_barrier(ctxt)
if (do_timings) call psb_tic(mv_phase11)
if (doswap_) then
call psi_swapdata(ior(psb_swap_send_,psb_swap_recv_),&
& zzero,x%v,desc_a,iwork,info,data=psb_comm_halo_)
end if
if (do_timings) call psb_toc(mv_phase11)
if (do_timings) call psb_tic(mv_phase12)
call psb_csmm(alpha,a,x,beta,y,info)
if (do_timings) call psb_toc(mv_phase12)
end block
end if
if(info /= psb_success_) then
info = psb_err_from_subroutine_non_

@ -2006,8 +2006,8 @@ subroutine psb_c_base_vect_mv(alpha,a,x,beta,y,info,trans)
! For the time being we just throw everything back
! onto the normal routines.
call x%sync()
call y%sync()
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%spmm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
end subroutine psb_c_base_vect_mv
@ -2060,8 +2060,8 @@ subroutine psb_c_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
goto 9999
end if
call x%sync()
call y%sync()
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
if (present(d)) then
call d%sync()
if (present(scale)) then
@ -2082,6 +2082,7 @@ subroutine psb_c_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
if (info == psb_success_)&
& call a%inner_spsm(alpha,tmpv,beta,y,info,trans)
call y%set_host()
if (info == psb_success_) then
call tmpv%free(info)
if (info == psb_success_) deallocate(tmpv,stat=info)
@ -2161,8 +2162,11 @@ subroutine psb_c_base_inner_vect_sv(alpha,a,x,beta,y,info,trans)
info = psb_success_
call psb_erractionsave(err_act)
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%inner_spsm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
if (info /= psb_success_) then
info = psb_err_from_subroutine_

@ -166,6 +166,7 @@ subroutine psb_c_coo_scals(d,a,info)
call a%make_nonunit()
end if
!$omp parallel do private(i)
do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d
enddo
@ -4174,7 +4175,6 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
#if defined(OPENMP)
integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
#endif
info = psb_success_
@ -4205,7 +4205,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
! 'iaux' has to allow the threads to have an exclusive group
! of indices as work space. Since each thread handles one
! row/column at the time, we allocate this way.
allocate(iaux(MAX((nc+2),(nr+2))*maxthreads),stat=info)
allocate(iaux(MAX((nc+2),(nr+2))),stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
@ -4214,7 +4214,7 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
#else
allocate(iaux(nzin+2),stat=info)
allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
@ -4268,7 +4268,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!locals
integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:)
complex(psb_spk_), allocatable :: vs(:)
integer(psb_ipk_) :: nza, nzl,iret
integer(psb_ipk_) :: nza, nzl,iret, maxnzr
integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name = 'psb_fixcoo'
@ -4277,7 +4277,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
#if defined(OPENMP)
integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
integer(psb_ipk_), allocatable :: kaux(:),idxaux(:)
#endif
info = psb_success_
@ -4301,11 +4301,14 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
use_buffers = .false.
end if
if (use_buffers) then
iaux(:) = 0
!if (use_buffers) then
#if defined(OPENMP)
!$omp workshare
iaux(:) = 0
!$omp end workshare
maxnzr = 0
!$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP shared(nzin,ia,nr,iaux) &
!$OMP shared(nzin,ia,nr,iaux,maxnzr) &
!$OMP private(i) &
!$OMP reduction(.and.:use_buffers)
do i=1,nzin
@ -4319,7 +4322,16 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end if
end do
!$OMP END PARALLEL DO
maxnzr = 0
!$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP private(i) shared(nr,iaux)&
!$OMP reduction(max:maxnzr)
do i=1,nr
maxnzr = max(maxnzr,iaux(i))
end do
!$OMP END PARALLEL DO
#else
iaux(:) = 0
!srt_inp = .true.
do i=1,nzin
if ((ia(i) < 1).or.(ia(i) > nr)) then
@ -4333,8 +4345,12 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!srt_inp = srt_inp .and.(ia(i-1)<=ia(i))
end do
maxnzr = 0
do i=1,nr
maxnzr = max(maxnzr,iaux(i))
end do
#endif
end if
!end if
! Check again use_buffers. We enter here if nzin >= nr and
! all the indices are valid
@ -4342,22 +4358,21 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
if (use_buffers) then
#if defined(OPENMP)
maxthreads = omp_get_max_threads()
allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info)
allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
!$omp workshare
kaux(:) = 0
sum(:) = 0
sum(1) = 1
!$omp end workshare
err = 0
! Here, starting from 'iaux', we apply a fixing in order to obtain the starting
! index for each row. We do the same on 'kaux'
!$OMP PARALLEL default(none) &
!$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP shared(maxnzr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, &
!$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info)
@ -4382,36 +4397,40 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!write(0,*) 'fix_coo_inner: trying with exscan'
call psi_exscan(nr+1,iaux,info,shift=ione)
!$OMP BARRIER
!$OMP SINGLE
!t0 = omp_get_wtime()
!$OMP END SINGLE
! ------------------ Sorting and buffers -------------------
! Let's use an auxiliary buffer, 'idxaux', to get indices leaving
! unmodified 'iaux'
do j=idxstart,idxend
!$omp do private(j)
do j=1,nr+1
idxaux(j) = iaux(j)
end do
!$omp end do
! Here we sort data inside the auxiliary buffers
!$omp do private(act_row,i,i1)
do i=1,nzin
act_row = ia(i)
if ((act_row >= idxstart) .and. (act_row <= idxend)) then
ias(idxaux(act_row)) = ia(i)
jas(idxaux(act_row)) = ja(i)
vs(idxaux(act_row)) = val(i)
!$omp atomic capture
i1 =idxaux(act_row)
idxaux(act_row) = idxaux(act_row) + 1
end if
!$omp end atomic
ias(i1) = ia(i)
jas(i1) = ja(i)
vs(i1) = val(i)
end do
!$omp end do
!$OMP BARRIER
!$OMP SINGLE
!t1 = omp_get_wtime()
!write(0,*) ithread,'Srt&Cpy :',t1-t0
!$OMP END SINGLE
! Let's sort column indices and values. After that we will store
! the number of unique values in 'kaux'
do j=idxstart,idxend
block
integer(psb_ipk_), allocatable :: ixt(:)
allocate(ixt(2*maxnzr+2))
!$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256)
do j=1,nr
first_elem = iaux(j)
last_elem = iaux(j+1) - 1
nzl = last_elem - first_elem + 1
@ -4419,11 +4438,11 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! The row has elements?
if (nzl > 0) then
call psi_msort_up(nzl,jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret)
& ixt,iret)
if (iret == 0) then
call psb_ip_reord(nzl,vs(first_elem:last_elem),&
& ias(first_elem:last_elem),jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2))
& ixt)
end if
! Over each row we count the unique values
@ -4436,6 +4455,9 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end do
end if
end do
!$omp end do
deallocate(ixt)
end block
! --------------------------------------------------
! ---------------- kaux composition ----------------
@ -4553,7 +4575,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
nzout = kaux(nr+1) - 1
deallocate(sum,kaux,idxaux,stat=info)
deallocate(kaux,idxaux,stat=info)
#else
!if (.not.srt_inp) then
@ -4710,7 +4732,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
& call psb_ip_reord(nzin,val,ia,ja,iaux)
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) &
!$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) &
!$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, &
!$OMP work,first_elem,last_elem)
@ -4732,6 +4754,9 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
idxend = idxstart + work - 1
block
integer(psb_ipk_), allocatable :: ixt(:)
allocate(ixt(2*maxnzr+2))
! ---------------------------------------------------
first_elem = 0
@ -4746,11 +4771,10 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
nzl = last_elem - first_elem + 1
if (nzl > 0) then
call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret)
call psi_msort_up(nzl,ja(first_elem:last_elem),ixt,iret)
if (iret == 0) &
& call psb_ip_reord(nzl,val(first_elem:last_elem),&
& ia(first_elem:last_elem),ja(first_elem:last_elem),&
& iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2))
& ia(first_elem:last_elem),ja(first_elem:last_elem),ixt)
end if
act_row = act_row + 1
@ -4764,6 +4788,7 @@ subroutine psb_c_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
last_elem = j
end if
end do
end block
!$OMP END PARALLEL
#else
i = 1

@ -2163,7 +2163,7 @@ subroutine psb_c_mv_csc_to_coo(a,b,info)
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
call b%set_nzeros(a%get_nzeros())
@ -2328,7 +2328,7 @@ subroutine psb_c_cp_csc_to_fmt(a,b,info)
if (a%is_dev()) call a%sync()
b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
nc = a%get_ncols()
nz = a%get_nzeros()
nz = max(a%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
@ -2461,7 +2461,7 @@ subroutine psb_c_cp_csc_from_fmt(a,b,info)
if (b%is_dev()) call b%sync()
a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
nc = b%get_ncols()
nz = b%get_nzeros()
nz = max(b%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
@ -4058,7 +4058,7 @@ subroutine psb_lc_mv_csc_to_coo(a,b,info)
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat
call b%set_nzeros(a%get_nzeros())

@ -3318,7 +3318,7 @@ subroutine psb_c_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync()
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
call b%set_nzeros(a%get_nzeros())
@ -3489,7 +3489,7 @@ subroutine psb_c_cp_csr_to_fmt(a,b,info)
if (a%is_dev()) call a%sync()
b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
nr = a%get_nrows()
nz = a%get_nzeros()
nz = max(a%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
@ -3594,7 +3594,7 @@ subroutine psb_c_cp_csr_from_fmt(a,b,info)
if (b%is_dev()) call b%sync()
a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
nr = b%get_nrows()
nz = b%get_nzeros()
nz = max(b%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
@ -3805,6 +3805,7 @@ contains
integer(psb_ipk_) :: ma, nb
integer(psb_ipk_), allocatable :: col_inds(:), offsets(:)
integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx
integer(psb_ipk_) :: nth, lth,ith
ma = a%get_nrows()
nb = b%get_ncols()
@ -3815,12 +3816,19 @@ contains
! dense accumulator
! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf
call psb_realloc(nb, acc, info)
!$omp parallel shared(nth,lth)
!$omp single
nth = omp_get_num_threads()
lth = min(nth, ma)
!$omp end single
!$omp end parallel
allocate(offsets(omp_get_max_threads()))
!$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) &
!$omp shared(a,b,c,offsets)
!$omp num_threads(lth) shared(a,b,c,offsets)
thread_upperbound = 0
start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma
if (start_idx == 0) then
@ -3876,15 +3884,14 @@ contains
!$omp end single
!$omp barrier
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if
!$omp barrier
!$omp single
@ -3892,9 +3899,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel
end subroutine spmm_omp_gustavson
@ -3930,6 +3938,7 @@ contains
!$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets)
thread_upperbound = 0
start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma
do jj = a%irp(irw), a%irp(irw + 1) - 1
@ -3996,14 +4005,14 @@ contains
!$omp barrier
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if
!$omp barrier
!$omp single
@ -4011,9 +4020,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel
end subroutine spmm_omp_gustavson_1d
@ -4312,6 +4322,266 @@ contains
end subroutine psb_ccsrspspmm
#endif
subroutine psb_c_ecsr_mold(a,b,info)
use psb_c_csr_mat_mod, psb_protect_name => psb_c_ecsr_mold
use psb_error_mod
implicit none
class(psb_c_ecsr_sparse_mat), intent(in) :: a
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name='ecsr_mold'
logical, parameter :: debug=.false.
call psb_get_erraction(err_act)
info = 0
if (allocated(b)) then
call b%free()
deallocate(b,stat=info)
end if
if (info == 0) allocate(psb_c_ecsr_sparse_mat :: b, stat=info)
if (info /= 0) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
goto 9999
end if
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_ecsr_mold
subroutine psb_c_ecsr_csmv(alpha,a,x,beta,y,info,trans)
use psb_error_mod
use psb_string_mod
use psb_c_csr_mat_mod, psb_protect_name => psb_c_ecsr_csmv
implicit none
class(psb_c_ecsr_sparse_mat), intent(in) :: a
complex(psb_spk_), intent(in) :: alpha, beta, x(:)
complex(psb_spk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: m, n
logical :: tra, ctra
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name='c_csr_csmv'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (a%is_dev()) call a%sync()
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T')
ctra = (psb_toupper(trans_) == 'C')
if (tra.or.ctra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = psb_err_input_asize_small_i_
ierr(1) = 3; ierr(2) = size(x); ierr(3) = n;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if (size(y,1)<m) then
info = psb_err_input_asize_small_i_
ierr(1) = 5; ierr(2) = size(y); ierr(3) =m;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if ((beta == cone).and.&
& .not.(tra.or.ctra.or.(a%is_triangle()).or.(a%is_unit()))) then
call psb_c_ecsr_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,&
& a%nnerws,a%nerwp,x,y)
else
call a%psb_c_csr_sparse_mat%csmv(alpha,x,beta,y,info,trans)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
contains
subroutine psb_c_ecsr_csmv_inner(m,n,alpha,irp,ja,val,&
& nnerws,nerwp,x,y)
integer(psb_ipk_), intent(in) :: m,n,nnerws,irp(*),ja(*),nerwp(*)
complex(psb_spk_), intent(in) :: alpha, x(*),val(*)
complex(psb_spk_), intent(inout) :: y(*)
integer(psb_ipk_) :: i,j,ir
complex(psb_spk_) :: acc
if (alpha == czero) return
if (alpha == cone) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = czero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + acc
end do
else if (alpha == -cone) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = czero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) -acc
end do
else
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = czero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + alpha*acc
end do
end if
end subroutine psb_c_ecsr_csmv_inner
end subroutine psb_c_ecsr_csmv
subroutine psb_c_ecsr_cmp_nerwp(a,info)
use psb_const_mod
use psb_realloc_mod
use psb_c_base_mat_mod
use psb_c_csr_mat_mod, psb_protect_name => psb_c_ecsr_cmp_nerwp
implicit none
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nnerws, i, nr, nzr
info = psb_success_
nr = a%get_nrows()
call psb_realloc(nr,a%nerwp,info)
nnerws = 0
do i=1, nr
nzr = a%irp(i+1)-a%irp(i)
if (nzr>0) then
nnerws = nnerws + 1
a%nerwp(nnerws) = i
end if
end do
call psb_realloc(nnerws,a%nerwp,info)
a%nnerws = nnerws
end subroutine psb_c_ecsr_cmp_nerwp
subroutine psb_c_cp_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_c_base_mat_mod
use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_ecsr_from_coo
implicit none
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_c_csr_sparse_mat%cp_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_c_cp_ecsr_from_coo
subroutine psb_c_mv_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_error_mod
use psb_c_base_mat_mod
use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_ecsr_from_coo
implicit none
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_c_csr_sparse_mat%mv_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_c_mv_ecsr_from_coo
subroutine psb_c_mv_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_c_base_mat_mod
use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_ecsr_from_fmt
implicit none
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_c_csr_sparse_mat%mv_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_c_mv_ecsr_from_fmt
subroutine psb_c_cp_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_c_base_mat_mod
use psb_realloc_mod
use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_ecsr_from_fmt
implicit none
class(psb_c_ecsr_sparse_mat), intent(inout) :: a
class(psb_c_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_c_cp_ecsr_from_fmt
!
!
! lc version
@ -6021,7 +6291,7 @@ subroutine psb_lc_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync()
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat
call b%set_nzeros(a%get_nzeros())

@ -1213,6 +1213,106 @@ subroutine psb_c_b_csclip(a,b,info,&
end subroutine psb_c_b_csclip
subroutine psb_c_split_nd(a,n_rows,n_cols,info)
use psb_error_mod
use psb_string_mod
use psb_c_mat_mod, psb_protect_name => psb_c_split_nd
implicit none
class(psb_cspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_c_base_sparse_mat), intent(in), optional :: mold
type(psb_c_coo_sparse_mat) :: acoo
type(psb_c_csr_sparse_mat), allocatable :: aclip
type(psb_c_ecsr_sparse_mat), allocatable :: andclip
logical, parameter :: use_ecsr=.true.
character(len=20) :: name, ch_err
integer(psb_ipk_) :: err_act
info = psb_success_
name = 'psb_split'
call psb_erractionsave(err_act)
allocate(aclip)
call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.)
allocate(a%ad,mold=a%a)
call a%ad%mv_from_coo(acoo,info)
call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.)
if (use_ecsr) then
allocate(andclip)
call andclip%mv_from_coo(acoo,info)
call move_alloc(andclip,a%and)
else
allocate(a%and,mold=a%a)
call a%and%mv_from_coo(acoo,info)
end if
if (psb_errstatus_fatal()) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='cscnv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_split_nd
subroutine psb_c_merge_nd(a,n_rows,n_cols,info)
use psb_error_mod
use psb_string_mod
use psb_c_mat_mod, psb_protect_name => psb_c_merge_nd
implicit none
class(psb_cspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_c_base_sparse_mat), intent(in), optional :: mold
type(psb_c_coo_sparse_mat) :: acoo1,acoo2
integer(psb_ipk_) :: nz
logical, parameter :: use_ecsr=.true.
character(len=20) :: name, ch_err
integer(psb_ipk_) :: err_act
info = psb_success_
name = 'psb_split'
call psb_erractionsave(err_act)
call a%ad%mv_to_coo(acoo1,info)
call acoo1%set_bld()
call acoo1%set_nrows(n_rows)
call acoo1%set_ncols(n_cols)
call a%and%mv_to_coo(acoo2,info)
nz=acoo2%get_nzeros()
call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info)
if (allocated(a%a)) then
call a%a%free()
deallocate(a%a)
end if
allocate(a%a,mold=a%ad)
call a%a%mv_from_coo(acoo1,info)
if (psb_errstatus_fatal()) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='cscnv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_c_merge_nd
subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl)
use psb_error_mod
use psb_string_mod
@ -1246,6 +1346,7 @@ subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl)
goto 9999
end if
if (.false.) then
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
@ -1294,6 +1395,16 @@ subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl)
end if
call move_alloc(altmp,b%a)
else
call inner_cp_fmt(a%a,b%a,info,type,mold,dupl)
if (allocated(a%ad)) then
call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_cp_fmt(a%and,b%and,info,type,mold,dupl)
end if
end if
call b%trim()
call b%set_asb()
call psb_erractionrestore(err_act)
@ -1303,7 +1414,79 @@ subroutine psb_c_cscnv(a,b,info,type,mold,upd,dupl)
9999 call psb_error_handler(err_act)
return
contains
subroutine inner_cp_fmt(a,b,info,type,mold,dupl)
class(psb_c_base_sparse_mat), intent(in) :: a
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_c_base_sparse_mat), intent(in), optional :: mold
class(psb_c_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_c_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_c_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_c_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_c_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%cp_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,b)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_cp_fmt
end subroutine psb_c_cscnv
subroutine psb_c_cscnv_ip(a,info,type,mold,dupl)
@ -1318,7 +1501,6 @@ subroutine psb_c_cscnv_ip(a,info,type,mold,dupl)
character(len=*), optional, intent(in) :: type
class(psb_c_base_sparse_mat), intent(in), optional :: mold
class(psb_c_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
character(len=20) :: name='cscnv_ip'
@ -1345,6 +1527,7 @@ subroutine psb_c_cscnv_ip(a,info,type,mold,dupl)
goto 9999
end if
if (.false.) then
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
@ -1377,14 +1560,22 @@ subroutine psb_c_cscnv_ip(a,info,type,mold,dupl)
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a%a, info)
call move_alloc(altmp,a%a)
else
call inner_mv_fmt(a%a,info,type,mold,dupl)
if (allocated(a%ad)) then
call inner_mv_fmt(a%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_mv_fmt(a%and,info,type,mold,dupl)
end if
end if
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,a%a)
call a%trim()
call a%set_asb()
call psb_erractionrestore(err_act)
@ -1394,6 +1585,77 @@ subroutine psb_c_cscnv_ip(a,info,type,mold,dupl)
9999 call psb_error_handler(err_act)
return
contains
subroutine inner_mv_fmt(a,info,type,mold,dupl)
class(psb_c_base_sparse_mat), intent(inout), allocatable :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_c_base_sparse_mat), intent(in), optional :: mold
class(psb_c_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_c_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_c_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_c_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_c_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,a)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_mv_fmt
end subroutine psb_c_cscnv_ip

@ -2006,8 +2006,8 @@ subroutine psb_d_base_vect_mv(alpha,a,x,beta,y,info,trans)
! For the time being we just throw everything back
! onto the normal routines.
call x%sync()
call y%sync()
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%spmm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
end subroutine psb_d_base_vect_mv
@ -2060,8 +2060,8 @@ subroutine psb_d_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
goto 9999
end if
call x%sync()
call y%sync()
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
if (present(d)) then
call d%sync()
if (present(scale)) then
@ -2082,6 +2082,7 @@ subroutine psb_d_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
if (info == psb_success_)&
& call a%inner_spsm(alpha,tmpv,beta,y,info,trans)
call y%set_host()
if (info == psb_success_) then
call tmpv%free(info)
if (info == psb_success_) deallocate(tmpv,stat=info)
@ -2161,8 +2162,11 @@ subroutine psb_d_base_inner_vect_sv(alpha,a,x,beta,y,info,trans)
info = psb_success_
call psb_erractionsave(err_act)
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%inner_spsm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
if (info /= psb_success_) then
info = psb_err_from_subroutine_

@ -166,6 +166,7 @@ subroutine psb_d_coo_scals(d,a,info)
call a%make_nonunit()
end if
!$omp parallel do private(i)
do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d
enddo
@ -4174,7 +4175,6 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
#if defined(OPENMP)
integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
#endif
info = psb_success_
@ -4205,7 +4205,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
! 'iaux' has to allow the threads to have an exclusive group
! of indices as work space. Since each thread handles one
! row/column at the time, we allocate this way.
allocate(iaux(MAX((nc+2),(nr+2))*maxthreads),stat=info)
allocate(iaux(MAX((nc+2),(nr+2))),stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
@ -4214,7 +4214,7 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
#else
allocate(iaux(nzin+2),stat=info)
allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
@ -4268,7 +4268,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!locals
integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:)
real(psb_dpk_), allocatable :: vs(:)
integer(psb_ipk_) :: nza, nzl,iret
integer(psb_ipk_) :: nza, nzl,iret, maxnzr
integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name = 'psb_fixcoo'
@ -4277,7 +4277,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
#if defined(OPENMP)
integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
integer(psb_ipk_), allocatable :: kaux(:),idxaux(:)
#endif
info = psb_success_
@ -4301,11 +4301,14 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
use_buffers = .false.
end if
if (use_buffers) then
iaux(:) = 0
!if (use_buffers) then
#if defined(OPENMP)
!$omp workshare
iaux(:) = 0
!$omp end workshare
maxnzr = 0
!$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP shared(nzin,ia,nr,iaux) &
!$OMP shared(nzin,ia,nr,iaux,maxnzr) &
!$OMP private(i) &
!$OMP reduction(.and.:use_buffers)
do i=1,nzin
@ -4319,7 +4322,16 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end if
end do
!$OMP END PARALLEL DO
maxnzr = 0
!$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP private(i) shared(nr,iaux)&
!$OMP reduction(max:maxnzr)
do i=1,nr
maxnzr = max(maxnzr,iaux(i))
end do
!$OMP END PARALLEL DO
#else
iaux(:) = 0
!srt_inp = .true.
do i=1,nzin
if ((ia(i) < 1).or.(ia(i) > nr)) then
@ -4333,8 +4345,12 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!srt_inp = srt_inp .and.(ia(i-1)<=ia(i))
end do
maxnzr = 0
do i=1,nr
maxnzr = max(maxnzr,iaux(i))
end do
#endif
end if
!end if
! Check again use_buffers. We enter here if nzin >= nr and
! all the indices are valid
@ -4342,22 +4358,21 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
if (use_buffers) then
#if defined(OPENMP)
maxthreads = omp_get_max_threads()
allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info)
allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
!$omp workshare
kaux(:) = 0
sum(:) = 0
sum(1) = 1
!$omp end workshare
err = 0
! Here, starting from 'iaux', we apply a fixing in order to obtain the starting
! index for each row. We do the same on 'kaux'
!$OMP PARALLEL default(none) &
!$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP shared(maxnzr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, &
!$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info)
@ -4382,36 +4397,40 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!write(0,*) 'fix_coo_inner: trying with exscan'
call psi_exscan(nr+1,iaux,info,shift=ione)
!$OMP BARRIER
!$OMP SINGLE
!t0 = omp_get_wtime()
!$OMP END SINGLE
! ------------------ Sorting and buffers -------------------
! Let's use an auxiliary buffer, 'idxaux', to get indices leaving
! unmodified 'iaux'
do j=idxstart,idxend
!$omp do private(j)
do j=1,nr+1
idxaux(j) = iaux(j)
end do
!$omp end do
! Here we sort data inside the auxiliary buffers
!$omp do private(act_row,i,i1)
do i=1,nzin
act_row = ia(i)
if ((act_row >= idxstart) .and. (act_row <= idxend)) then
ias(idxaux(act_row)) = ia(i)
jas(idxaux(act_row)) = ja(i)
vs(idxaux(act_row)) = val(i)
!$omp atomic capture
i1 =idxaux(act_row)
idxaux(act_row) = idxaux(act_row) + 1
end if
!$omp end atomic
ias(i1) = ia(i)
jas(i1) = ja(i)
vs(i1) = val(i)
end do
!$omp end do
!$OMP BARRIER
!$OMP SINGLE
!t1 = omp_get_wtime()
!write(0,*) ithread,'Srt&Cpy :',t1-t0
!$OMP END SINGLE
! Let's sort column indices and values. After that we will store
! the number of unique values in 'kaux'
do j=idxstart,idxend
block
integer(psb_ipk_), allocatable :: ixt(:)
allocate(ixt(2*maxnzr+2))
!$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256)
do j=1,nr
first_elem = iaux(j)
last_elem = iaux(j+1) - 1
nzl = last_elem - first_elem + 1
@ -4419,11 +4438,11 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! The row has elements?
if (nzl > 0) then
call psi_msort_up(nzl,jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret)
& ixt,iret)
if (iret == 0) then
call psb_ip_reord(nzl,vs(first_elem:last_elem),&
& ias(first_elem:last_elem),jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2))
& ixt)
end if
! Over each row we count the unique values
@ -4436,6 +4455,9 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end do
end if
end do
!$omp end do
deallocate(ixt)
end block
! --------------------------------------------------
! ---------------- kaux composition ----------------
@ -4553,7 +4575,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
nzout = kaux(nr+1) - 1
deallocate(sum,kaux,idxaux,stat=info)
deallocate(kaux,idxaux,stat=info)
#else
!if (.not.srt_inp) then
@ -4710,7 +4732,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
& call psb_ip_reord(nzin,val,ia,ja,iaux)
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) &
!$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) &
!$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, &
!$OMP work,first_elem,last_elem)
@ -4732,6 +4754,9 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
idxend = idxstart + work - 1
block
integer(psb_ipk_), allocatable :: ixt(:)
allocate(ixt(2*maxnzr+2))
! ---------------------------------------------------
first_elem = 0
@ -4746,11 +4771,10 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
nzl = last_elem - first_elem + 1
if (nzl > 0) then
call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret)
call psi_msort_up(nzl,ja(first_elem:last_elem),ixt,iret)
if (iret == 0) &
& call psb_ip_reord(nzl,val(first_elem:last_elem),&
& ia(first_elem:last_elem),ja(first_elem:last_elem),&
& iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2))
& ia(first_elem:last_elem),ja(first_elem:last_elem),ixt)
end if
act_row = act_row + 1
@ -4764,6 +4788,7 @@ subroutine psb_d_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
last_elem = j
end if
end do
end block
!$OMP END PARALLEL
#else
i = 1

@ -2163,7 +2163,7 @@ subroutine psb_d_mv_csc_to_coo(a,b,info)
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
call b%set_nzeros(a%get_nzeros())
@ -2328,7 +2328,7 @@ subroutine psb_d_cp_csc_to_fmt(a,b,info)
if (a%is_dev()) call a%sync()
b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
nc = a%get_ncols()
nz = a%get_nzeros()
nz = max(a%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
@ -2461,7 +2461,7 @@ subroutine psb_d_cp_csc_from_fmt(a,b,info)
if (b%is_dev()) call b%sync()
a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
nc = b%get_ncols()
nz = b%get_nzeros()
nz = max(b%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
@ -4058,7 +4058,7 @@ subroutine psb_ld_mv_csc_to_coo(a,b,info)
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_ld_base_sparse_mat = a%psb_ld_base_sparse_mat
call b%set_nzeros(a%get_nzeros())

@ -3318,7 +3318,7 @@ subroutine psb_d_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync()
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
call b%set_nzeros(a%get_nzeros())
@ -3489,7 +3489,7 @@ subroutine psb_d_cp_csr_to_fmt(a,b,info)
if (a%is_dev()) call a%sync()
b%psb_d_base_sparse_mat = a%psb_d_base_sparse_mat
nr = a%get_nrows()
nz = a%get_nzeros()
nz = max(a%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
@ -3594,7 +3594,7 @@ subroutine psb_d_cp_csr_from_fmt(a,b,info)
if (b%is_dev()) call b%sync()
a%psb_d_base_sparse_mat = b%psb_d_base_sparse_mat
nr = b%get_nrows()
nz = b%get_nzeros()
nz = max(b%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
@ -3805,6 +3805,7 @@ contains
integer(psb_ipk_) :: ma, nb
integer(psb_ipk_), allocatable :: col_inds(:), offsets(:)
integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx
integer(psb_ipk_) :: nth, lth,ith
ma = a%get_nrows()
nb = b%get_ncols()
@ -3815,12 +3816,19 @@ contains
! dense accumulator
! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf
call psb_realloc(nb, acc, info)
!$omp parallel shared(nth,lth)
!$omp single
nth = omp_get_num_threads()
lth = min(nth, ma)
!$omp end single
!$omp end parallel
allocate(offsets(omp_get_max_threads()))
!$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) &
!$omp shared(a,b,c,offsets)
!$omp num_threads(lth) shared(a,b,c,offsets)
thread_upperbound = 0
start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma
if (start_idx == 0) then
@ -3876,15 +3884,14 @@ contains
!$omp end single
!$omp barrier
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if
!$omp barrier
!$omp single
@ -3892,9 +3899,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel
end subroutine spmm_omp_gustavson
@ -3930,6 +3938,7 @@ contains
!$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets)
thread_upperbound = 0
start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma
do jj = a%irp(irw), a%irp(irw + 1) - 1
@ -3996,14 +4005,14 @@ contains
!$omp barrier
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if
!$omp barrier
!$omp single
@ -4011,9 +4020,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel
end subroutine spmm_omp_gustavson_1d
@ -4312,6 +4322,266 @@ contains
end subroutine psb_dcsrspspmm
#endif
subroutine psb_d_ecsr_mold(a,b,info)
use psb_d_csr_mat_mod, psb_protect_name => psb_d_ecsr_mold
use psb_error_mod
implicit none
class(psb_d_ecsr_sparse_mat), intent(in) :: a
class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name='ecsr_mold'
logical, parameter :: debug=.false.
call psb_get_erraction(err_act)
info = 0
if (allocated(b)) then
call b%free()
deallocate(b,stat=info)
end if
if (info == 0) allocate(psb_d_ecsr_sparse_mat :: b, stat=info)
if (info /= 0) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
goto 9999
end if
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_d_ecsr_mold
subroutine psb_d_ecsr_csmv(alpha,a,x,beta,y,info,trans)
use psb_error_mod
use psb_string_mod
use psb_d_csr_mat_mod, psb_protect_name => psb_d_ecsr_csmv
implicit none
class(psb_d_ecsr_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(in) :: alpha, beta, x(:)
real(psb_dpk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: m, n
logical :: tra, ctra
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name='d_csr_csmv'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (a%is_dev()) call a%sync()
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T')
ctra = (psb_toupper(trans_) == 'C')
if (tra.or.ctra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = psb_err_input_asize_small_i_
ierr(1) = 3; ierr(2) = size(x); ierr(3) = n;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if (size(y,1)<m) then
info = psb_err_input_asize_small_i_
ierr(1) = 5; ierr(2) = size(y); ierr(3) =m;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if ((beta == done).and.&
& .not.(tra.or.ctra.or.(a%is_triangle()).or.(a%is_unit()))) then
call psb_d_ecsr_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,&
& a%nnerws,a%nerwp,x,y)
else
call a%psb_d_csr_sparse_mat%csmv(alpha,x,beta,y,info,trans)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
contains
subroutine psb_d_ecsr_csmv_inner(m,n,alpha,irp,ja,val,&
& nnerws,nerwp,x,y)
integer(psb_ipk_), intent(in) :: m,n,nnerws,irp(*),ja(*),nerwp(*)
real(psb_dpk_), intent(in) :: alpha, x(*),val(*)
real(psb_dpk_), intent(inout) :: y(*)
integer(psb_ipk_) :: i,j,ir
real(psb_dpk_) :: acc
if (alpha == dzero) return
if (alpha == done) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = dzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + acc
end do
else if (alpha == -done) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = dzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) -acc
end do
else
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = dzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + alpha*acc
end do
end if
end subroutine psb_d_ecsr_csmv_inner
end subroutine psb_d_ecsr_csmv
subroutine psb_d_ecsr_cmp_nerwp(a,info)
use psb_const_mod
use psb_realloc_mod
use psb_d_base_mat_mod
use psb_d_csr_mat_mod, psb_protect_name => psb_d_ecsr_cmp_nerwp
implicit none
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nnerws, i, nr, nzr
info = psb_success_
nr = a%get_nrows()
call psb_realloc(nr,a%nerwp,info)
nnerws = 0
do i=1, nr
nzr = a%irp(i+1)-a%irp(i)
if (nzr>0) then
nnerws = nnerws + 1
a%nerwp(nnerws) = i
end if
end do
call psb_realloc(nnerws,a%nerwp,info)
a%nnerws = nnerws
end subroutine psb_d_ecsr_cmp_nerwp
subroutine psb_d_cp_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_d_base_mat_mod
use psb_d_csr_mat_mod, psb_protect_name => psb_d_cp_ecsr_from_coo
implicit none
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_d_csr_sparse_mat%cp_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_d_cp_ecsr_from_coo
subroutine psb_d_mv_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_error_mod
use psb_d_base_mat_mod
use psb_d_csr_mat_mod, psb_protect_name => psb_d_mv_ecsr_from_coo
implicit none
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_d_csr_sparse_mat%mv_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_d_mv_ecsr_from_coo
subroutine psb_d_mv_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_d_base_mat_mod
use psb_d_csr_mat_mod, psb_protect_name => psb_d_mv_ecsr_from_fmt
implicit none
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_d_csr_sparse_mat%mv_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_d_mv_ecsr_from_fmt
subroutine psb_d_cp_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_d_base_mat_mod
use psb_realloc_mod
use psb_d_csr_mat_mod, psb_protect_name => psb_d_cp_ecsr_from_fmt
implicit none
class(psb_d_ecsr_sparse_mat), intent(inout) :: a
class(psb_d_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_d_csr_sparse_mat%cp_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_d_cp_ecsr_from_fmt
!
!
! ld version
@ -6021,7 +6291,7 @@ subroutine psb_ld_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync()
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_ld_base_sparse_mat = a%psb_ld_base_sparse_mat
call b%set_nzeros(a%get_nzeros())

@ -1213,6 +1213,106 @@ subroutine psb_d_b_csclip(a,b,info,&
end subroutine psb_d_b_csclip
subroutine psb_d_split_nd(a,n_rows,n_cols,info)
use psb_error_mod
use psb_string_mod
use psb_d_mat_mod, psb_protect_name => psb_d_split_nd
implicit none
class(psb_dspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_d_base_sparse_mat), intent(in), optional :: mold
type(psb_d_coo_sparse_mat) :: acoo
type(psb_d_csr_sparse_mat), allocatable :: aclip
type(psb_d_ecsr_sparse_mat), allocatable :: andclip
logical, parameter :: use_ecsr=.true.
character(len=20) :: name, ch_err
integer(psb_ipk_) :: err_act
info = psb_success_
name = 'psb_split'
call psb_erractionsave(err_act)
allocate(aclip)
call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.)
allocate(a%ad,mold=a%a)
call a%ad%mv_from_coo(acoo,info)
call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.)
if (use_ecsr) then
allocate(andclip)
call andclip%mv_from_coo(acoo,info)
call move_alloc(andclip,a%and)
else
allocate(a%and,mold=a%a)
call a%and%mv_from_coo(acoo,info)
end if
if (psb_errstatus_fatal()) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='cscnv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_d_split_nd
subroutine psb_d_merge_nd(a,n_rows,n_cols,info)
use psb_error_mod
use psb_string_mod
use psb_d_mat_mod, psb_protect_name => psb_d_merge_nd
implicit none
class(psb_dspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_d_base_sparse_mat), intent(in), optional :: mold
type(psb_d_coo_sparse_mat) :: acoo1,acoo2
integer(psb_ipk_) :: nz
logical, parameter :: use_ecsr=.true.
character(len=20) :: name, ch_err
integer(psb_ipk_) :: err_act
info = psb_success_
name = 'psb_split'
call psb_erractionsave(err_act)
call a%ad%mv_to_coo(acoo1,info)
call acoo1%set_bld()
call acoo1%set_nrows(n_rows)
call acoo1%set_ncols(n_cols)
call a%and%mv_to_coo(acoo2,info)
nz=acoo2%get_nzeros()
call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info)
if (allocated(a%a)) then
call a%a%free()
deallocate(a%a)
end if
allocate(a%a,mold=a%ad)
call a%a%mv_from_coo(acoo1,info)
if (psb_errstatus_fatal()) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='cscnv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_d_merge_nd
subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl)
use psb_error_mod
use psb_string_mod
@ -1246,6 +1346,7 @@ subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl)
goto 9999
end if
if (.false.) then
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
@ -1294,6 +1395,16 @@ subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl)
end if
call move_alloc(altmp,b%a)
else
call inner_cp_fmt(a%a,b%a,info,type,mold,dupl)
if (allocated(a%ad)) then
call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_cp_fmt(a%and,b%and,info,type,mold,dupl)
end if
end if
call b%trim()
call b%set_asb()
call psb_erractionrestore(err_act)
@ -1303,7 +1414,79 @@ subroutine psb_d_cscnv(a,b,info,type,mold,upd,dupl)
9999 call psb_error_handler(err_act)
return
contains
subroutine inner_cp_fmt(a,b,info,type,mold,dupl)
class(psb_d_base_sparse_mat), intent(in) :: a
class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_d_base_sparse_mat), intent(in), optional :: mold
class(psb_d_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_d_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_d_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_d_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_d_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%cp_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,b)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_cp_fmt
end subroutine psb_d_cscnv
subroutine psb_d_cscnv_ip(a,info,type,mold,dupl)
@ -1318,7 +1501,6 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl)
character(len=*), optional, intent(in) :: type
class(psb_d_base_sparse_mat), intent(in), optional :: mold
class(psb_d_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
character(len=20) :: name='cscnv_ip'
@ -1345,6 +1527,7 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl)
goto 9999
end if
if (.false.) then
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
@ -1377,14 +1560,22 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl)
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a%a, info)
call move_alloc(altmp,a%a)
else
call inner_mv_fmt(a%a,info,type,mold,dupl)
if (allocated(a%ad)) then
call inner_mv_fmt(a%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_mv_fmt(a%and,info,type,mold,dupl)
end if
end if
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,a%a)
call a%trim()
call a%set_asb()
call psb_erractionrestore(err_act)
@ -1394,6 +1585,77 @@ subroutine psb_d_cscnv_ip(a,info,type,mold,dupl)
9999 call psb_error_handler(err_act)
return
contains
subroutine inner_mv_fmt(a,info,type,mold,dupl)
class(psb_d_base_sparse_mat), intent(inout), allocatable :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_d_base_sparse_mat), intent(in), optional :: mold
class(psb_d_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_d_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_d_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_d_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_d_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,a)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_mv_fmt
end subroutine psb_d_cscnv_ip

@ -2006,8 +2006,8 @@ subroutine psb_s_base_vect_mv(alpha,a,x,beta,y,info,trans)
! For the time being we just throw everything back
! onto the normal routines.
call x%sync()
call y%sync()
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%spmm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
end subroutine psb_s_base_vect_mv
@ -2060,8 +2060,8 @@ subroutine psb_s_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
goto 9999
end if
call x%sync()
call y%sync()
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
if (present(d)) then
call d%sync()
if (present(scale)) then
@ -2082,6 +2082,7 @@ subroutine psb_s_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
if (info == psb_success_)&
& call a%inner_spsm(alpha,tmpv,beta,y,info,trans)
call y%set_host()
if (info == psb_success_) then
call tmpv%free(info)
if (info == psb_success_) deallocate(tmpv,stat=info)
@ -2161,8 +2162,11 @@ subroutine psb_s_base_inner_vect_sv(alpha,a,x,beta,y,info,trans)
info = psb_success_
call psb_erractionsave(err_act)
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%inner_spsm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
if (info /= psb_success_) then
info = psb_err_from_subroutine_

@ -166,6 +166,7 @@ subroutine psb_s_coo_scals(d,a,info)
call a%make_nonunit()
end if
!$omp parallel do private(i)
do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d
enddo
@ -4174,7 +4175,6 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
#if defined(OPENMP)
integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
#endif
info = psb_success_
@ -4205,7 +4205,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
! 'iaux' has to allow the threads to have an exclusive group
! of indices as work space. Since each thread handles one
! row/column at the time, we allocate this way.
allocate(iaux(MAX((nc+2),(nr+2))*maxthreads),stat=info)
allocate(iaux(MAX((nc+2),(nr+2))),stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
@ -4214,7 +4214,7 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
#else
allocate(iaux(nzin+2),stat=info)
allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
@ -4268,7 +4268,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!locals
integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:)
real(psb_spk_), allocatable :: vs(:)
integer(psb_ipk_) :: nza, nzl,iret
integer(psb_ipk_) :: nza, nzl,iret, maxnzr
integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name = 'psb_fixcoo'
@ -4277,7 +4277,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
#if defined(OPENMP)
integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
integer(psb_ipk_), allocatable :: kaux(:),idxaux(:)
#endif
info = psb_success_
@ -4301,11 +4301,14 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
use_buffers = .false.
end if
if (use_buffers) then
iaux(:) = 0
!if (use_buffers) then
#if defined(OPENMP)
!$omp workshare
iaux(:) = 0
!$omp end workshare
maxnzr = 0
!$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP shared(nzin,ia,nr,iaux) &
!$OMP shared(nzin,ia,nr,iaux,maxnzr) &
!$OMP private(i) &
!$OMP reduction(.and.:use_buffers)
do i=1,nzin
@ -4319,7 +4322,16 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end if
end do
!$OMP END PARALLEL DO
maxnzr = 0
!$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP private(i) shared(nr,iaux)&
!$OMP reduction(max:maxnzr)
do i=1,nr
maxnzr = max(maxnzr,iaux(i))
end do
!$OMP END PARALLEL DO
#else
iaux(:) = 0
!srt_inp = .true.
do i=1,nzin
if ((ia(i) < 1).or.(ia(i) > nr)) then
@ -4333,8 +4345,12 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!srt_inp = srt_inp .and.(ia(i-1)<=ia(i))
end do
maxnzr = 0
do i=1,nr
maxnzr = max(maxnzr,iaux(i))
end do
#endif
end if
!end if
! Check again use_buffers. We enter here if nzin >= nr and
! all the indices are valid
@ -4342,22 +4358,21 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
if (use_buffers) then
#if defined(OPENMP)
maxthreads = omp_get_max_threads()
allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info)
allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
!$omp workshare
kaux(:) = 0
sum(:) = 0
sum(1) = 1
!$omp end workshare
err = 0
! Here, starting from 'iaux', we apply a fixing in order to obtain the starting
! index for each row. We do the same on 'kaux'
!$OMP PARALLEL default(none) &
!$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP shared(maxnzr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, &
!$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info)
@ -4382,36 +4397,40 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!write(0,*) 'fix_coo_inner: trying with exscan'
call psi_exscan(nr+1,iaux,info,shift=ione)
!$OMP BARRIER
!$OMP SINGLE
!t0 = omp_get_wtime()
!$OMP END SINGLE
! ------------------ Sorting and buffers -------------------
! Let's use an auxiliary buffer, 'idxaux', to get indices leaving
! unmodified 'iaux'
do j=idxstart,idxend
!$omp do private(j)
do j=1,nr+1
idxaux(j) = iaux(j)
end do
!$omp end do
! Here we sort data inside the auxiliary buffers
!$omp do private(act_row,i,i1)
do i=1,nzin
act_row = ia(i)
if ((act_row >= idxstart) .and. (act_row <= idxend)) then
ias(idxaux(act_row)) = ia(i)
jas(idxaux(act_row)) = ja(i)
vs(idxaux(act_row)) = val(i)
!$omp atomic capture
i1 =idxaux(act_row)
idxaux(act_row) = idxaux(act_row) + 1
end if
!$omp end atomic
ias(i1) = ia(i)
jas(i1) = ja(i)
vs(i1) = val(i)
end do
!$omp end do
!$OMP BARRIER
!$OMP SINGLE
!t1 = omp_get_wtime()
!write(0,*) ithread,'Srt&Cpy :',t1-t0
!$OMP END SINGLE
! Let's sort column indices and values. After that we will store
! the number of unique values in 'kaux'
do j=idxstart,idxend
block
integer(psb_ipk_), allocatable :: ixt(:)
allocate(ixt(2*maxnzr+2))
!$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256)
do j=1,nr
first_elem = iaux(j)
last_elem = iaux(j+1) - 1
nzl = last_elem - first_elem + 1
@ -4419,11 +4438,11 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! The row has elements?
if (nzl > 0) then
call psi_msort_up(nzl,jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret)
& ixt,iret)
if (iret == 0) then
call psb_ip_reord(nzl,vs(first_elem:last_elem),&
& ias(first_elem:last_elem),jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2))
& ixt)
end if
! Over each row we count the unique values
@ -4436,6 +4455,9 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end do
end if
end do
!$omp end do
deallocate(ixt)
end block
! --------------------------------------------------
! ---------------- kaux composition ----------------
@ -4553,7 +4575,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
nzout = kaux(nr+1) - 1
deallocate(sum,kaux,idxaux,stat=info)
deallocate(kaux,idxaux,stat=info)
#else
!if (.not.srt_inp) then
@ -4710,7 +4732,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
& call psb_ip_reord(nzin,val,ia,ja,iaux)
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) &
!$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) &
!$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, &
!$OMP work,first_elem,last_elem)
@ -4732,6 +4754,9 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
idxend = idxstart + work - 1
block
integer(psb_ipk_), allocatable :: ixt(:)
allocate(ixt(2*maxnzr+2))
! ---------------------------------------------------
first_elem = 0
@ -4746,11 +4771,10 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
nzl = last_elem - first_elem + 1
if (nzl > 0) then
call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret)
call psi_msort_up(nzl,ja(first_elem:last_elem),ixt,iret)
if (iret == 0) &
& call psb_ip_reord(nzl,val(first_elem:last_elem),&
& ia(first_elem:last_elem),ja(first_elem:last_elem),&
& iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2))
& ia(first_elem:last_elem),ja(first_elem:last_elem),ixt)
end if
act_row = act_row + 1
@ -4764,6 +4788,7 @@ subroutine psb_s_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
last_elem = j
end if
end do
end block
!$OMP END PARALLEL
#else
i = 1

@ -2163,7 +2163,7 @@ subroutine psb_s_mv_csc_to_coo(a,b,info)
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
call b%set_nzeros(a%get_nzeros())
@ -2328,7 +2328,7 @@ subroutine psb_s_cp_csc_to_fmt(a,b,info)
if (a%is_dev()) call a%sync()
b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
nc = a%get_ncols()
nz = a%get_nzeros()
nz = max(a%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
@ -2461,7 +2461,7 @@ subroutine psb_s_cp_csc_from_fmt(a,b,info)
if (b%is_dev()) call b%sync()
a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat
nc = b%get_ncols()
nz = b%get_nzeros()
nz = max(b%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
@ -4058,7 +4058,7 @@ subroutine psb_ls_mv_csc_to_coo(a,b,info)
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_ls_base_sparse_mat = a%psb_ls_base_sparse_mat
call b%set_nzeros(a%get_nzeros())

@ -3318,7 +3318,7 @@ subroutine psb_s_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync()
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
call b%set_nzeros(a%get_nzeros())
@ -3489,7 +3489,7 @@ subroutine psb_s_cp_csr_to_fmt(a,b,info)
if (a%is_dev()) call a%sync()
b%psb_s_base_sparse_mat = a%psb_s_base_sparse_mat
nr = a%get_nrows()
nz = a%get_nzeros()
nz = max(a%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
@ -3594,7 +3594,7 @@ subroutine psb_s_cp_csr_from_fmt(a,b,info)
if (b%is_dev()) call b%sync()
a%psb_s_base_sparse_mat = b%psb_s_base_sparse_mat
nr = b%get_nrows()
nz = b%get_nzeros()
nz = max(b%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
@ -3805,6 +3805,7 @@ contains
integer(psb_ipk_) :: ma, nb
integer(psb_ipk_), allocatable :: col_inds(:), offsets(:)
integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx
integer(psb_ipk_) :: nth, lth,ith
ma = a%get_nrows()
nb = b%get_ncols()
@ -3815,12 +3816,19 @@ contains
! dense accumulator
! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf
call psb_realloc(nb, acc, info)
!$omp parallel shared(nth,lth)
!$omp single
nth = omp_get_num_threads()
lth = min(nth, ma)
!$omp end single
!$omp end parallel
allocate(offsets(omp_get_max_threads()))
!$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) &
!$omp shared(a,b,c,offsets)
!$omp num_threads(lth) shared(a,b,c,offsets)
thread_upperbound = 0
start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma
if (start_idx == 0) then
@ -3876,15 +3884,14 @@ contains
!$omp end single
!$omp barrier
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if
!$omp barrier
!$omp single
@ -3892,9 +3899,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel
end subroutine spmm_omp_gustavson
@ -3930,6 +3938,7 @@ contains
!$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets)
thread_upperbound = 0
start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma
do jj = a%irp(irw), a%irp(irw + 1) - 1
@ -3996,14 +4005,14 @@ contains
!$omp barrier
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if
!$omp barrier
!$omp single
@ -4011,9 +4020,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel
end subroutine spmm_omp_gustavson_1d
@ -4312,6 +4322,266 @@ contains
end subroutine psb_scsrspspmm
#endif
subroutine psb_s_ecsr_mold(a,b,info)
use psb_s_csr_mat_mod, psb_protect_name => psb_s_ecsr_mold
use psb_error_mod
implicit none
class(psb_s_ecsr_sparse_mat), intent(in) :: a
class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name='ecsr_mold'
logical, parameter :: debug=.false.
call psb_get_erraction(err_act)
info = 0
if (allocated(b)) then
call b%free()
deallocate(b,stat=info)
end if
if (info == 0) allocate(psb_s_ecsr_sparse_mat :: b, stat=info)
if (info /= 0) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
goto 9999
end if
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_ecsr_mold
subroutine psb_s_ecsr_csmv(alpha,a,x,beta,y,info,trans)
use psb_error_mod
use psb_string_mod
use psb_s_csr_mat_mod, psb_protect_name => psb_s_ecsr_csmv
implicit none
class(psb_s_ecsr_sparse_mat), intent(in) :: a
real(psb_spk_), intent(in) :: alpha, beta, x(:)
real(psb_spk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: m, n
logical :: tra, ctra
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name='s_csr_csmv'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (a%is_dev()) call a%sync()
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T')
ctra = (psb_toupper(trans_) == 'C')
if (tra.or.ctra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = psb_err_input_asize_small_i_
ierr(1) = 3; ierr(2) = size(x); ierr(3) = n;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if (size(y,1)<m) then
info = psb_err_input_asize_small_i_
ierr(1) = 5; ierr(2) = size(y); ierr(3) =m;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if ((beta == sone).and.&
& .not.(tra.or.ctra.or.(a%is_triangle()).or.(a%is_unit()))) then
call psb_s_ecsr_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,&
& a%nnerws,a%nerwp,x,y)
else
call a%psb_s_csr_sparse_mat%csmv(alpha,x,beta,y,info,trans)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
contains
subroutine psb_s_ecsr_csmv_inner(m,n,alpha,irp,ja,val,&
& nnerws,nerwp,x,y)
integer(psb_ipk_), intent(in) :: m,n,nnerws,irp(*),ja(*),nerwp(*)
real(psb_spk_), intent(in) :: alpha, x(*),val(*)
real(psb_spk_), intent(inout) :: y(*)
integer(psb_ipk_) :: i,j,ir
real(psb_spk_) :: acc
if (alpha == szero) return
if (alpha == sone) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = szero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + acc
end do
else if (alpha == -sone) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = szero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) -acc
end do
else
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = szero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + alpha*acc
end do
end if
end subroutine psb_s_ecsr_csmv_inner
end subroutine psb_s_ecsr_csmv
subroutine psb_s_ecsr_cmp_nerwp(a,info)
use psb_const_mod
use psb_realloc_mod
use psb_s_base_mat_mod
use psb_s_csr_mat_mod, psb_protect_name => psb_s_ecsr_cmp_nerwp
implicit none
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nnerws, i, nr, nzr
info = psb_success_
nr = a%get_nrows()
call psb_realloc(nr,a%nerwp,info)
nnerws = 0
do i=1, nr
nzr = a%irp(i+1)-a%irp(i)
if (nzr>0) then
nnerws = nnerws + 1
a%nerwp(nnerws) = i
end if
end do
call psb_realloc(nnerws,a%nerwp,info)
a%nnerws = nnerws
end subroutine psb_s_ecsr_cmp_nerwp
subroutine psb_s_cp_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_s_base_mat_mod
use psb_s_csr_mat_mod, psb_protect_name => psb_s_cp_ecsr_from_coo
implicit none
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_s_csr_sparse_mat%cp_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_s_cp_ecsr_from_coo
subroutine psb_s_mv_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_error_mod
use psb_s_base_mat_mod
use psb_s_csr_mat_mod, psb_protect_name => psb_s_mv_ecsr_from_coo
implicit none
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_s_csr_sparse_mat%mv_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_s_mv_ecsr_from_coo
subroutine psb_s_mv_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_s_base_mat_mod
use psb_s_csr_mat_mod, psb_protect_name => psb_s_mv_ecsr_from_fmt
implicit none
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_s_csr_sparse_mat%mv_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_s_mv_ecsr_from_fmt
subroutine psb_s_cp_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_s_base_mat_mod
use psb_realloc_mod
use psb_s_csr_mat_mod, psb_protect_name => psb_s_cp_ecsr_from_fmt
implicit none
class(psb_s_ecsr_sparse_mat), intent(inout) :: a
class(psb_s_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_s_csr_sparse_mat%cp_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_s_cp_ecsr_from_fmt
!
!
! ls version
@ -6021,7 +6291,7 @@ subroutine psb_ls_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync()
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_ls_base_sparse_mat = a%psb_ls_base_sparse_mat
call b%set_nzeros(a%get_nzeros())

@ -1213,6 +1213,106 @@ subroutine psb_s_b_csclip(a,b,info,&
end subroutine psb_s_b_csclip
subroutine psb_s_split_nd(a,n_rows,n_cols,info)
use psb_error_mod
use psb_string_mod
use psb_s_mat_mod, psb_protect_name => psb_s_split_nd
implicit none
class(psb_sspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_s_base_sparse_mat), intent(in), optional :: mold
type(psb_s_coo_sparse_mat) :: acoo
type(psb_s_csr_sparse_mat), allocatable :: aclip
type(psb_s_ecsr_sparse_mat), allocatable :: andclip
logical, parameter :: use_ecsr=.true.
character(len=20) :: name, ch_err
integer(psb_ipk_) :: err_act
info = psb_success_
name = 'psb_split'
call psb_erractionsave(err_act)
allocate(aclip)
call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.)
allocate(a%ad,mold=a%a)
call a%ad%mv_from_coo(acoo,info)
call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.)
if (use_ecsr) then
allocate(andclip)
call andclip%mv_from_coo(acoo,info)
call move_alloc(andclip,a%and)
else
allocate(a%and,mold=a%a)
call a%and%mv_from_coo(acoo,info)
end if
if (psb_errstatus_fatal()) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='cscnv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_split_nd
subroutine psb_s_merge_nd(a,n_rows,n_cols,info)
use psb_error_mod
use psb_string_mod
use psb_s_mat_mod, psb_protect_name => psb_s_merge_nd
implicit none
class(psb_sspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_s_base_sparse_mat), intent(in), optional :: mold
type(psb_s_coo_sparse_mat) :: acoo1,acoo2
integer(psb_ipk_) :: nz
logical, parameter :: use_ecsr=.true.
character(len=20) :: name, ch_err
integer(psb_ipk_) :: err_act
info = psb_success_
name = 'psb_split'
call psb_erractionsave(err_act)
call a%ad%mv_to_coo(acoo1,info)
call acoo1%set_bld()
call acoo1%set_nrows(n_rows)
call acoo1%set_ncols(n_cols)
call a%and%mv_to_coo(acoo2,info)
nz=acoo2%get_nzeros()
call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info)
if (allocated(a%a)) then
call a%a%free()
deallocate(a%a)
end if
allocate(a%a,mold=a%ad)
call a%a%mv_from_coo(acoo1,info)
if (psb_errstatus_fatal()) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='cscnv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_s_merge_nd
subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl)
use psb_error_mod
use psb_string_mod
@ -1246,6 +1346,7 @@ subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl)
goto 9999
end if
if (.false.) then
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
@ -1294,6 +1395,16 @@ subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl)
end if
call move_alloc(altmp,b%a)
else
call inner_cp_fmt(a%a,b%a,info,type,mold,dupl)
if (allocated(a%ad)) then
call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_cp_fmt(a%and,b%and,info,type,mold,dupl)
end if
end if
call b%trim()
call b%set_asb()
call psb_erractionrestore(err_act)
@ -1303,7 +1414,79 @@ subroutine psb_s_cscnv(a,b,info,type,mold,upd,dupl)
9999 call psb_error_handler(err_act)
return
contains
subroutine inner_cp_fmt(a,b,info,type,mold,dupl)
class(psb_s_base_sparse_mat), intent(in) :: a
class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_s_base_sparse_mat), intent(in), optional :: mold
class(psb_s_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_s_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_s_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_s_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_s_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%cp_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,b)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_cp_fmt
end subroutine psb_s_cscnv
subroutine psb_s_cscnv_ip(a,info,type,mold,dupl)
@ -1318,7 +1501,6 @@ subroutine psb_s_cscnv_ip(a,info,type,mold,dupl)
character(len=*), optional, intent(in) :: type
class(psb_s_base_sparse_mat), intent(in), optional :: mold
class(psb_s_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
character(len=20) :: name='cscnv_ip'
@ -1345,6 +1527,7 @@ subroutine psb_s_cscnv_ip(a,info,type,mold,dupl)
goto 9999
end if
if (.false.) then
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
@ -1377,14 +1560,22 @@ subroutine psb_s_cscnv_ip(a,info,type,mold,dupl)
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a%a, info)
call move_alloc(altmp,a%a)
else
call inner_mv_fmt(a%a,info,type,mold,dupl)
if (allocated(a%ad)) then
call inner_mv_fmt(a%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_mv_fmt(a%and,info,type,mold,dupl)
end if
end if
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,a%a)
call a%trim()
call a%set_asb()
call psb_erractionrestore(err_act)
@ -1394,6 +1585,77 @@ subroutine psb_s_cscnv_ip(a,info,type,mold,dupl)
9999 call psb_error_handler(err_act)
return
contains
subroutine inner_mv_fmt(a,info,type,mold,dupl)
class(psb_s_base_sparse_mat), intent(inout), allocatable :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_s_base_sparse_mat), intent(in), optional :: mold
class(psb_s_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_s_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_s_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_s_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_s_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,a)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_mv_fmt
end subroutine psb_s_cscnv_ip

@ -2006,8 +2006,8 @@ subroutine psb_z_base_vect_mv(alpha,a,x,beta,y,info,trans)
! For the time being we just throw everything back
! onto the normal routines.
call x%sync()
call y%sync()
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%spmm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
end subroutine psb_z_base_vect_mv
@ -2060,8 +2060,8 @@ subroutine psb_z_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
goto 9999
end if
call x%sync()
call y%sync()
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
if (present(d)) then
call d%sync()
if (present(scale)) then
@ -2082,6 +2082,7 @@ subroutine psb_z_base_vect_cssv(alpha,a,x,beta,y,info,trans,scale,d)
if (info == psb_success_)&
& call a%inner_spsm(alpha,tmpv,beta,y,info,trans)
call y%set_host()
if (info == psb_success_) then
call tmpv%free(info)
if (info == psb_success_) deallocate(tmpv,stat=info)
@ -2161,8 +2162,11 @@ subroutine psb_z_base_inner_vect_sv(alpha,a,x,beta,y,info,trans)
info = psb_success_
call psb_erractionsave(err_act)
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%inner_spsm(alpha,x%v,beta,y%v,info,trans)
call y%set_host()
if (info /= psb_success_) then
info = psb_err_from_subroutine_

@ -166,6 +166,7 @@ subroutine psb_z_coo_scals(d,a,info)
call a%make_nonunit()
end if
!$omp parallel do private(i)
do i=1,a%get_nzeros()
a%val(i) = a%val(i) * d
enddo
@ -4174,7 +4175,6 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
#if defined(OPENMP)
integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
#endif
info = psb_success_
@ -4205,7 +4205,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
! 'iaux' has to allow the threads to have an exclusive group
! of indices as work space. Since each thread handles one
! row/column at the time, we allocate this way.
allocate(iaux(MAX((nc+2),(nr+2))*maxthreads),stat=info)
allocate(iaux(MAX((nc+2),(nr+2))),stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
@ -4214,7 +4214,7 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
#else
allocate(iaux(nzin+2),stat=info)
allocate(iaux(MAX((nzin+2),(nc+2),(nr+2))),stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
@ -4268,7 +4268,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!locals
integer(psb_ipk_), allocatable :: ias(:),jas(:), ix2(:)
complex(psb_dpk_), allocatable :: vs(:)
integer(psb_ipk_) :: nza, nzl,iret
integer(psb_ipk_) :: nza, nzl,iret, maxnzr
integer(psb_ipk_) :: i,j, irw, icl, err_act, ip,is, imx, k, ii, i1, i2
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name = 'psb_fixcoo'
@ -4277,7 +4277,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
#if defined(OPENMP)
integer(psb_ipk_) :: work,idxstart,idxend,first_elem,last_elem,s,nthreads,ithread
integer(psb_ipk_) :: saved_elem,old_val,nxt_val,err,act_row,act_col,maxthreads
integer(psb_ipk_), allocatable :: sum(:),kaux(:),idxaux(:)
integer(psb_ipk_), allocatable :: kaux(:),idxaux(:)
#endif
info = psb_success_
@ -4301,11 +4301,14 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
use_buffers = .false.
end if
if (use_buffers) then
iaux(:) = 0
!if (use_buffers) then
#if defined(OPENMP)
!$omp workshare
iaux(:) = 0
!$omp end workshare
maxnzr = 0
!$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP shared(nzin,ia,nr,iaux) &
!$OMP shared(nzin,ia,nr,iaux,maxnzr) &
!$OMP private(i) &
!$OMP reduction(.and.:use_buffers)
do i=1,nzin
@ -4319,7 +4322,16 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end if
end do
!$OMP END PARALLEL DO
maxnzr = 0
!$OMP PARALLEL DO default(none) schedule(STATIC) &
!$OMP private(i) shared(nr,iaux)&
!$OMP reduction(max:maxnzr)
do i=1,nr
maxnzr = max(maxnzr,iaux(i))
end do
!$OMP END PARALLEL DO
#else
iaux(:) = 0
!srt_inp = .true.
do i=1,nzin
if ((ia(i) < 1).or.(ia(i) > nr)) then
@ -4333,8 +4345,12 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!srt_inp = srt_inp .and.(ia(i-1)<=ia(i))
end do
maxnzr = 0
do i=1,nr
maxnzr = max(maxnzr,iaux(i))
end do
#endif
end if
!end if
! Check again use_buffers. We enter here if nzin >= nr and
! all the indices are valid
@ -4342,22 +4358,21 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
if (use_buffers) then
#if defined(OPENMP)
maxthreads = omp_get_max_threads()
allocate(kaux(nr+1),idxaux(MAX((nc+2)*maxthreads,nr)),sum(maxthreads+1),stat=info)
allocate(kaux(nr+1),idxaux(MAX(nc+2,nr+2)),stat=info)
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
!$omp workshare
kaux(:) = 0
sum(:) = 0
sum(1) = 1
!$omp end workshare
err = 0
! Here, starting from 'iaux', we apply a fixing in order to obtain the starting
! index for each row. We do the same on 'kaux'
!$OMP PARALLEL default(none) &
!$OMP shared(t0,t1,idxaux,ia,ja,val,ias,jas,vs,nthreads,sum,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP shared(maxnzr,idxaux,ia,ja,val,ias,jas,vs,nthreads,nr,nc,nzin,iaux,kaux,dupl,err) &
!$OMP private(s,i,j,k,ithread,idxstart,idxend,work,nxt_val,old_val,saved_elem, &
!$OMP first_elem,last_elem,nzl,iret,act_row,i1,i2) reduction(max: info)
@ -4382,36 +4397,40 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
!write(0,*) 'fix_coo_inner: trying with exscan'
call psi_exscan(nr+1,iaux,info,shift=ione)
!$OMP BARRIER
!$OMP SINGLE
!t0 = omp_get_wtime()
!$OMP END SINGLE
! ------------------ Sorting and buffers -------------------
! Let's use an auxiliary buffer, 'idxaux', to get indices leaving
! unmodified 'iaux'
do j=idxstart,idxend
!$omp do private(j)
do j=1,nr+1
idxaux(j) = iaux(j)
end do
!$omp end do
! Here we sort data inside the auxiliary buffers
!$omp do private(act_row,i,i1)
do i=1,nzin
act_row = ia(i)
if ((act_row >= idxstart) .and. (act_row <= idxend)) then
ias(idxaux(act_row)) = ia(i)
jas(idxaux(act_row)) = ja(i)
vs(idxaux(act_row)) = val(i)
!$omp atomic capture
i1 =idxaux(act_row)
idxaux(act_row) = idxaux(act_row) + 1
end if
!$omp end atomic
ias(i1) = ia(i)
jas(i1) = ja(i)
vs(i1) = val(i)
end do
!$omp end do
!$OMP BARRIER
!$OMP SINGLE
!t1 = omp_get_wtime()
!write(0,*) ithread,'Srt&Cpy :',t1-t0
!$OMP END SINGLE
! Let's sort column indices and values. After that we will store
! the number of unique values in 'kaux'
do j=idxstart,idxend
block
integer(psb_ipk_), allocatable :: ixt(:)
allocate(ixt(2*maxnzr+2))
!$omp do private(j,first_elem,last_elem,nzl,iret) schedule(dynamic,256)
do j=1,nr
first_elem = iaux(j)
last_elem = iaux(j+1) - 1
nzl = last_elem - first_elem + 1
@ -4419,11 +4438,11 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
! The row has elements?
if (nzl > 0) then
call psi_msort_up(nzl,jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret)
& ixt,iret)
if (iret == 0) then
call psb_ip_reord(nzl,vs(first_elem:last_elem),&
& ias(first_elem:last_elem),jas(first_elem:last_elem), &
& idxaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2))
& ixt)
end if
! Over each row we count the unique values
@ -4436,6 +4455,9 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
end do
end if
end do
!$omp end do
deallocate(ixt)
end block
! --------------------------------------------------
! ---------------- kaux composition ----------------
@ -4553,7 +4575,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
nzout = kaux(nr+1) - 1
deallocate(sum,kaux,idxaux,stat=info)
deallocate(kaux,idxaux,stat=info)
#else
!if (.not.srt_inp) then
@ -4710,7 +4732,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
& call psb_ip_reord(nzin,val,ia,ja,iaux)
#if defined(OPENMP)
!$OMP PARALLEL default(none) &
!$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads) &
!$OMP shared(nr,nc,nzin,iaux,ia,ja,val,nthreads,maxnzr) &
!$OMP private(i,j,idxstart,idxend,nzl,act_row,iret,ithread, &
!$OMP work,first_elem,last_elem)
@ -4732,6 +4754,9 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
idxend = idxstart + work - 1
block
integer(psb_ipk_), allocatable :: ixt(:)
allocate(ixt(2*maxnzr+2))
! ---------------------------------------------------
first_elem = 0
@ -4746,11 +4771,10 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
nzl = last_elem - first_elem + 1
if (nzl > 0) then
call psi_msort_up(nzl,ja(first_elem:),iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2),iret)
call psi_msort_up(nzl,ja(first_elem:last_elem),ixt,iret)
if (iret == 0) &
& call psb_ip_reord(nzl,val(first_elem:last_elem),&
& ia(first_elem:last_elem),ja(first_elem:last_elem),&
& iaux((ithread*(nc+2))+1:(ithread*(nc+2))+nzl+2))
& ia(first_elem:last_elem),ja(first_elem:last_elem),ixt)
end if
act_row = act_row + 1
@ -4764,6 +4788,7 @@ subroutine psb_z_fix_coo_inner_rowmajor(nr,nc,nzin,dupl,ia,ja,val,iaux,nzout,inf
last_elem = j
end if
end do
end block
!$OMP END PARALLEL
#else
i = 1

@ -2163,7 +2163,7 @@ subroutine psb_z_mv_csc_to_coo(a,b,info)
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
call b%set_nzeros(a%get_nzeros())
@ -2328,7 +2328,7 @@ subroutine psb_z_cp_csc_to_fmt(a,b,info)
if (a%is_dev()) call a%sync()
b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
nc = a%get_ncols()
nz = a%get_nzeros()
nz = max(a%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( a%icp(1:nc+1), b%icp , info)
if (info == 0) call psb_safe_cpy( a%ia(1:nz), b%ia , info)
@ -2461,7 +2461,7 @@ subroutine psb_z_cp_csc_from_fmt(a,b,info)
if (b%is_dev()) call b%sync()
a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat
nc = b%get_ncols()
nz = b%get_nzeros()
nz = max(b%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( b%icp(1:nc+1), a%icp , info)
if (info == 0) call psb_safe_cpy( b%ia(1:nz), a%ia , info)
@ -4058,7 +4058,7 @@ subroutine psb_lz_mv_csc_to_coo(a,b,info)
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_lz_base_sparse_mat = a%psb_lz_base_sparse_mat
call b%set_nzeros(a%get_nzeros())

@ -3318,7 +3318,7 @@ subroutine psb_z_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync()
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
call b%set_nzeros(a%get_nzeros())
@ -3489,7 +3489,7 @@ subroutine psb_z_cp_csr_to_fmt(a,b,info)
if (a%is_dev()) call a%sync()
b%psb_z_base_sparse_mat = a%psb_z_base_sparse_mat
nr = a%get_nrows()
nz = a%get_nzeros()
nz = max(a%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
if (info == 0) call psb_safe_cpy( a%ja(1:nz), b%ja , info)
@ -3594,7 +3594,7 @@ subroutine psb_z_cp_csr_from_fmt(a,b,info)
if (b%is_dev()) call b%sync()
a%psb_z_base_sparse_mat = b%psb_z_base_sparse_mat
nr = b%get_nrows()
nz = b%get_nzeros()
nz = max(b%get_nzeros(),ione)
if (.false.) then
if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
if (info == 0) call psb_safe_cpy( b%ja(1:nz) , a%ja , info)
@ -3805,6 +3805,7 @@ contains
integer(psb_ipk_) :: ma, nb
integer(psb_ipk_), allocatable :: col_inds(:), offsets(:)
integer(psb_ipk_) :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx
integer(psb_ipk_) :: nth, lth,ith
ma = a%get_nrows()
nb = b%get_ncols()
@ -3815,12 +3816,19 @@ contains
! dense accumulator
! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf
call psb_realloc(nb, acc, info)
!$omp parallel shared(nth,lth)
!$omp single
nth = omp_get_num_threads()
lth = min(nth, ma)
!$omp end single
!$omp end parallel
allocate(offsets(omp_get_max_threads()))
!$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) &
!$omp shared(a,b,c,offsets)
!$omp num_threads(lth) shared(a,b,c,offsets)
thread_upperbound = 0
start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma
if (start_idx == 0) then
@ -3876,15 +3884,14 @@ contains
!$omp end single
!$omp barrier
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if
!$omp barrier
!$omp single
@ -3892,9 +3899,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel
end subroutine spmm_omp_gustavson
@ -3930,6 +3938,7 @@ contains
!$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets)
thread_upperbound = 0
start_idx = 0
end_idx = 0
!$omp do schedule(static) private(irw, jj, j)
do irw = 1, ma
do jj = a%irp(irw), a%irp(irw + 1) - 1
@ -3996,14 +4005,14 @@ contains
!$omp barrier
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
if (omp_get_thread_num() /= 0) then
c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
end if
do irw = start_idx, end_idx - 1
c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
end do
end if
!$omp barrier
!$omp single
@ -4011,9 +4020,10 @@ contains
call psb_realloc(c%irp(ma + 1), c%val, info)
call psb_realloc(c%irp(ma + 1), c%ja, info)
!$omp end single
if ((start_idx /= 0).and.(start_idx <= end_idx) ) then
c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
end if
!$omp end parallel
end subroutine spmm_omp_gustavson_1d
@ -4312,6 +4322,266 @@ contains
end subroutine psb_zcsrspspmm
#endif
subroutine psb_z_ecsr_mold(a,b,info)
use psb_z_csr_mat_mod, psb_protect_name => psb_z_ecsr_mold
use psb_error_mod
implicit none
class(psb_z_ecsr_sparse_mat), intent(in) :: a
class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: err_act
character(len=20) :: name='ecsr_mold'
logical, parameter :: debug=.false.
call psb_get_erraction(err_act)
info = 0
if (allocated(b)) then
call b%free()
deallocate(b,stat=info)
end if
if (info == 0) allocate(psb_z_ecsr_sparse_mat :: b, stat=info)
if (info /= 0) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
goto 9999
end if
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_z_ecsr_mold
subroutine psb_z_ecsr_csmv(alpha,a,x,beta,y,info,trans)
use psb_error_mod
use psb_string_mod
use psb_z_csr_mat_mod, psb_protect_name => psb_z_ecsr_csmv
implicit none
class(psb_z_ecsr_sparse_mat), intent(in) :: a
complex(psb_dpk_), intent(in) :: alpha, beta, x(:)
complex(psb_dpk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
character :: trans_
integer(psb_ipk_) :: m, n
logical :: tra, ctra
integer(psb_ipk_) :: err_act
integer(psb_ipk_) :: ierr(5)
character(len=20) :: name='z_csr_csmv'
logical, parameter :: debug=.false.
call psb_erractionsave(err_act)
info = psb_success_
if (a%is_dev()) call a%sync()
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info,name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T')
ctra = (psb_toupper(trans_) == 'C')
if (tra.or.ctra) then
m = a%get_ncols()
n = a%get_nrows()
else
n = a%get_ncols()
m = a%get_nrows()
end if
if (size(x,1)<n) then
info = psb_err_input_asize_small_i_
ierr(1) = 3; ierr(2) = size(x); ierr(3) = n;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if (size(y,1)<m) then
info = psb_err_input_asize_small_i_
ierr(1) = 5; ierr(2) = size(y); ierr(3) =m;
call psb_errpush(info,name,i_err=ierr)
goto 9999
end if
if ((beta == zone).and.&
& .not.(tra.or.ctra.or.(a%is_triangle()).or.(a%is_unit()))) then
call psb_z_ecsr_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,&
& a%nnerws,a%nerwp,x,y)
else
call a%psb_z_csr_sparse_mat%csmv(alpha,x,beta,y,info,trans)
end if
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
contains
subroutine psb_z_ecsr_csmv_inner(m,n,alpha,irp,ja,val,&
& nnerws,nerwp,x,y)
integer(psb_ipk_), intent(in) :: m,n,nnerws,irp(*),ja(*),nerwp(*)
complex(psb_dpk_), intent(in) :: alpha, x(*),val(*)
complex(psb_dpk_), intent(inout) :: y(*)
integer(psb_ipk_) :: i,j,ir
complex(psb_dpk_) :: acc
if (alpha == zzero) return
if (alpha == zone) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = zzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + acc
end do
else if (alpha == -zone) then
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = zzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) -acc
end do
else
!$omp parallel do private(ir,i,j,acc)
do ir=1,nnerws
i = nerwp(ir)
acc = zzero
do j=irp(i), irp(i+1)-1
acc = acc + val(j) * x(ja(j))
enddo
y(i) = y(i) + alpha*acc
end do
end if
end subroutine psb_z_ecsr_csmv_inner
end subroutine psb_z_ecsr_csmv
subroutine psb_z_ecsr_cmp_nerwp(a,info)
use psb_const_mod
use psb_realloc_mod
use psb_z_base_mat_mod
use psb_z_csr_mat_mod, psb_protect_name => psb_z_ecsr_cmp_nerwp
implicit none
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nnerws, i, nr, nzr
info = psb_success_
nr = a%get_nrows()
call psb_realloc(nr,a%nerwp,info)
nnerws = 0
do i=1, nr
nzr = a%irp(i+1)-a%irp(i)
if (nzr>0) then
nnerws = nnerws + 1
a%nerwp(nnerws) = i
end if
end do
call psb_realloc(nnerws,a%nerwp,info)
a%nnerws = nnerws
end subroutine psb_z_ecsr_cmp_nerwp
subroutine psb_z_cp_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_z_base_mat_mod
use psb_z_csr_mat_mod, psb_protect_name => psb_z_cp_ecsr_from_coo
implicit none
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_z_csr_sparse_mat%cp_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_z_cp_ecsr_from_coo
subroutine psb_z_mv_ecsr_from_coo(a,b,info)
use psb_const_mod
use psb_realloc_mod
use psb_error_mod
use psb_z_base_mat_mod
use psb_z_csr_mat_mod, psb_protect_name => psb_z_mv_ecsr_from_coo
implicit none
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_z_csr_sparse_mat%mv_from_coo(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_z_mv_ecsr_from_coo
subroutine psb_z_mv_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_z_base_mat_mod
use psb_z_csr_mat_mod, psb_protect_name => psb_z_mv_ecsr_from_fmt
implicit none
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_z_csr_sparse_mat%mv_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_z_mv_ecsr_from_fmt
subroutine psb_z_cp_ecsr_from_fmt(a,b,info)
use psb_const_mod
use psb_z_base_mat_mod
use psb_realloc_mod
use psb_z_csr_mat_mod, psb_protect_name => psb_z_cp_ecsr_from_fmt
implicit none
class(psb_z_ecsr_sparse_mat), intent(inout) :: a
class(psb_z_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
info = psb_success_
call a%psb_z_csr_sparse_mat%cp_from_fmt(b,info)
if (info == psb_success_) call a%cmp_nerwp(info)
end subroutine psb_z_cp_ecsr_from_fmt
!
!
! lz version
@ -6021,7 +6291,7 @@ subroutine psb_lz_mv_csr_to_coo(a,b,info)
if (a%is_dev()) call a%sync()
nr = a%get_nrows()
nc = a%get_ncols()
nza = a%get_nzeros()
nza = max(a%get_nzeros(),ione)
b%psb_lz_base_sparse_mat = a%psb_lz_base_sparse_mat
call b%set_nzeros(a%get_nzeros())

@ -1213,6 +1213,106 @@ subroutine psb_z_b_csclip(a,b,info,&
end subroutine psb_z_b_csclip
subroutine psb_z_split_nd(a,n_rows,n_cols,info)
use psb_error_mod
use psb_string_mod
use psb_z_mat_mod, psb_protect_name => psb_z_split_nd
implicit none
class(psb_zspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_z_base_sparse_mat), intent(in), optional :: mold
type(psb_z_coo_sparse_mat) :: acoo
type(psb_z_csr_sparse_mat), allocatable :: aclip
type(psb_z_ecsr_sparse_mat), allocatable :: andclip
logical, parameter :: use_ecsr=.true.
character(len=20) :: name, ch_err
integer(psb_ipk_) :: err_act
info = psb_success_
name = 'psb_split'
call psb_erractionsave(err_act)
allocate(aclip)
call a%a%csclip(acoo,info,jmax=n_rows,rscale=.false.,cscale=.false.)
allocate(a%ad,mold=a%a)
call a%ad%mv_from_coo(acoo,info)
call a%a%csclip(acoo,info,jmin=n_rows+1,jmax=n_cols,rscale=.false.,cscale=.false.)
if (use_ecsr) then
allocate(andclip)
call andclip%mv_from_coo(acoo,info)
call move_alloc(andclip,a%and)
else
allocate(a%and,mold=a%a)
call a%and%mv_from_coo(acoo,info)
end if
if (psb_errstatus_fatal()) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='cscnv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_z_split_nd
subroutine psb_z_merge_nd(a,n_rows,n_cols,info)
use psb_error_mod
use psb_string_mod
use psb_z_mat_mod, psb_protect_name => psb_z_merge_nd
implicit none
class(psb_zspmat_type), intent(inout) :: a
integer(psb_ipk_), intent(in) :: n_rows, n_cols
integer(psb_ipk_), intent(out) :: info
!!$ integer(psb_ipk_),optional, intent(in) :: dupl
!!$ character(len=*), optional, intent(in) :: type
!!$ class(psb_z_base_sparse_mat), intent(in), optional :: mold
type(psb_z_coo_sparse_mat) :: acoo1,acoo2
integer(psb_ipk_) :: nz
logical, parameter :: use_ecsr=.true.
character(len=20) :: name, ch_err
integer(psb_ipk_) :: err_act
info = psb_success_
name = 'psb_split'
call psb_erractionsave(err_act)
call a%ad%mv_to_coo(acoo1,info)
call acoo1%set_bld()
call acoo1%set_nrows(n_rows)
call acoo1%set_ncols(n_cols)
call a%and%mv_to_coo(acoo2,info)
nz=acoo2%get_nzeros()
call acoo1%csput(nz,acoo2%ia,acoo2%ja,acoo2%val,ione,n_rows,ione,n_cols,info)
if (allocated(a%a)) then
call a%a%free()
deallocate(a%a)
end if
allocate(a%a,mold=a%ad)
call a%a%mv_from_coo(acoo1,info)
if (psb_errstatus_fatal()) then
info=psb_err_from_subroutine_
call psb_errpush(info,name,a_err='cscnv')
goto 9999
endif
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine psb_z_merge_nd
subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl)
use psb_error_mod
use psb_string_mod
@ -1246,6 +1346,7 @@ subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl)
goto 9999
end if
if (.false.) then
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
@ -1294,6 +1395,16 @@ subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl)
end if
call move_alloc(altmp,b%a)
else
call inner_cp_fmt(a%a,b%a,info,type,mold,dupl)
if (allocated(a%ad)) then
call inner_cp_fmt(a%ad,b%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_cp_fmt(a%and,b%and,info,type,mold,dupl)
end if
end if
call b%trim()
call b%set_asb()
call psb_erractionrestore(err_act)
@ -1303,7 +1414,79 @@ subroutine psb_z_cscnv(a,b,info,type,mold,upd,dupl)
9999 call psb_error_handler(err_act)
return
contains
subroutine inner_cp_fmt(a,b,info,type,mold,dupl)
class(psb_z_base_sparse_mat), intent(in) :: a
class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_z_base_sparse_mat), intent(in), optional :: mold
class(psb_z_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_z_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_z_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_z_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_z_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%cp_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,b)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_cp_fmt
end subroutine psb_z_cscnv
subroutine psb_z_cscnv_ip(a,info,type,mold,dupl)
@ -1318,7 +1501,6 @@ subroutine psb_z_cscnv_ip(a,info,type,mold,dupl)
character(len=*), optional, intent(in) :: type
class(psb_z_base_sparse_mat), intent(in), optional :: mold
class(psb_z_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
character(len=20) :: name='cscnv_ip'
@ -1345,6 +1527,7 @@ subroutine psb_z_cscnv_ip(a,info,type,mold,dupl)
goto 9999
end if
if (.false.) then
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
@ -1377,14 +1560,22 @@ subroutine psb_z_cscnv_ip(a,info,type,mold,dupl)
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a%a, info)
call move_alloc(altmp,a%a)
else
call inner_mv_fmt(a%a,info,type,mold,dupl)
if (allocated(a%ad)) then
call inner_mv_fmt(a%ad,info,type,mold,dupl)
end if
if (allocated(a%and)) then
call inner_mv_fmt(a%and,info,type,mold,dupl)
end if
end if
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,a%a)
call a%trim()
call a%set_asb()
call psb_erractionrestore(err_act)
@ -1394,6 +1585,77 @@ subroutine psb_z_cscnv_ip(a,info,type,mold,dupl)
9999 call psb_error_handler(err_act)
return
contains
subroutine inner_mv_fmt(a,info,type,mold,dupl)
class(psb_z_base_sparse_mat), intent(inout), allocatable :: a
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_),optional, intent(in) :: dupl
character(len=*), optional, intent(in) :: type
class(psb_z_base_sparse_mat), intent(in), optional :: mold
class(psb_z_base_sparse_mat), allocatable :: altmp
integer(psb_ipk_) :: err_act
info = psb_success_
call psb_erractionsave(err_act)
if (present(mold)) then
allocate(altmp, mold=mold,stat=info)
else if (present(type)) then
select case (psb_toupper(type))
case ('CSR')
allocate(psb_z_csr_sparse_mat :: altmp, stat=info)
case ('COO')
allocate(psb_z_coo_sparse_mat :: altmp, stat=info)
case ('CSC')
allocate(psb_z_csc_sparse_mat :: altmp, stat=info)
case default
info = psb_err_format_unknown_
call psb_errpush(info,name,a_err=type)
goto 9999
end select
else
allocate(psb_z_csr_sparse_mat :: altmp, stat=info)
!allocate(altmp, mold=psb_get_mat_default(a),stat=info)
end if
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info,name)
goto 9999
end if
if (present(dupl)) then
call altmp%set_dupl(dupl)
else if (a%is_bld()) then
! Does this make sense at all?? Who knows..
call altmp%set_dupl(psb_dupl_def_)
end if
if (debug) write(psb_err_unit,*) 'Converting from ',&
& a%get_fmt(),' to ',altmp%get_fmt()
call altmp%mv_from_fmt(a, info)
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info,name,a_err="mv_from")
goto 9999
end if
call move_alloc(altmp,a)
call psb_erractionrestore(err_act)
return
9999 call psb_error_handler(err_act)
return
end subroutine inner_mv_fmt
end subroutine psb_z_cscnv_ip

@ -1567,3 +1567,300 @@ subroutine caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
return
end subroutine caxpbyv2
subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_spk_), intent (in) :: x(:)
complex(psb_spk_), intent (inout) :: y(:)
complex(psb_spk_), intent (inout) :: z(:)
complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='cabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if (beta == czero) then
if (gamma == czero) then
if (alpha == czero) then
if (delta == czero) then
! a 0 b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = czero
z(i) = czero
end do
else if (delta /= czero) then
! a 0 b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = czero
z(i) = delta*z(i)
end do
end if
else if (alpha /= czero) then
if (delta == czero) then
! a n b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = czero
end do
else if (delta /= czero) then
! a n b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= czero) then
if (alpha == czero) then
if (delta == czero) then
! a 0 b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = czero
z(i) = czero ! gamma*y(i)
end do
else if (delta /= czero) then
! a 0 b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = czero
z(i) = delta*z(i)
end do
end if
else if (alpha /= czero) then
if (delta == czero) then
! a n b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)
end do
else if (delta /= czero) then
! a n b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
else if (beta /= czero) then
if (gamma == czero) then
if (alpha == czero) then
if (delta == czero) then
! a 0 b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = czero
end do
else if (delta /= czero) then
! a 0 b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = delta*z(i)
end do
end if
else if (alpha /= czero) then
if (delta == czero) then
! a n b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = czero
end do
else if (delta /= czero) then
! a n b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= czero) then
if (alpha == czero) then
if (delta == czero) then
! a 0 b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= czero) then
! a 0 b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
else if (alpha /= czero) then
if (delta == czero) then
! a n b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= czero) then
! a n b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_cabgdxyz
subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_spk_), intent (in) :: x(:)
complex(psb_spk_), intent (inout) :: y(:)
complex(psb_spk_), intent (inout) :: z(:)
complex(psb_spk_), intent (inout) :: w(:)
complex(psb_spk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='cabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if ((a==czero).or.(b==czero).or. &
& (c==czero).or.(d==czero).or.&
& (e==czero).or.(f==czero)) then
write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
!$omp parallel do private(i)
do i=1,m
y(i) = a*x(i)+b*y(i)
z(i) = c*y(i)+d*z(i)
w(i) = e*z(i)+f*w(i)
end do
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_cxyzw

@ -1567,3 +1567,300 @@ subroutine daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
return
end subroutine daxpbyv2
subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_dpk_), intent (in) :: x(:)
real(psb_dpk_), intent (inout) :: y(:)
real(psb_dpk_), intent (inout) :: z(:)
real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='dabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if (beta == dzero) then
if (gamma == dzero) then
if (alpha == dzero) then
if (delta == dzero) then
! a 0 b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = dzero
z(i) = dzero
end do
else if (delta /= dzero) then
! a 0 b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = dzero
z(i) = delta*z(i)
end do
end if
else if (alpha /= dzero) then
if (delta == dzero) then
! a n b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = dzero
end do
else if (delta /= dzero) then
! a n b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= dzero) then
if (alpha == dzero) then
if (delta == dzero) then
! a 0 b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = dzero
z(i) = dzero ! gamma*y(i)
end do
else if (delta /= dzero) then
! a 0 b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = dzero
z(i) = delta*z(i)
end do
end if
else if (alpha /= dzero) then
if (delta == dzero) then
! a n b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)
end do
else if (delta /= dzero) then
! a n b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
else if (beta /= dzero) then
if (gamma == dzero) then
if (alpha == dzero) then
if (delta == dzero) then
! a 0 b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = dzero
end do
else if (delta /= dzero) then
! a 0 b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = delta*z(i)
end do
end if
else if (alpha /= dzero) then
if (delta == dzero) then
! a n b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = dzero
end do
else if (delta /= dzero) then
! a n b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= dzero) then
if (alpha == dzero) then
if (delta == dzero) then
! a 0 b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= dzero) then
! a 0 b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
else if (alpha /= dzero) then
if (delta == dzero) then
! a n b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= dzero) then
! a n b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_dabgdxyz
subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_dpk_), intent (in) :: x(:)
real(psb_dpk_), intent (inout) :: y(:)
real(psb_dpk_), intent (inout) :: z(:)
real(psb_dpk_), intent (inout) :: w(:)
real(psb_dpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='dabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if ((a==dzero).or.(b==dzero).or. &
& (c==dzero).or.(d==dzero).or.&
& (e==dzero).or.(f==dzero)) then
write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
!$omp parallel do private(i)
do i=1,m
y(i) = a*x(i)+b*y(i)
z(i) = c*y(i)+d*z(i)
w(i) = e*z(i)+f*w(i)
end do
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_dxyzw

@ -1567,3 +1567,300 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
return
end subroutine eaxpbyv2
subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_epk_), intent (in) :: x(:)
integer(psb_epk_), intent (inout) :: y(:)
integer(psb_epk_), intent (inout) :: z(:)
integer(psb_epk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='eabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if (beta == ezero) then
if (gamma == ezero) then
if (alpha == ezero) then
if (delta == ezero) then
! a 0 b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = ezero
z(i) = ezero
end do
else if (delta /= ezero) then
! a 0 b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = ezero
z(i) = delta*z(i)
end do
end if
else if (alpha /= ezero) then
if (delta == ezero) then
! a n b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = ezero
end do
else if (delta /= ezero) then
! a n b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= ezero) then
if (alpha == ezero) then
if (delta == ezero) then
! a 0 b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = ezero
z(i) = ezero ! gamma*y(i)
end do
else if (delta /= ezero) then
! a 0 b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = ezero
z(i) = delta*z(i)
end do
end if
else if (alpha /= ezero) then
if (delta == ezero) then
! a n b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)
end do
else if (delta /= ezero) then
! a n b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
else if (beta /= ezero) then
if (gamma == ezero) then
if (alpha == ezero) then
if (delta == ezero) then
! a 0 b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = ezero
end do
else if (delta /= ezero) then
! a 0 b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = delta*z(i)
end do
end if
else if (alpha /= ezero) then
if (delta == ezero) then
! a n b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = ezero
end do
else if (delta /= ezero) then
! a n b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= ezero) then
if (alpha == ezero) then
if (delta == ezero) then
! a 0 b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= ezero) then
! a 0 b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
else if (alpha /= ezero) then
if (delta == ezero) then
! a n b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= ezero) then
! a n b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_eabgdxyz
subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_epk_), intent (in) :: x(:)
integer(psb_epk_), intent (inout) :: y(:)
integer(psb_epk_), intent (inout) :: z(:)
integer(psb_epk_), intent (inout) :: w(:)
integer(psb_epk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='eabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if ((a==ezero).or.(b==ezero).or. &
& (c==ezero).or.(d==ezero).or.&
& (e==ezero).or.(f==ezero)) then
write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
!$omp parallel do private(i)
do i=1,m
y(i) = a*x(i)+b*y(i)
z(i) = c*y(i)+d*z(i)
w(i) = e*z(i)+f*w(i)
end do
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_exyzw

@ -1567,3 +1567,300 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
return
end subroutine i2axpbyv2
subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_i2pk_), intent (in) :: x(:)
integer(psb_i2pk_), intent (inout) :: y(:)
integer(psb_i2pk_), intent (inout) :: z(:)
integer(psb_i2pk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='i2abgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if (beta == i2zero) then
if (gamma == i2zero) then
if (alpha == i2zero) then
if (delta == i2zero) then
! a 0 b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = i2zero
z(i) = i2zero
end do
else if (delta /= i2zero) then
! a 0 b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = i2zero
z(i) = delta*z(i)
end do
end if
else if (alpha /= i2zero) then
if (delta == i2zero) then
! a n b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = i2zero
end do
else if (delta /= i2zero) then
! a n b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= i2zero) then
if (alpha == i2zero) then
if (delta == i2zero) then
! a 0 b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = i2zero
z(i) = i2zero ! gamma*y(i)
end do
else if (delta /= i2zero) then
! a 0 b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = i2zero
z(i) = delta*z(i)
end do
end if
else if (alpha /= i2zero) then
if (delta == i2zero) then
! a n b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)
end do
else if (delta /= i2zero) then
! a n b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
else if (beta /= i2zero) then
if (gamma == i2zero) then
if (alpha == i2zero) then
if (delta == i2zero) then
! a 0 b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = i2zero
end do
else if (delta /= i2zero) then
! a 0 b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = delta*z(i)
end do
end if
else if (alpha /= i2zero) then
if (delta == i2zero) then
! a n b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = i2zero
end do
else if (delta /= i2zero) then
! a n b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= i2zero) then
if (alpha == i2zero) then
if (delta == i2zero) then
! a 0 b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= i2zero) then
! a 0 b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
else if (alpha /= i2zero) then
if (delta == i2zero) then
! a n b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= i2zero) then
! a n b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_i2abgdxyz
subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_i2pk_), intent (in) :: x(:)
integer(psb_i2pk_), intent (inout) :: y(:)
integer(psb_i2pk_), intent (inout) :: z(:)
integer(psb_i2pk_), intent (inout) :: w(:)
integer(psb_i2pk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='i2abgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if ((a==i2zero).or.(b==i2zero).or. &
& (c==i2zero).or.(d==i2zero).or.&
& (e==i2zero).or.(f==i2zero)) then
write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
!$omp parallel do private(i)
do i=1,m
y(i) = a*x(i)+b*y(i)
z(i) = c*y(i)+d*z(i)
w(i) = e*z(i)+f*w(i)
end do
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_i2xyzw

@ -1567,3 +1567,300 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
return
end subroutine maxpbyv2
subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_mpk_), intent (in) :: x(:)
integer(psb_mpk_), intent (inout) :: y(:)
integer(psb_mpk_), intent (inout) :: z(:)
integer(psb_mpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='mabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if (beta == mzero) then
if (gamma == mzero) then
if (alpha == mzero) then
if (delta == mzero) then
! a 0 b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = mzero
z(i) = mzero
end do
else if (delta /= mzero) then
! a 0 b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = mzero
z(i) = delta*z(i)
end do
end if
else if (alpha /= mzero) then
if (delta == mzero) then
! a n b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = mzero
end do
else if (delta /= mzero) then
! a n b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= mzero) then
if (alpha == mzero) then
if (delta == mzero) then
! a 0 b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = mzero
z(i) = mzero ! gamma*y(i)
end do
else if (delta /= mzero) then
! a 0 b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = mzero
z(i) = delta*z(i)
end do
end if
else if (alpha /= mzero) then
if (delta == mzero) then
! a n b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)
end do
else if (delta /= mzero) then
! a n b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
else if (beta /= mzero) then
if (gamma == mzero) then
if (alpha == mzero) then
if (delta == mzero) then
! a 0 b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = mzero
end do
else if (delta /= mzero) then
! a 0 b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = delta*z(i)
end do
end if
else if (alpha /= mzero) then
if (delta == mzero) then
! a n b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = mzero
end do
else if (delta /= mzero) then
! a n b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= mzero) then
if (alpha == mzero) then
if (delta == mzero) then
! a 0 b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= mzero) then
! a 0 b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
else if (alpha /= mzero) then
if (delta == mzero) then
! a n b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= mzero) then
! a n b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_mabgdxyz
subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
integer(psb_mpk_), intent (in) :: x(:)
integer(psb_mpk_), intent (inout) :: y(:)
integer(psb_mpk_), intent (inout) :: z(:)
integer(psb_mpk_), intent (inout) :: w(:)
integer(psb_mpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='mabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if ((a==mzero).or.(b==mzero).or. &
& (c==mzero).or.(d==mzero).or.&
& (e==mzero).or.(f==mzero)) then
write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
!$omp parallel do private(i)
do i=1,m
y(i) = a*x(i)+b*y(i)
z(i) = c*y(i)+d*z(i)
w(i) = e*z(i)+f*w(i)
end do
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_mxyzw

@ -1567,3 +1567,300 @@ subroutine saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
return
end subroutine saxpbyv2
subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_spk_), intent (in) :: x(:)
real(psb_spk_), intent (inout) :: y(:)
real(psb_spk_), intent (inout) :: z(:)
real(psb_spk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='sabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if (beta == szero) then
if (gamma == szero) then
if (alpha == szero) then
if (delta == szero) then
! a 0 b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = szero
z(i) = szero
end do
else if (delta /= szero) then
! a 0 b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = szero
z(i) = delta*z(i)
end do
end if
else if (alpha /= szero) then
if (delta == szero) then
! a n b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = szero
end do
else if (delta /= szero) then
! a n b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= szero) then
if (alpha == szero) then
if (delta == szero) then
! a 0 b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = szero
z(i) = szero ! gamma*y(i)
end do
else if (delta /= szero) then
! a 0 b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = szero
z(i) = delta*z(i)
end do
end if
else if (alpha /= szero) then
if (delta == szero) then
! a n b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)
end do
else if (delta /= szero) then
! a n b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
else if (beta /= szero) then
if (gamma == szero) then
if (alpha == szero) then
if (delta == szero) then
! a 0 b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = szero
end do
else if (delta /= szero) then
! a 0 b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = delta*z(i)
end do
end if
else if (alpha /= szero) then
if (delta == szero) then
! a n b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = szero
end do
else if (delta /= szero) then
! a n b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= szero) then
if (alpha == szero) then
if (delta == szero) then
! a 0 b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= szero) then
! a 0 b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
else if (alpha /= szero) then
if (delta == szero) then
! a n b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= szero) then
! a n b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_sabgdxyz
subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_spk_), intent (in) :: x(:)
real(psb_spk_), intent (inout) :: y(:)
real(psb_spk_), intent (inout) :: z(:)
real(psb_spk_), intent (inout) :: w(:)
real(psb_spk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='sabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if ((a==szero).or.(b==szero).or. &
& (c==szero).or.(d==szero).or.&
& (e==szero).or.(f==szero)) then
write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
!$omp parallel do private(i)
do i=1,m
y(i) = a*x(i)+b*y(i)
z(i) = c*y(i)+d*z(i)
w(i) = e*z(i)+f*w(i)
end do
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_sxyzw

@ -1567,3 +1567,300 @@ subroutine zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
return
end subroutine zaxpbyv2
subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_dpk_), intent (in) :: x(:)
complex(psb_dpk_), intent (inout) :: y(:)
complex(psb_dpk_), intent (inout) :: z(:)
complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='zabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if (beta == zzero) then
if (gamma == zzero) then
if (alpha == zzero) then
if (delta == zzero) then
! a 0 b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = zzero
z(i) = zzero
end do
else if (delta /= zzero) then
! a 0 b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = zzero
z(i) = delta*z(i)
end do
end if
else if (alpha /= zzero) then
if (delta == zzero) then
! a n b 0 g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = zzero
end do
else if (delta /= zzero) then
! a n b 0 g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= zzero) then
if (alpha == zzero) then
if (delta == zzero) then
! a 0 b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = zzero
z(i) = zzero ! gamma*y(i)
end do
else if (delta /= zzero) then
! a 0 b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = zzero
z(i) = delta*z(i)
end do
end if
else if (alpha /= zzero) then
if (delta == zzero) then
! a n b 0 g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)
end do
else if (delta /= zzero) then
! a n b 0 g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
else if (beta /= zzero) then
if (gamma == zzero) then
if (alpha == zzero) then
if (delta == zzero) then
! a 0 b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = zzero
end do
else if (delta /= zzero) then
! a 0 b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = delta*z(i)
end do
end if
else if (alpha /= zzero) then
if (delta == zzero) then
! a n b n g 0 d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = zzero
end do
else if (delta /= zzero) then
! a n b n g 0 d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = delta*z(i)
end do
end if
end if
else if (gamma /= zzero) then
if (alpha == zzero) then
if (delta == zzero) then
! a 0 b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= zzero) then
! a 0 b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
else if (alpha /= zzero) then
if (delta == zzero) then
! a n b n g n d 0
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)
end do
else if (delta /= zzero) then
! a n b n g n d n
!$omp parallel do private(i)
do i=1,m
y(i) = alpha*x(i)+beta*y(i)
z(i) = gamma*y(i)+delta*z(i)
end do
end if
end if
end if
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_zabgdxyz
subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
use psb_error_mod
implicit none
integer(psb_ipk_), intent(in) :: m
complex(psb_dpk_), intent (in) :: x(:)
complex(psb_dpk_), intent (inout) :: y(:)
complex(psb_dpk_), intent (inout) :: z(:)
complex(psb_dpk_), intent (inout) :: w(:)
complex(psb_dpk_), intent (in) :: a,b,c,d,e,f
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
name='zabgdxyz'
info = psb_success_
if (m.lt.0) then
info=psb_err_iarg_neg_
int_err(1)=1
int_err(2)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(x).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=6
int_err(2)=1
int_err(3)=size(x)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(y).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=7
int_err(2)=1
int_err(3)=size(y)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
else if (size(z).lt.max(1,m)) then
info=psb_err_iarg_not_gtia_ii_
int_err(1)=8
int_err(2)=1
int_err(3)=size(z)
int_err(4)=m
call fcpsb_errpush(info,name,int_err)
goto 9999
endif
if ((a==zzero).or.(b==zzero).or. &
& (c==zzero).or.(d==zzero).or.&
& (e==zzero).or.(f==zzero)) then
write(0,*) 'XYZW assumes a,b,c,d,e,f are all nonzero'
else
!$omp parallel do private(i)
do i=1,m
y(i) = a*x(i)+b*y(i)
z(i) = c*y(i)+d*z(i)
w(i) = e*z(i)+f*w(i)
end do
end if
return
9999 continue
call fcpsb_serror()
return
end subroutine psi_zxyzw

@ -44,7 +44,7 @@
! psb_upd_perm_ Permutation(more memory)
!
!
subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold)
subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold, bld_and)
use psb_base_mod, psb_protect_name => psb_cspasb
use psb_sort_mod
use psi_mod
@ -58,6 +58,7 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_), optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt
class(psb_c_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
!....Locals....
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np,me, err_act
@ -65,6 +66,7 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name, ch_err
class(psb_i_base_vect_type), allocatable :: ivm
logical :: bld_and_
info = psb_success_
name = 'psb_spasb'
@ -93,7 +95,11 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold)
if (debug_level >= psb_debug_ext_)&
& write(debug_unit, *) me,' ',trim(name),&
& ' Begin matrix assembly...'
if (present(bld_and)) then
bld_and_ = bld_and
else
bld_and_ = .false.
end if
!check on errors encountered in psdspins
if (a%is_bld()) then
@ -171,7 +177,49 @@ subroutine psb_cspasb(a,desc_a, info, afmt, upd, mold)
end if
if (bld_and_) then
!!$ allocate(a%ad,mold=a%a)
!!$ allocate(a%and,mold=a%a)o
call a%split_nd(n_row,n_col,info)
!!$ block
!!$ character(len=1024) :: fname
!!$ type(psb_c_coo_sparse_mat) :: acoo
!!$ type(psb_c_csr_sparse_mat), allocatable :: aclip
!!$ type(psb_c_ecsr_sparse_mat), allocatable :: andclip
!!$ logical, parameter :: use_ecsr=.true.
!!$ allocate(aclip)
!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.)
!!$ allocate(a%ad,mold=a%a)
!!$ call a%ad%mv_from_coo(acoo,info)
!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.)
!!$ if (use_ecsr) then
!!$ allocate(andclip)
!!$ call andclip%mv_from_coo(acoo,info)
!!$ call move_alloc(andclip,a%and)
!!$ else
!!$ allocate(a%and,mold=a%a)
!!$ call a%and%mv_from_coo(acoo,info)
!!$ end if
!!$ if (.false.) then
!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%ad%print(25)
!!$ close(25)
!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%and%print(25)
!!$ close(25)
!!$ !call andclip%set_cols(n_col)
!!$ write(*,*) me,' ',trim(name),' ad ',&
!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col
!!$ write(*,*) me,' ',trim(name),' and ',&
!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col
!!$ end if
!!$ end block
else
if (allocated(a%ad)) deallocate(a%ad)
if (allocated(a%and)) deallocate(a%and)
end if
if (debug_level >= psb_debug_ext_) then
ch_err=a%get_fmt()
write(debug_unit, *) me,' ',trim(name),': From SPCNV',&

@ -44,7 +44,7 @@
! psb_upd_perm_ Permutation(more memory)
!
!
subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold)
subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold, bld_and)
use psb_base_mod, psb_protect_name => psb_dspasb
use psb_sort_mod
use psi_mod
@ -58,6 +58,7 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_), optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt
class(psb_d_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
!....Locals....
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np,me, err_act
@ -65,6 +66,7 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name, ch_err
class(psb_i_base_vect_type), allocatable :: ivm
logical :: bld_and_
info = psb_success_
name = 'psb_spasb'
@ -93,7 +95,11 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold)
if (debug_level >= psb_debug_ext_)&
& write(debug_unit, *) me,' ',trim(name),&
& ' Begin matrix assembly...'
if (present(bld_and)) then
bld_and_ = bld_and
else
bld_and_ = .false.
end if
!check on errors encountered in psdspins
if (a%is_bld()) then
@ -171,7 +177,49 @@ subroutine psb_dspasb(a,desc_a, info, afmt, upd, mold)
end if
if (bld_and_) then
!!$ allocate(a%ad,mold=a%a)
!!$ allocate(a%and,mold=a%a)o
call a%split_nd(n_row,n_col,info)
!!$ block
!!$ character(len=1024) :: fname
!!$ type(psb_d_coo_sparse_mat) :: acoo
!!$ type(psb_d_csr_sparse_mat), allocatable :: aclip
!!$ type(psb_d_ecsr_sparse_mat), allocatable :: andclip
!!$ logical, parameter :: use_ecsr=.true.
!!$ allocate(aclip)
!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.)
!!$ allocate(a%ad,mold=a%a)
!!$ call a%ad%mv_from_coo(acoo,info)
!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.)
!!$ if (use_ecsr) then
!!$ allocate(andclip)
!!$ call andclip%mv_from_coo(acoo,info)
!!$ call move_alloc(andclip,a%and)
!!$ else
!!$ allocate(a%and,mold=a%a)
!!$ call a%and%mv_from_coo(acoo,info)
!!$ end if
!!$ if (.false.) then
!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%ad%print(25)
!!$ close(25)
!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%and%print(25)
!!$ close(25)
!!$ !call andclip%set_cols(n_col)
!!$ write(*,*) me,' ',trim(name),' ad ',&
!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col
!!$ write(*,*) me,' ',trim(name),' and ',&
!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col
!!$ end if
!!$ end block
else
if (allocated(a%ad)) deallocate(a%ad)
if (allocated(a%and)) deallocate(a%and)
end if
if (debug_level >= psb_debug_ext_) then
ch_err=a%get_fmt()
write(debug_unit, *) me,' ',trim(name),': From SPCNV',&

@ -67,7 +67,7 @@ subroutine psb_icdasb(desc,info,ext_hv,mold)
integer(psb_mpk_) :: icomm
integer(psb_ipk_) :: np,me
logical :: ext_hv_
logical, parameter :: do_timings=.true.
logical, parameter :: do_timings=.false.
integer(psb_ipk_), save :: idx_phase1=-1, idx_phase2=-1, idx_phase3=-1
integer(psb_ipk_), save :: idx_phase11=-1, idx_phase12=-1, idx_phase13=-1
integer(psb_ipk_), save :: idx_total=-1

@ -44,7 +44,7 @@
! psb_upd_perm_ Permutation(more memory)
!
!
subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold)
subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold, bld_and)
use psb_base_mod, psb_protect_name => psb_sspasb
use psb_sort_mod
use psi_mod
@ -58,6 +58,7 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_), optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt
class(psb_s_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
!....Locals....
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np,me, err_act
@ -65,6 +66,7 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name, ch_err
class(psb_i_base_vect_type), allocatable :: ivm
logical :: bld_and_
info = psb_success_
name = 'psb_spasb'
@ -93,7 +95,11 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold)
if (debug_level >= psb_debug_ext_)&
& write(debug_unit, *) me,' ',trim(name),&
& ' Begin matrix assembly...'
if (present(bld_and)) then
bld_and_ = bld_and
else
bld_and_ = .false.
end if
!check on errors encountered in psdspins
if (a%is_bld()) then
@ -171,7 +177,49 @@ subroutine psb_sspasb(a,desc_a, info, afmt, upd, mold)
end if
if (bld_and_) then
!!$ allocate(a%ad,mold=a%a)
!!$ allocate(a%and,mold=a%a)o
call a%split_nd(n_row,n_col,info)
!!$ block
!!$ character(len=1024) :: fname
!!$ type(psb_s_coo_sparse_mat) :: acoo
!!$ type(psb_s_csr_sparse_mat), allocatable :: aclip
!!$ type(psb_s_ecsr_sparse_mat), allocatable :: andclip
!!$ logical, parameter :: use_ecsr=.true.
!!$ allocate(aclip)
!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.)
!!$ allocate(a%ad,mold=a%a)
!!$ call a%ad%mv_from_coo(acoo,info)
!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.)
!!$ if (use_ecsr) then
!!$ allocate(andclip)
!!$ call andclip%mv_from_coo(acoo,info)
!!$ call move_alloc(andclip,a%and)
!!$ else
!!$ allocate(a%and,mold=a%a)
!!$ call a%and%mv_from_coo(acoo,info)
!!$ end if
!!$ if (.false.) then
!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%ad%print(25)
!!$ close(25)
!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%and%print(25)
!!$ close(25)
!!$ !call andclip%set_cols(n_col)
!!$ write(*,*) me,' ',trim(name),' ad ',&
!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col
!!$ write(*,*) me,' ',trim(name),' and ',&
!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col
!!$ end if
!!$ end block
else
if (allocated(a%ad)) deallocate(a%ad)
if (allocated(a%and)) deallocate(a%and)
end if
if (debug_level >= psb_debug_ext_) then
ch_err=a%get_fmt()
write(debug_unit, *) me,' ',trim(name),': From SPCNV',&

@ -44,7 +44,7 @@
! psb_upd_perm_ Permutation(more memory)
!
!
subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold)
subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold, bld_and)
use psb_base_mod, psb_protect_name => psb_zspasb
use psb_sort_mod
use psi_mod
@ -58,6 +58,7 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_), optional, intent(in) :: upd
character(len=*), optional, intent(in) :: afmt
class(psb_z_base_sparse_mat), intent(in), optional :: mold
logical, intent(in), optional :: bld_and
!....Locals....
type(psb_ctxt_type) :: ctxt
integer(psb_ipk_) :: np,me, err_act
@ -65,6 +66,7 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold)
integer(psb_ipk_) :: debug_level, debug_unit
character(len=20) :: name, ch_err
class(psb_i_base_vect_type), allocatable :: ivm
logical :: bld_and_
info = psb_success_
name = 'psb_spasb'
@ -93,7 +95,11 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold)
if (debug_level >= psb_debug_ext_)&
& write(debug_unit, *) me,' ',trim(name),&
& ' Begin matrix assembly...'
if (present(bld_and)) then
bld_and_ = bld_and
else
bld_and_ = .false.
end if
!check on errors encountered in psdspins
if (a%is_bld()) then
@ -171,7 +177,49 @@ subroutine psb_zspasb(a,desc_a, info, afmt, upd, mold)
end if
if (bld_and_) then
!!$ allocate(a%ad,mold=a%a)
!!$ allocate(a%and,mold=a%a)o
call a%split_nd(n_row,n_col,info)
!!$ block
!!$ character(len=1024) :: fname
!!$ type(psb_z_coo_sparse_mat) :: acoo
!!$ type(psb_z_csr_sparse_mat), allocatable :: aclip
!!$ type(psb_z_ecsr_sparse_mat), allocatable :: andclip
!!$ logical, parameter :: use_ecsr=.true.
!!$ allocate(aclip)
!!$ call a%a%csclip(acoo,info,jmax=n_row,rscale=.false.,cscale=.false.)
!!$ allocate(a%ad,mold=a%a)
!!$ call a%ad%mv_from_coo(acoo,info)
!!$ call a%a%csclip(acoo,info,jmin=n_row+1,jmax=n_col,rscale=.false.,cscale=.false.)
!!$ if (use_ecsr) then
!!$ allocate(andclip)
!!$ call andclip%mv_from_coo(acoo,info)
!!$ call move_alloc(andclip,a%and)
!!$ else
!!$ allocate(a%and,mold=a%a)
!!$ call a%and%mv_from_coo(acoo,info)
!!$ end if
!!$ if (.false.) then
!!$ write(fname,'(a,i2.2,a)') 'adclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%ad%print(25)
!!$ close(25)
!!$ write(fname,'(a,i2.2,a)') 'andclip_',me,'.mtx'
!!$ open(25,file=fname)
!!$ call a%and%print(25)
!!$ close(25)
!!$ !call andclip%set_cols(n_col)
!!$ write(*,*) me,' ',trim(name),' ad ',&
!!$ &a%ad%get_nrows(),a%ad%get_ncols(),n_row,n_col
!!$ write(*,*) me,' ',trim(name),' and ',&
!!$ &a%and%get_nrows(),a%and%get_ncols(),n_row,n_col
!!$ end if
!!$ end block
else
if (allocated(a%ad)) deallocate(a%ad)
if (allocated(a%and)) deallocate(a%and)
end if
if (debug_level >= psb_debug_ext_) then
ch_err=a%get_fmt()
write(debug_unit, *) me,' ',trim(name),': From SPCNV',&

@ -2018,3 +2018,252 @@ CPPFLAGS="$SAVE_CPPFLAGS";
])dnl
dnl @synopsis PAC_CHECK_SPGPU
dnl
dnl Will try to find the spgpu library and headers.
dnl
dnl Will use $CC
dnl
dnl If the test passes, will execute ACTION-IF-FOUND. Otherwise, ACTION-IF-NOT-FOUND.
dnl Note : This file will be likely to induce the compiler to create a module file
dnl (for a module called conftest).
dnl Depending on the compiler flags, this could cause a conftest.mod file to appear
dnl in the present directory, or in another, or with another name. So be warned!
dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl
AC_DEFUN(PAC_CHECK_SPGPU,
[SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS"
if test "x$pac_cv_have_cuda" == "x"; then
PAC_CHECK_CUDA()
fi
dnl AC_MSG_NOTICE([From CUDA: $pac_cv_have_cuda ])
if test "x$pac_cv_have_cuda" == "xyes"; then
AC_ARG_WITH(spgpu, AC_HELP_STRING([--with-spgpu=DIR], [Specify the directory for SPGPU library and includes.]),
[pac_cv_spgpudir=$withval],
[pac_cv_spgpudir=''])
AC_LANG([C])
if test "x$pac_cv_spgpudir" != "x"; then
LIBS="-L$pac_cv_spgpudir/lib $LIBS"
GPU_INCLUDES="-I$pac_cv_spgpudir/include"
CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS"
GPU_LIBDIR="-L$pac_cv_spgpudir/lib"
fi
AC_MSG_CHECKING([spgpu dir $pac_cv_spgpudir])
AC_CHECK_HEADER([core.h],
[pac_gpu_header_ok=yes],
[pac_gpu_header_ok=no; GPU_INCLUDES=""])
if test "x$pac_gpu_header_ok" == "xyes" ; then
GPU_LIBS="-lspgpu $GPU_LIBDIR"
LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS";
AC_MSG_CHECKING([for spgpuCreate in $GPU_LIBS])
AC_TRY_LINK_FUNC(spgpuCreate,
[pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; ],
[pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS=""])
AC_MSG_RESULT($pac_gpu_lib_ok)
if test "x$pac_cv_have_spgpu" == "xyes" ; then
AC_MSG_NOTICE([Have found SPGPU])
SPGPULIBNAME="libpsbgpu.a";
SPGPU_DIR="$pac_cv_spgpudir";
SPGPU_DEFINES="-DHAVE_SPGPU";
SPGPU_INCDIR="$SPGPU_DIR/include";
SPGPU_INCLUDES="-I$SPGPU_INCDIR";
SPGPU_LIBS="-lspgpu -L$SPGPU_DIR/lib";
LGPU=-lpsb_gpu
CUDA_DIR="$pac_cv_cuda_dir";
CUDA_DEFINES="-DHAVE_CUDA";
CUDA_INCLUDES="-I$pac_cv_cuda_dir/include"
CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib"
FDEFINES="$psblas_cv_define_prepend-DHAVE_GPU $psblas_cv_define_prepend-DHAVE_SPGPU $psblas_cv_define_prepend-DHAVE_CUDA $FDEFINES";
CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES" ;
fi
fi
fi
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
])dnl
dnl @synopsis PAC_CHECK_CUDA
dnl
dnl Will try to find the cuda library and headers.
dnl
dnl Will use $CC
dnl
dnl If the test passes, will execute ACTION-IF-FOUND. Otherwise, ACTION-IF-NOT-FOUND.
dnl Note : This file will be likely to induce the compiler to create a module file
dnl (for a module called conftest).
dnl Depending on the compiler flags, this could cause a conftest.mod file to appear
dnl in the present directory, or in another, or with another name. So be warned!
dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl
AC_DEFUN(PAC_CHECK_CUDA,
[AC_ARG_WITH(cuda, AC_HELP_STRING([--with-cuda=DIR], [Specify the CUDA install directory.]),
[pac_cv_cuda_dir=$withval],
[pac_cv_cuda_dir=''])
AC_LANG([C])
SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS"
if test "x$pac_cv_cuda_dir" != "x"; then
CUDA_DIR="$pac_cv_cuda_dir"
LIBS="-L$pac_cv_cuda_dir/lib $LIBS"
CUDA_INCLUDES="-I$pac_cv_cuda_dir/include"
CUDA_DEFINES="-DHAVE_CUDA"
CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS"
CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib"
if test -f "$pac_cv_cuda_dir/bin/nvcc"; then
CUDA_NVCC="$pac_cv_cuda_dir/bin/nvcc"
else
CUDA_NVCC="nvcc"
fi
fi
AC_MSG_CHECKING([cuda dir $pac_cv_cuda_dir])
AC_CHECK_HEADER([cuda_runtime.h],
[pac_cuda_header_ok=yes],
[pac_cuda_header_ok=no; CUDA_INCLUDES=""])
if test "x$pac_cuda_header_ok" == "xyes" ; then
CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR"
LIBS="$CUDA_LIBS -lm $LIBS";
AC_MSG_CHECKING([for cudaMemcpy in $CUDA_LIBS])
AC_TRY_LINK_FUNC(cudaMemcpy,
[pac_cv_have_cuda=yes;pac_cuda_lib_ok=yes; ],
[pac_cv_have_cuda=no;pac_cuda_lib_ok=no; CUDA_LIBS=""])
AC_MSG_RESULT($pac_cuda_lib_ok)
fi
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
])dnl
dnl @synopsis PAC_ARG_WITH_CUDACC
dnl
dnl Test for --with-cudacc="set_of_cc".
dnl
dnl Defines the CC to compile for
dnl
dnl
dnl Example use:
dnl
dnl PAC_ARG_WITH_CUDACC
dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl
AC_DEFUN([PAC_ARG_WITH_CUDACC],
[
AC_ARG_WITH(cudacc,
AC_HELP_STRING([--with-cudacc], [A comma-separated list of CCs to compile to, for example,
--with-cudacc=50,60,70,75]),
[pac_cv_cudacc=$withval],
[pac_cv_cudacc=''])
])
AC_DEFUN(PAC_ARG_WITH_LIBRSB,
[SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS"
AC_ARG_WITH(librsb,
AC_HELP_STRING([--with-librsb], [The directory for LIBRSB, for example,
--with-librsb=/opt/packages/librsb]),
[pac_cv_librsb_dir=$withval],
[pac_cv_librsb_dir=''])
if test "x$pac_cv_librsb_dir" != "x"; then
LIBS="-L$pac_cv_librsb_dir $LIBS"
RSB_INCLUDES="-I$pac_cv_librsb_dir"
# CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS"
RSB_LIBDIR="-L$pac_cv_librsb_dir"
fi
#AC_MSG_CHECKING([librsb dir $pac_cv_librsb_dir])
AC_CHECK_HEADER([$pac_cv_librsb_dir/rsb.h],
[pac_rsb_header_ok=yes],
[pac_rsb_header_ok=no; RSB_INCLUDES=""])
if test "x$pac_rsb_header_ok" == "xyes" ; then
RSB_LIBS="-lrsb $RSB_LIBDIR"
# LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS";
# AC_MSG_CHECKING([for spgpuCreate in $GPU_LIBS])
# AC_TRY_LINK_FUNC(spgpuCreate,
# [pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; ],
# [pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS=""])
# AC_MSG_RESULT($pac_gpu_lib_ok)
# if test "x$pac_cv_have_spgpu" == "xyes" ; then
# AC_MSG_NOTICE([Have found SPGPU])
RSBLIBNAME="librsb.a";
LIBRSB_DIR="$pac_cv_librsb_dir";
# SPGPU_DEFINES="-DHAVE_SPGPU";
LIBRSB_INCDIR="$LIBRSB_DIR";
LIBRSB_INCLUDES="-I$LIBRSB_INCDIR";
LIBRSB_LIBS="-lrsb -L$LIBRSB_DIR";
# CUDA_DIR="$pac_cv_cuda_dir";
LIBRSB_DEFINES="-DHAVE_RSB";
LRSB=-lpsb_rsb
# CUDA_INCLUDES="-I$pac_cv_cuda_dir/include"
# CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib"
FDEFINES="$LIBRSB_DEFINES $psblas_cv_define_prepend $FDEFINES";
CDEFINES="$LIBRSB_DEFINES $CDEFINES";#CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES";
fi
# fi
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
])
dnl
dnl @synopsis PAC_CHECK_CUDA_VERSION
dnl
dnl Will try to find the cuda version
dnl
dnl Will use $CC
dnl
dnl If the test passes, will execute ACTION-IF-FOUND. Otherwise, ACTION-IF-NOT-FOUND.
dnl Note : This file will be likely to induce the compiler to create a module file
dnl (for a module called conftest).
dnl Depending on the compiler flags, this could cause a conftest.mod file to appear
dnl in the present directory, or in another, or with another name. So be warned!
dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl
AC_DEFUN(PAC_CHECK_CUDA_VERSION,
[AC_LANG_PUSH([C])
SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS"
if test "x$pac_cv_have_cuda" == "x"; then
PAC_CHECK_CUDA()
fi
if test "x$pac_cv_have_cuda" == "xyes"; then
CUDA_DIR="$pac_cv_cuda_dir"
LIBS="-L$pac_cv_cuda_dir/lib $LIBS"
CUDA_INCLUDES="-I$pac_cv_cuda_dir/include"
CUDA_DEFINES="-DHAVE_CUDA"
CPPFLAGS="$CUDA_INCLUDES $CPPFLAGS"
CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib"
CUDA_LIBS="-lcusparse -lcublas -lcudart $CUDA_LIBDIR"
LIBS="$CUDA_LIBS -lm $LIBS";
AC_MSG_CHECKING([for CUDA version])
AC_LINK_IFELSE([AC_LANG_SOURCE([
#include <stdio.h>
#include <cuda.h>
int main(int argc, char **argv)
{
printf("%d",CUDA_VERSION);
return(0);
} ])],
[pac_cv_cuda_version=`./conftest${ac_exeext} | sed 's/^ *//'`;],
[pac_cv_cuda_version="unknown";])
AC_MSG_RESULT($pac_cv_cuda_version)
fi
AC_LANG_POP([C])
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
])dnl

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save